UI:n system prompt ja sampling-parametrit välittyvät inferenssiin asti
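Frontend lähettää agentin asetukset (system_prompt, temperature, top_k, max_tokens, repeat_penalty, stop) API:lle; jos agentilla ei ole omaa arvoa, käytetään globaaleja asetuksia, ja stop-sekvenssit luetaan aina globaaleista asetuksista. Hub välittää ne solmulle. Native-node käyttää välitettyjä arvoja kovakoodattujen sijaan ja palaa aiempiin oletusarvoihin, jos jokin arvo puuttuu. Wasm-coderille välitetään prompt, system prompt ja max_tokens JSON-muotoisena promptina.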
This commit is contained in:
@@ -486,7 +486,7 @@ OUTPUT FORMAT:
|
|||||||
// === kpnRun: lähettää promptin mallille ===
|
// === kpnRun: lähettää promptin mallille ===
|
||||||
const activeStreams = {};
|
const activeStreams = {};
|
||||||
|
|
||||||
async function kpnRun(model, prompt, silent) {
|
async function kpnRun(model, prompt, silent, agentOpts) {
|
||||||
const taskId = crypto.randomUUID();
|
const taskId = crypto.randomUUID();
|
||||||
const statusDiv = document.createElement('div');
|
const statusDiv = document.createElement('div');
|
||||||
statusDiv.className = 'terminal-line';
|
statusDiv.className = 'terminal-line';
|
||||||
@@ -511,10 +511,24 @@ OUTPUT FORMAT:
|
|||||||
|
|
||||||
statusDiv.innerHTML = ` <span style="color:#8b949e">→ <span style="color:var(--accent)">${model}</span> käsittelee...</span>`;
|
statusDiv.innerHTML = ` <span style="color:#8b949e">→ <span style="color:var(--accent)">${model}</span> käsittelee...</span>`;
|
||||||
|
|
||||||
|
// Rakennetaan pyyntö: agentin asetukset tai globaalit oletukset
|
||||||
|
const opts = agentOpts || {};
|
||||||
|
const payload = {
|
||||||
|
model,
|
||||||
|
prompt,
|
||||||
|
task_id: taskId,
|
||||||
|
system_prompt: opts.systemPrompt || settings.systemPrompt || undefined,
|
||||||
|
temperature: opts.temperature ?? settings.temperature ?? undefined,
|
||||||
|
top_k: opts.topK ?? settings.topK ?? undefined,
|
||||||
|
max_tokens: opts.maxTokens ?? settings.maxTokens ?? undefined,
|
||||||
|
repeat_penalty: opts.repeatPenalty ?? settings.repeatPenalty ?? undefined,
|
||||||
|
stop: settings.stopSequences ? settings.stopSequences.split('\\n').filter(Boolean) : undefined,
|
||||||
|
};
|
||||||
|
|
||||||
const res = await fetch('/api/v1/chat/completions', {
|
const res = await fetch('/api/v1/chat/completions', {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: { 'Content-Type': 'application/json' },
|
headers: { 'Content-Type': 'application/json' },
|
||||||
body: JSON.stringify({ model, prompt, task_id: taskId }),
|
body: JSON.stringify(payload),
|
||||||
});
|
});
|
||||||
|
|
||||||
if (res.status === 503 && !wasmNodeStarted) {
|
if (res.status === 503 && !wasmNodeStarted) {
|
||||||
@@ -792,7 +806,7 @@ OUTPUT FORMAT:
|
|||||||
prompt += fileDef.instructions + '\n';
|
prompt += fileDef.instructions + '\n';
|
||||||
prompt += 'Adapt the example to match the project description. Import from already written files. Write ONLY the code, no explanations.';
|
prompt += 'Adapt the example to match the project description. Import from already written files. Write ONLY the code, no explanations.';
|
||||||
|
|
||||||
const code = await kpnRun(fileAgent.model, prompt);
|
const code = await kpnRun(fileAgent.model, prompt, false, fileAgent);
|
||||||
if (!code) {
|
if (!code) {
|
||||||
termLog(` ✗ Keskeytyi (${fileName})`, '#f85149');
|
termLog(` ✗ Keskeytyi (${fileName})`, '#f85149');
|
||||||
return;
|
return;
|
||||||
@@ -817,7 +831,7 @@ OUTPUT FORMAT:
|
|||||||
else explainStep('Uudelleentarkistus', `${tst.name} tarkistaa korjaukset.`);
|
else explainStep('Uudelleentarkistus', `${tst.name} tarkistaa korjaukset.`);
|
||||||
|
|
||||||
const reviewPrompt = (tst.prompt ? tst.prompt+'\n\n' : '') + `Review this project:\n\n${currentCode}`;
|
const reviewPrompt = (tst.prompt ? tst.prompt+'\n\n' : '') + `Review this project:\n\n${currentCode}`;
|
||||||
const review = await kpnRun(tst.model, reviewPrompt);
|
const review = await kpnRun(tst.model, reviewPrompt, false, tst);
|
||||||
stepN++;
|
stepN++;
|
||||||
|
|
||||||
// LGTM → ei korjauksia tarvita
|
// LGTM → ei korjauksia tarvita
|
||||||
@@ -832,7 +846,7 @@ OUTPUT FORMAT:
|
|||||||
explainStep('Korjaus', `${tst.name} löysi ongelmia. ${cdr.name} saa palautteen ja korjaa.`);
|
explainStep('Korjaus', `${tst.name} löysi ongelmia. ${cdr.name} saa palautteen ja korjaa.`);
|
||||||
|
|
||||||
const fixPrompt = `${cdr.prompt ? cdr.prompt+'\n\n' : ''}Fix these issues:\n${review}\n\nCurrent code:\n${currentCode}\n\nWrite ALL corrected files. Start each file with: --- filename.py ---`;
|
const fixPrompt = `${cdr.prompt ? cdr.prompt+'\n\n' : ''}Fix these issues:\n${review}\n\nCurrent code:\n${currentCode}\n\nWrite ALL corrected files. Start each file with: --- filename.py ---`;
|
||||||
const fixedCode = await kpnRun(cdr.model, fixPrompt);
|
const fixedCode = await kpnRun(cdr.model, fixPrompt, false, cdr);
|
||||||
|
|
||||||
// Parsitaan korjatut tiedostot takaisin files-objektiin
|
// Parsitaan korjatut tiedostot takaisin files-objektiin
|
||||||
if (fixedCode) {
|
if (fixedCode) {
|
||||||
@@ -858,7 +872,7 @@ OUTPUT FORMAT:
|
|||||||
highlightAgent('qa');
|
highlightAgent('qa');
|
||||||
explainStep('Testit', `${qaAgent.name} kirjoittaa pytest-testit korjatulle koodille.`);
|
explainStep('Testit', `${qaAgent.name} kirjoittaa pytest-testit korjatulle koodille.`);
|
||||||
const qaPrompt = (qaAgent.prompt ? qaAgent.prompt+'\n\n' : '') + `Write pytest tests for this project:\n\n${updatedCode}\n\nWrite a complete test_main.py file with TestClient.`;
|
const qaPrompt = (qaAgent.prompt ? qaAgent.prompt+'\n\n' : '') + `Write pytest tests for this project:\n\n${updatedCode}\n\nWrite a complete test_main.py file with TestClient.`;
|
||||||
const tests = await kpnRun(qaAgent.model, qaPrompt);
|
const tests = await kpnRun(qaAgent.model, qaPrompt, false, qaAgent);
|
||||||
if (tests) files['test_main.py'] = tests;
|
if (tests) files['test_main.py'] = tests;
|
||||||
stepN++;
|
stepN++;
|
||||||
}
|
}
|
||||||
@@ -878,7 +892,7 @@ OUTPUT FORMAT:
|
|||||||
`- Expose port 8000\n` +
|
`- Expose port 8000\n` +
|
||||||
`- CMD: uv run uvicorn main:app --host 0.0.0.0 --port 8000\n` +
|
`- CMD: uv run uvicorn main:app --host 0.0.0.0 --port 8000\n` +
|
||||||
`\nWrite ONLY the Dockerfile, no explanations.`;
|
`\nWrite ONLY the Dockerfile, no explanations.`;
|
||||||
const dockerfile = await kpnRun(tst.model, dockerPrompt);
|
const dockerfile = await kpnRun(tst.model, dockerPrompt, false, tst);
|
||||||
if (dockerfile) files['Dockerfile'] = dockerfile;
|
if (dockerfile) files['Dockerfile'] = dockerfile;
|
||||||
stepN++;
|
stepN++;
|
||||||
|
|
||||||
@@ -913,7 +927,7 @@ OUTPUT FORMAT:
|
|||||||
`## Architecture\nDescribe the project structure and design decisions.\n\n` +
|
`## Architecture\nDescribe the project structure and design decisions.\n\n` +
|
||||||
`## Risk Assessment\n| Severity | Issue |\n|----------|-------|\n| ... | ... |\n\n` +
|
`## Risk Assessment\n| Severity | Issue |\n|----------|-------|\n| ... | ... |\n\n` +
|
||||||
`Project code:\n${finalCode}`;
|
`Project code:\n${finalCode}`;
|
||||||
const readme = await kpnRun(obs.model, obsPrompt);
|
const readme = await kpnRun(obs.model, obsPrompt, false, obs);
|
||||||
if (readme) {
|
if (readme) {
|
||||||
files['README.md'] = readme;
|
files['README.md'] = readme;
|
||||||
// Tallennetaan raportti globaalisti jotta tarkkailija-klikkaus avaa sen
|
// Tallennetaan raportti globaalisti jotta tarkkailija-klikkaus avaa sen
|
||||||
|
|||||||
@@ -1141,6 +1141,16 @@ struct ChatCompletionRequest {
|
|||||||
task_id: String,
|
task_id: String,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
max_tokens: Option<u64>,
|
max_tokens: Option<u64>,
|
||||||
|
#[serde(default)]
|
||||||
|
system_prompt: Option<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
temperature: Option<f64>,
|
||||||
|
#[serde(default)]
|
||||||
|
top_k: Option<u64>,
|
||||||
|
#[serde(default)]
|
||||||
|
repeat_penalty: Option<f64>,
|
||||||
|
#[serde(default)]
|
||||||
|
stop: Option<Vec<String>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(serde::Serialize)]
|
#[derive(serde::Serialize)]
|
||||||
@@ -1308,9 +1318,13 @@ async fn api_chat_completions(
|
|||||||
"model": payload.model,
|
"model": payload.model,
|
||||||
"task_id": payload.task_id,
|
"task_id": payload.task_id,
|
||||||
});
|
});
|
||||||
if let Some(mt) = payload.max_tokens {
|
let obj = msg.as_object_mut().unwrap();
|
||||||
msg.as_object_mut().unwrap().insert("max_tokens".to_string(), serde_json::json!(mt));
|
if let Some(mt) = payload.max_tokens { obj.insert("max_tokens".to_string(), serde_json::json!(mt)); }
|
||||||
}
|
if let Some(ref sp) = payload.system_prompt { obj.insert("system_prompt".to_string(), serde_json::json!(sp)); }
|
||||||
|
if let Some(t) = payload.temperature { obj.insert("temperature".to_string(), serde_json::json!(t)); }
|
||||||
|
if let Some(k) = payload.top_k { obj.insert("top_k".to_string(), serde_json::json!(k)); }
|
||||||
|
if let Some(rp) = payload.repeat_penalty { obj.insert("repeat_penalty".to_string(), serde_json::json!(rp)); }
|
||||||
|
if let Some(ref s) = payload.stop { obj.insert("stop".to_string(), serde_json::json!(s)); }
|
||||||
|
|
||||||
// Oneshot-kanava: solmu palauttaa tuloksen suoraan tälle pyynnölle
|
// Oneshot-kanava: solmu palauttaa tuloksen suoraan tälle pyynnölle
|
||||||
let (resp_tx, resp_rx) = tokio::sync::oneshot::channel::<serde_json::Value>();
|
let (resp_tx, resp_rx) = tokio::sync::oneshot::channel::<serde_json::Value>();
|
||||||
|
|||||||
@@ -1,6 +1,15 @@
|
|||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
use std::cell::RefCell;
|
use std::cell::RefCell;
|
||||||
|
|
||||||
|
/// Per-request generation settings passed to `LlmEngine::generate`.
/// Optional fields left as `None` are replaced by defaults inside `generate`.
pub struct GenerateOptions {
|
||||||
|
/// Token budget; sent as `num_predict` in the request `options`.
pub max_tokens: usize,
|
||||||
|
/// System prompt; when present it is added to the request body under the `system` key.
pub system_prompt: Option<String>,
|
||||||
|
/// Sampling temperature; `generate` uses 0.7 when this is `None`.
pub temperature: Option<f64>,
|
||||||
|
/// Top-k value; `generate` uses 40 when this is `None`.
pub top_k: Option<u64>,
|
||||||
|
/// Repeat penalty; `generate` uses 1.15 when this is `None`.
pub repeat_penalty: Option<f64>,
|
||||||
|
/// Stop sequences; `generate` substitutes its built-in stop list when this is `None`.
pub stop: Option<Vec<String>>,
|
||||||
|
}
|
||||||
|
|
||||||
pub struct LlmEngine {
|
pub struct LlmEngine {
|
||||||
ollama_url: String,
|
ollama_url: String,
|
||||||
model: RefCell<String>,
|
model: RefCell<String>,
|
||||||
@@ -96,25 +105,34 @@ impl LlmEngine {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn generate(&self, prompt: &str, max_tokens: usize) -> Result<GenerateResult, String> {
|
pub async fn generate(&self, prompt: &str, opts: &GenerateOptions) -> Result<GenerateResult, String> {
|
||||||
// System prompt tulee agentin konfiguraatiosta (frontend lähettää sen osana promptia).
|
|
||||||
// Tässä ei yliajeta sitä — Ollama saa vain prompt-kentän.
|
|
||||||
let model = self.model.borrow().clone();
|
let model = self.model.borrow().clone();
|
||||||
|
|
||||||
|
let default_stop: Vec<String> = vec![
|
||||||
|
"<|im_end|>".into(), "\n###".into(), "\nExplanation".into(),
|
||||||
|
"\nNote:".into(), "\nPlease note".into(), "\nThis is".into(),
|
||||||
|
"\n```\n\n".into(), "\n// Example".into(), "\n# Example".into(),
|
||||||
|
];
|
||||||
|
|
||||||
|
let mut body = serde_json::json!({
|
||||||
|
"model": model,
|
||||||
|
"prompt": prompt,
|
||||||
|
"stream": false,
|
||||||
|
"options": {
|
||||||
|
"num_predict": opts.max_tokens,
|
||||||
|
"temperature": opts.temperature.unwrap_or(0.7),
|
||||||
|
"top_k": opts.top_k.unwrap_or(40),
|
||||||
|
"repeat_penalty": opts.repeat_penalty.unwrap_or(1.15),
|
||||||
|
"stop": opts.stop.as_ref().unwrap_or(&default_stop),
|
||||||
|
}
|
||||||
|
});
|
||||||
|
if let Some(ref sp) = opts.system_prompt {
|
||||||
|
body.as_object_mut().unwrap().insert("system".to_string(), serde_json::json!(sp));
|
||||||
|
}
|
||||||
|
|
||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
let resp = self.client.post(format!("{}/api/generate", self.ollama_url))
|
let resp = self.client.post(format!("{}/api/generate", self.ollama_url))
|
||||||
.json(&serde_json::json!({
|
.json(&body)
|
||||||
"model": model,
|
|
||||||
"prompt": prompt,
|
|
||||||
"stream": false,
|
|
||||||
"options": {
|
|
||||||
"num_predict": max_tokens,
|
|
||||||
"temperature": 0.7,
|
|
||||||
"top_k": 40,
|
|
||||||
"repeat_penalty": 1.15,
|
|
||||||
"stop": ["<|im_end|>", "\n###", "\nExplanation", "\nNote:", "\nPlease note", "\nThis is", "\n```\n\n", "\n// Example", "\n# Example"]
|
|
||||||
}
|
|
||||||
}))
|
|
||||||
.send()
|
.send()
|
||||||
.await
|
.await
|
||||||
.map_err(|e| format!("Ollama generate: {}", e))?;
|
.map_err(|e| format!("Ollama generate: {}", e))?;
|
||||||
|
|||||||
@@ -472,7 +472,14 @@ async fn main() {
|
|||||||
|
|
||||||
if !prompt.is_empty() && (msg_model.starts_with("qwen-coder") || msg_model.starts_with("qwen2.5-coder") || msg_model.starts_with("phi")) {
|
if !prompt.is_empty() && (msg_model.starts_with("qwen-coder") || msg_model.starts_with("qwen2.5-coder") || msg_model.starts_with("phi")) {
|
||||||
if let Some(ref engine) = llm {
|
if let Some(ref engine) = llm {
|
||||||
let max_tokens = task.get("max_tokens").and_then(|v| v.as_u64()).unwrap_or(1024) as usize;
|
let gen_opts = inference::GenerateOptions {
|
||||||
|
max_tokens: task.get("max_tokens").and_then(|v| v.as_u64()).unwrap_or(1024) as usize,
|
||||||
|
system_prompt: task.get("system_prompt").and_then(|v| v.as_str()).map(|s| s.to_string()),
|
||||||
|
temperature: task.get("temperature").and_then(|v| v.as_f64()),
|
||||||
|
top_k: task.get("top_k").and_then(|v| v.as_u64()),
|
||||||
|
repeat_penalty: task.get("repeat_penalty").and_then(|v| v.as_f64()),
|
||||||
|
stop: task.get("stop").and_then(|v| v.as_array()).map(|a| a.iter().filter_map(|s| s.as_str().map(|s| s.to_string())).collect()),
|
||||||
|
};
|
||||||
let prompt_lines = prompt.lines().count();
|
let prompt_lines = prompt.lines().count();
|
||||||
let prompt_last: String = prompt.lines().last().unwrap_or("").chars().take(60).collect();
|
let prompt_last: String = prompt.lines().last().unwrap_or("").chars().take(60).collect();
|
||||||
tracing::info!("→ task_id:{} | {}r prompti | \"{}...\"", task_id, prompt_lines, prompt_last);
|
tracing::info!("→ task_id:{} | {}r prompti | \"{}...\"", task_id, prompt_lines, prompt_last);
|
||||||
@@ -480,11 +487,10 @@ async fn main() {
|
|||||||
let mut st = tui_state.write().await;
|
let mut st = tui_state.write().await;
|
||||||
st.cur_task_id = Some(task_id.to_string());
|
st.cur_task_id = Some(task_id.to_string());
|
||||||
st.cur_prompt = Some(format!("→ {} riviä | \"{}...\"", prompt_lines, prompt_last));
|
st.cur_prompt = Some(format!("→ {} riviä | \"{}...\"", prompt_lines, prompt_last));
|
||||||
// Ei login puskemista vielä tässä! Yhdistetään se valmiin lohkoon yhdelle riville.
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let model_name = engine.model_name();
|
let model_name = engine.model_name();
|
||||||
match engine.generate(prompt, max_tokens).await {
|
match engine.generate(prompt, &gen_opts).await {
|
||||||
Ok(result) => {
|
Ok(result) => {
|
||||||
let tokens_sec = (result.tokens_per_sec * 10.0).round() / 10.0;
|
let tokens_sec = (result.tokens_per_sec * 10.0).round() / 10.0;
|
||||||
tracing::info!(
|
tracing::info!(
|
||||||
|
|||||||
@@ -368,11 +368,17 @@ pub async fn start_agent_node(hub_url: String, has_webgpu: bool, device_info_jso
|
|||||||
let _ = ws_clone.borrow().send_with_str(&err_msg.to_string());
|
let _ = ws_clone.borrow().send_with_str(&err_msg.to_string());
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
// Välitetään parametrit JSON-promptina coderille
|
||||||
|
let coder_prompt = serde_json::json!({
|
||||||
|
"prompt": prompt,
|
||||||
|
"system": task.get("system_prompt").and_then(|v| v.as_str()).unwrap_or(""),
|
||||||
|
"max_tokens": task.get("max_tokens").and_then(|v| v.as_u64()).unwrap_or(512),
|
||||||
|
}).to_string();
|
||||||
let use_3b = current_task == 5;
|
let use_3b = current_task == 5;
|
||||||
LLM_BUSY.store(true, Ordering::SeqCst);
|
LLM_BUSY.store(true, Ordering::SeqCst);
|
||||||
let ws_for_async = ws_clone.clone();
|
let ws_for_async = ws_clone.clone();
|
||||||
wasm_bindgen_futures::spawn_local(async move {
|
wasm_bindgen_futures::spawn_local(async move {
|
||||||
qwen_coder::run_coder_inference(prompt, ws_for_async, use_3b, task_id).await;
|
qwen_coder::run_coder_inference(coder_prompt, ws_for_async, use_3b, task_id).await;
|
||||||
LLM_BUSY.store(false, Ordering::SeqCst);
|
LLM_BUSY.store(false, Ordering::SeqCst);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user