From 5f005820535910a5052a33cfcfc0bd6909d11c25 Mon Sep 17 00:00:00 2001 From: Jaakko Vanhala Date: Sun, 12 Apr 2026 07:39:41 +0300 Subject: [PATCH] =?UTF-8?q?UI:n=20system=20prompt=20ja=20sampling-parametr?= =?UTF-8?q?it=20v=C3=A4littyv=C3=A4t=20inferenssiin=20asti?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Frontend lähettää agentin asetukset (system_prompt, temperature, top_k, max_tokens, repeat_penalty, stop) API:lle. Hub välittää ne solmulle. Native-node ja Wasm-coder käyttävät välitettyjä arvoja hardkoodattujen sijaan. --- network-poc/frontend/src/pages/index.astro | 30 ++++++++++---- network-poc/hub/src/main.rs | 20 +++++++-- network-poc/native-node/src/inference.rs | 48 +++++++++++++++------- network-poc/native-node/src/main.rs | 12 ++++-- network-poc/node/src/lib.rs | 8 +++- 5 files changed, 88 insertions(+), 30 deletions(-) diff --git a/network-poc/frontend/src/pages/index.astro b/network-poc/frontend/src/pages/index.astro index d59b406..49476ea 100644 --- a/network-poc/frontend/src/pages/index.astro +++ b/network-poc/frontend/src/pages/index.astro @@ -486,7 +486,7 @@ OUTPUT FORMAT: // === kpnRun: lähettää promptin mallille === const activeStreams = {}; - async function kpnRun(model, prompt, silent) { + async function kpnRun(model, prompt, silent, agentOpts) { const taskId = crypto.randomUUID(); const statusDiv = document.createElement('div'); statusDiv.className = 'terminal-line'; @@ -511,10 +511,24 @@ OUTPUT FORMAT: statusDiv.innerHTML = ` ${model} käsittelee...`; + // Rakennetaan pyyntö: agentin asetukset tai globaalit oletukset + const opts = agentOpts || {}; + const payload = { + model, + prompt, + task_id: taskId, + system_prompt: opts.systemPrompt || settings.systemPrompt || undefined, + temperature: opts.temperature ?? settings.temperature ?? undefined, + top_k: opts.topK ?? settings.topK ?? undefined, + max_tokens: opts.maxTokens ?? settings.maxTokens ?? undefined, + repeat_penalty: opts.repeatPenalty ?? settings.repeatPenalty ?? undefined, + stop: settings.stopSequences ? settings.stopSequences.split('\\n').filter(Boolean) : undefined, + }; + const res = await fetch('/api/v1/chat/completions', { method: 'POST', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ model, prompt, task_id: taskId }), + body: JSON.stringify(payload), }); if (res.status === 503 && !wasmNodeStarted) { @@ -792,7 +806,7 @@ OUTPUT FORMAT: prompt += fileDef.instructions + '\n'; prompt += 'Adapt the example to match the project description. Import from already written files. Write ONLY the code, no explanations.'; - const code = await kpnRun(fileAgent.model, prompt); + const code = await kpnRun(fileAgent.model, prompt, false, fileAgent); if (!code) { termLog(` ✗ Keskeytyi (${fileName})`, '#f85149'); return; @@ -817,7 +831,7 @@ OUTPUT FORMAT: else explainStep('Uudelleentarkistus', `${tst.name} tarkistaa korjaukset.`); const reviewPrompt = (tst.prompt ? tst.prompt+'\n\n' : '') + `Review this project:\n\n${currentCode}`; - const review = await kpnRun(tst.model, reviewPrompt); + const review = await kpnRun(tst.model, reviewPrompt, false, tst); stepN++; // LGTM → ei korjauksia tarvita @@ -832,7 +846,7 @@ OUTPUT FORMAT: explainStep('Korjaus', `${tst.name} löysi ongelmia. ${cdr.name} saa palautteen ja korjaa.`); const fixPrompt = `${cdr.prompt ? cdr.prompt+'\n\n' : ''}Fix these issues:\n${review}\n\nCurrent code:\n${currentCode}\n\nWrite ALL corrected files. Start each file with: --- filename.py ---`; - const fixedCode = await kpnRun(cdr.model, fixPrompt); + const fixedCode = await kpnRun(cdr.model, fixPrompt, false, cdr); // Parsitaan korjatut tiedostot takaisin files-objektiin if (fixedCode) { @@ -858,7 +872,7 @@ OUTPUT FORMAT: highlightAgent('qa'); explainStep('Testit', `${qaAgent.name} kirjoittaa pytest-testit korjatulle koodille.`); const qaPrompt = (qaAgent.prompt ? qaAgent.prompt+'\n\n' : '') + `Write pytest tests for this project:\n\n${updatedCode}\n\nWrite a complete test_main.py file with TestClient.`; - const tests = await kpnRun(qaAgent.model, qaPrompt); + const tests = await kpnRun(qaAgent.model, qaPrompt, false, qaAgent); if (tests) files['test_main.py'] = tests; stepN++; } @@ -878,7 +892,7 @@ OUTPUT FORMAT: `- Expose port 8000\n` + `- CMD: uv run uvicorn main:app --host 0.0.0.0 --port 8000\n` + `\nWrite ONLY the Dockerfile, no explanations.`; - const dockerfile = await kpnRun(tst.model, dockerPrompt); + const dockerfile = await kpnRun(tst.model, dockerPrompt, false, tst); if (dockerfile) files['Dockerfile'] = dockerfile; stepN++; @@ -913,7 +927,7 @@ OUTPUT FORMAT: `## Architecture\nDescribe the project structure and design decisions.\n\n` + `## Risk Assessment\n| Severity | Issue |\n|----------|-------|\n| ... | ... |\n\n` + `Project code:\n${finalCode}`; - const readme = await kpnRun(obs.model, obsPrompt); + const readme = await kpnRun(obs.model, obsPrompt, false, obs); if (readme) { files['README.md'] = readme; // Tallennetaan raportti globaalisti jotta tarkkailija-klikkaus avaa sen diff --git a/network-poc/hub/src/main.rs b/network-poc/hub/src/main.rs index ac89d25..78c2217 100644 --- a/network-poc/hub/src/main.rs +++ b/network-poc/hub/src/main.rs @@ -1141,6 +1141,16 @@ struct ChatCompletionRequest { task_id: String, #[serde(default)] max_tokens: Option, + #[serde(default)] + system_prompt: Option, + #[serde(default)] + temperature: Option, + #[serde(default)] + top_k: Option, + #[serde(default)] + repeat_penalty: Option, + #[serde(default)] + stop: Option>, } #[derive(serde::Serialize)] @@ -1308,9 +1318,13 @@ async fn api_chat_completions( "model": payload.model, "task_id": payload.task_id, }); - if let Some(mt) = payload.max_tokens { - msg.as_object_mut().unwrap().insert("max_tokens".to_string(), serde_json::json!(mt)); - } + let obj = msg.as_object_mut().unwrap(); + if let Some(mt) = payload.max_tokens { obj.insert("max_tokens".to_string(), serde_json::json!(mt)); } + if let Some(ref sp) = payload.system_prompt { obj.insert("system_prompt".to_string(), serde_json::json!(sp)); } + if let Some(t) = payload.temperature { obj.insert("temperature".to_string(), serde_json::json!(t)); } + if let Some(k) = payload.top_k { obj.insert("top_k".to_string(), serde_json::json!(k)); } + if let Some(rp) = payload.repeat_penalty { obj.insert("repeat_penalty".to_string(), serde_json::json!(rp)); } + if let Some(ref s) = payload.stop { obj.insert("stop".to_string(), serde_json::json!(s)); } // Oneshot-kanava: solmu palauttaa tuloksen suoraan tälle pyynnölle let (resp_tx, resp_rx) = tokio::sync::oneshot::channel::(); diff --git a/network-poc/native-node/src/inference.rs b/network-poc/native-node/src/inference.rs index ab8f17b..8f0657e 100644 --- a/network-poc/native-node/src/inference.rs +++ b/network-poc/native-node/src/inference.rs @@ -1,6 +1,15 @@ use std::time::Instant; use std::cell::RefCell; +pub struct GenerateOptions { + pub max_tokens: usize, + pub system_prompt: Option, + pub temperature: Option, + pub top_k: Option, + pub repeat_penalty: Option, + pub stop: Option>, +} + pub struct LlmEngine { ollama_url: String, model: RefCell, @@ -96,25 +105,34 @@ impl LlmEngine { } } - pub async fn generate(&self, prompt: &str, max_tokens: usize) -> Result { - // System prompt tulee agentin konfiguraatiosta (frontend lähettää sen osana promptia). - // Tässä ei yliajeta sitä — Ollama saa vain prompt-kentän. + pub async fn generate(&self, prompt: &str, opts: &GenerateOptions) -> Result { let model = self.model.borrow().clone(); + let default_stop: Vec = vec![ + "<|im_end|>".into(), "\n###".into(), "\nExplanation".into(), + "\nNote:".into(), "\nPlease note".into(), "\nThis is".into(), + "\n```\n\n".into(), "\n// Example".into(), "\n# Example".into(), + ]; + + let mut body = serde_json::json!({ + "model": model, + "prompt": prompt, + "stream": false, + "options": { + "num_predict": opts.max_tokens, + "temperature": opts.temperature.unwrap_or(0.7), + "top_k": opts.top_k.unwrap_or(40), + "repeat_penalty": opts.repeat_penalty.unwrap_or(1.15), + "stop": opts.stop.as_ref().unwrap_or(&default_stop), + } + }); + if let Some(ref sp) = opts.system_prompt { + body.as_object_mut().unwrap().insert("system".to_string(), serde_json::json!(sp)); + } + let start = Instant::now(); let resp = self.client.post(format!("{}/api/generate", self.ollama_url)) - .json(&serde_json::json!({ - "model": model, - "prompt": prompt, - "stream": false, - "options": { - "num_predict": max_tokens, - "temperature": 0.7, - "top_k": 40, - "repeat_penalty": 1.15, - "stop": ["<|im_end|>", "\n###", "\nExplanation", "\nNote:", "\nPlease note", "\nThis is", "\n```\n\n", "\n// Example", "\n# Example"] - } - })) + .json(&body) .send() .await .map_err(|e| format!("Ollama generate: {}", e))?; diff --git a/network-poc/native-node/src/main.rs b/network-poc/native-node/src/main.rs index 39ac673..4393ad2 100644 --- a/network-poc/native-node/src/main.rs +++ b/network-poc/native-node/src/main.rs @@ -472,7 +472,14 @@ async fn main() { if !prompt.is_empty() && (msg_model.starts_with("qwen-coder") || msg_model.starts_with("qwen2.5-coder") || msg_model.starts_with("phi")) { if let Some(ref engine) = llm { - let max_tokens = task.get("max_tokens").and_then(|v| v.as_u64()).unwrap_or(1024) as usize; + let gen_opts = inference::GenerateOptions { + max_tokens: task.get("max_tokens").and_then(|v| v.as_u64()).unwrap_or(1024) as usize, + system_prompt: task.get("system_prompt").and_then(|v| v.as_str()).map(|s| s.to_string()), + temperature: task.get("temperature").and_then(|v| v.as_f64()), + top_k: task.get("top_k").and_then(|v| v.as_u64()), + repeat_penalty: task.get("repeat_penalty").and_then(|v| v.as_f64()), + stop: task.get("stop").and_then(|v| v.as_array()).map(|a| a.iter().filter_map(|s| s.as_str().map(|s| s.to_string())).collect()), + }; let prompt_lines = prompt.lines().count(); let prompt_last: String = prompt.lines().last().unwrap_or("").chars().take(60).collect(); tracing::info!("→ task_id:{} | {}r prompti | \"{}...\"", task_id, prompt_lines, prompt_last); @@ -480,11 +487,10 @@ async fn main() { let mut st = tui_state.write().await; st.cur_task_id = Some(task_id.to_string()); st.cur_prompt = Some(format!("→ {} riviä | \"{}...\"", prompt_lines, prompt_last)); - // Ei login puskemista vielä tässä! Yhdistetään se valmiin lohkoon yhdelle riville. } let model_name = engine.model_name(); - match engine.generate(prompt, max_tokens).await { + match engine.generate(prompt, &gen_opts).await { Ok(result) => { let tokens_sec = (result.tokens_per_sec * 10.0).round() / 10.0; tracing::info!( diff --git a/network-poc/node/src/lib.rs b/network-poc/node/src/lib.rs index 1873d97..12a6363 100644 --- a/network-poc/node/src/lib.rs +++ b/network-poc/node/src/lib.rs @@ -368,11 +368,17 @@ pub async fn start_agent_node(hub_url: String, has_webgpu: bool, device_info_jso let _ = ws_clone.borrow().send_with_str(&err_msg.to_string()); } } else { + // Välitetään parametrit JSON-promptina coderille + let coder_prompt = serde_json::json!({ + "prompt": prompt, + "system": task.get("system_prompt").and_then(|v| v.as_str()).unwrap_or(""), + "max_tokens": task.get("max_tokens").and_then(|v| v.as_u64()).unwrap_or(512), + }).to_string(); let use_3b = current_task == 5; LLM_BUSY.store(true, Ordering::SeqCst); let ws_for_async = ws_clone.clone(); wasm_bindgen_futures::spawn_local(async move { - qwen_coder::run_coder_inference(prompt, ws_for_async, use_3b, task_id).await; + qwen_coder::run_coder_inference(coder_prompt, ws_for_async, use_3b, task_id).await; LLM_BUSY.store(false, Ordering::SeqCst); }); }