From 1ef7f7c95644ef86db273c3161423b0749a03ec0 Mon Sep 17 00:00:00 2001 From: jaakko Date: Mon, 6 Apr 2026 22:03:26 +0300 Subject: [PATCH] max_tokens per vaihe: manageri 200, koodari 512, testaaja 200, QA 512, DevOps 256 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hub ja natiivisolmu tukevat nyt max_tokens-kenttää API-pyynnöissä. Pipeline-vaiheet käyttävät sopivan kokoisia token-rajoja. Co-Authored-By: Claude Opus 4.6 (1M context) --- network-poc/hub/src/main.rs | 7 ++++++- network-poc/native-node/src/main.rs | 5 +++-- network-poc/static/index.html | 14 +++++++------- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/network-poc/hub/src/main.rs b/network-poc/hub/src/main.rs index 22896f6..03fea2e 100644 --- a/network-poc/hub/src/main.rs +++ b/network-poc/hub/src/main.rs @@ -947,6 +947,8 @@ struct ChatCompletionRequest { model: String, prompt: String, task_id: String, + #[serde(default)] + max_tokens: Option, } #[derive(serde::Serialize)] @@ -1069,12 +1071,15 @@ async fn api_chat_completions( state.node_busy.lock().unwrap().insert(target_node_id); state.pending_task_ids.lock().unwrap().insert(payload.task_id.clone()); - let msg = serde_json::json!({ + let mut msg = serde_json::json!({ "type": "llm_prompt", "prompt": payload.prompt, "model": payload.model, "task_id": payload.task_id, }); + if let Some(mt) = payload.max_tokens { + msg.as_object_mut().unwrap().insert("max_tokens".to_string(), serde_json::json!(mt)); + } // Odotuskanava valmiiksi (solmu palauttaa tuloksen stats_tx kautta) let mut rx = state.stats_tx.subscribe(); diff --git a/network-poc/native-node/src/main.rs b/network-poc/native-node/src/main.rs index e1f7bc3..dffbc1f 100644 --- a/network-poc/native-node/src/main.rs +++ b/network-poc/native-node/src/main.rs @@ -326,9 +326,10 @@ async fn main() { if let Some(ref mut engine) = llm { busy = true; - tracing::info!("Generoidaan (task_id: {}): \"{}\"", task_id, prompt); + let max_tokens = task.get("max_tokens").and_then(|v| v.as_u64()).unwrap_or(512) as usize; + tracing::info!("Generoidaan (task_id: {}, max_tokens: {}): \"{}\"", task_id, max_tokens, prompt); - match engine.generate(prompt, 512) { + match engine.generate(prompt, max_tokens) { Ok(result) => { tracing::info!( "Tulos: {} tokenia | {:.0}ms | {:.1} tok/s | \"{}\"", diff --git a/network-poc/static/index.html b/network-poc/static/index.html index 3ddf99c..2fa6b5d 100644 --- a/network-poc/static/index.html +++ b/network-poc/static/index.html @@ -1767,7 +1767,7 @@ const activeStreams = {}; // Lähettää promptin mallille ja palauttaa vastauksen (tai null virhetilanteessa) - async function kpnRun(model, prompt, silent) { + async function kpnRun(model, prompt, silent, maxTokens) { const taskId = crypto.randomUUID(); // Yksittäinen status-rivi jota päivitetään läpi pyynnön elinkaaren const statusDiv = document.createElement('div'); @@ -1801,7 +1801,7 @@ const res = await fetch('/api/v1/chat/completions', { method: 'POST', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ model, prompt: fullPrompt, task_id: taskId }), + body: JSON.stringify({ model, prompt: fullPrompt, task_id: taskId, ...(maxTokens ? { max_tokens: maxTokens } : {}) }), }); if (!res.ok) { @@ -2066,7 +2066,7 @@ CONSTRAINTS — the coder can only generate ~400 tokens per file: - Prefer fewer, focused files over many small ones Project: ${task}`; - const plan = await kpnRun(agentPrompts.manager.model, managerPrompt); + const plan = await kpnRun(agentPrompts.manager.model, managerPrompt, false, 200); if (!plan) { termLog(' ✗ Pipeline keskeytyi (manageri)', '#f85149'); return; } pipelineStep('manager', 'Suunnittelu', 'done', task, plan); @@ -2155,7 +2155,7 @@ IMPORTANT: Keep the code SHORT and focused. Max ~50 lines. No comments, no docst If the code is correct, say "LGTM". ${allCode}`; - const review = await kpnRun(agentPrompts.tester.model, reviewPrompt); + const review = await kpnRun(agentPrompts.tester.model, reviewPrompt, false, 200); pipelineStep('tester', 'Review', 'done', `${Object.keys(generatedFiles).length} tiedostoa`, review); // Vaihe 4: Korjausluuppi — jos testaaja löysi ongelmia @@ -2174,7 +2174,7 @@ Write the corrected code.`; if (fixedCode) { termLog(`\n[${fileList.length + 4}] Testaaja — uudelleenarviointi`); pipelineStep('tester', 'Re-review', 'active', fixedCode); - const reReview = await kpnRun(agentPrompts.tester.model, `Review the corrected code briefly:\n${fixedCode}`); + const reReview = await kpnRun(agentPrompts.tester.model, `Review the corrected code briefly:\n${fixedCode}`, false, 128); pipelineStep('tester', 'Re-review', 'done', fixedCode, reReview); } } @@ -2186,7 +2186,7 @@ Write the corrected code.`; const qaPrompt = `Write a short test file (test_app.py) for this project. Use pytest. Max 3 test functions. Keep it minimal. ${Object.entries(generatedFiles).map(([n, c]) => `--- ${n} ---\n${c}`).join('\n\n')}`; - const tests = await kpnRun(agentPrompts.qa.model, qaPrompt); + const tests = await kpnRun(agentPrompts.qa.model, qaPrompt, false, 512); if (tests) generatedFiles['test_app.py'] = tests; pipelineStep('qa', 'Testit', 'done', 'test_app.py', tests); @@ -2202,7 +2202,7 @@ ${Object.entries(generatedFiles).map(([n, c]) => `--- ${n} ---\n${c}`).join('\n\ Max 15 lines. No badges, no license, no contributing section. Files: ${Object.keys(generatedFiles).join(', ')}`; - const readme = await kpnRun(agentPrompts.tester.model, devopsPrompt); + const readme = await kpnRun(agentPrompts.tester.model, devopsPrompt, false, 256); if (readme) generatedFiles['README.md'] = readme; pipelineStep('tester', 'DevOps', 'done', 'README.md', readme);