Vaihekohtainen max_tokens: manageri 200, koodari 512, testaaja 200 (uudelleenarviointi 128), QA 512, DevOps 256
Hub ja natiivisolmu tukevat nyt max_tokens-kenttää API-pyynnöissä. Pipeline-vaiheet käyttävät vaihekohtaisia token-rajoja; jos kenttä puuttuu pyynnöstä, natiivisolmu käyttää oletusarvoa 512.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -947,6 +947,8 @@ struct ChatCompletionRequest {
|
|||||||
model: String,
|
model: String,
|
||||||
prompt: String,
|
prompt: String,
|
||||||
task_id: String,
|
task_id: String,
|
||||||
|
#[serde(default)]
|
||||||
|
max_tokens: Option<u64>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(serde::Serialize)]
|
#[derive(serde::Serialize)]
|
||||||
@@ -1069,12 +1071,15 @@ async fn api_chat_completions(
|
|||||||
state.node_busy.lock().unwrap().insert(target_node_id);
|
state.node_busy.lock().unwrap().insert(target_node_id);
|
||||||
state.pending_task_ids.lock().unwrap().insert(payload.task_id.clone());
|
state.pending_task_ids.lock().unwrap().insert(payload.task_id.clone());
|
||||||
|
|
||||||
let msg = serde_json::json!({
|
let mut msg = serde_json::json!({
|
||||||
"type": "llm_prompt",
|
"type": "llm_prompt",
|
||||||
"prompt": payload.prompt,
|
"prompt": payload.prompt,
|
||||||
"model": payload.model,
|
"model": payload.model,
|
||||||
"task_id": payload.task_id,
|
"task_id": payload.task_id,
|
||||||
});
|
});
|
||||||
|
if let Some(mt) = payload.max_tokens {
|
||||||
|
msg.as_object_mut().unwrap().insert("max_tokens".to_string(), serde_json::json!(mt));
|
||||||
|
}
|
||||||
|
|
||||||
// Odotuskanava valmiiksi (solmu palauttaa tuloksen stats_tx kautta)
|
// Odotuskanava valmiiksi (solmu palauttaa tuloksen stats_tx kautta)
|
||||||
let mut rx = state.stats_tx.subscribe();
|
let mut rx = state.stats_tx.subscribe();
|
||||||
|
|||||||
@@ -326,9 +326,10 @@ async fn main() {
|
|||||||
|
|
||||||
if let Some(ref mut engine) = llm {
|
if let Some(ref mut engine) = llm {
|
||||||
busy = true;
|
busy = true;
|
||||||
tracing::info!("Generoidaan (task_id: {}): \"{}\"", task_id, prompt);
|
let max_tokens = task.get("max_tokens").and_then(|v| v.as_u64()).unwrap_or(512) as usize;
|
||||||
|
tracing::info!("Generoidaan (task_id: {}, max_tokens: {}): \"{}\"", task_id, max_tokens, prompt);
|
||||||
|
|
||||||
match engine.generate(prompt, 512) {
|
match engine.generate(prompt, max_tokens) {
|
||||||
Ok(result) => {
|
Ok(result) => {
|
||||||
tracing::info!(
|
tracing::info!(
|
||||||
"Tulos: {} tokenia | {:.0}ms | {:.1} tok/s | \"{}\"",
|
"Tulos: {} tokenia | {:.0}ms | {:.1} tok/s | \"{}\"",
|
||||||
|
|||||||
@@ -1767,7 +1767,7 @@
|
|||||||
const activeStreams = {};
|
const activeStreams = {};
|
||||||
|
|
||||||
// Lähettää promptin mallille ja palauttaa vastauksen (tai null virhetilanteessa)
|
// Lähettää promptin mallille ja palauttaa vastauksen (tai null virhetilanteessa)
|
||||||
async function kpnRun(model, prompt, silent) {
|
async function kpnRun(model, prompt, silent, maxTokens) {
|
||||||
const taskId = crypto.randomUUID();
|
const taskId = crypto.randomUUID();
|
||||||
// Yksittäinen status-rivi jota päivitetään läpi pyynnön elinkaaren
|
// Yksittäinen status-rivi jota päivitetään läpi pyynnön elinkaaren
|
||||||
const statusDiv = document.createElement('div');
|
const statusDiv = document.createElement('div');
|
||||||
@@ -1801,7 +1801,7 @@
|
|||||||
const res = await fetch('/api/v1/chat/completions', {
|
const res = await fetch('/api/v1/chat/completions', {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: { 'Content-Type': 'application/json' },
|
headers: { 'Content-Type': 'application/json' },
|
||||||
body: JSON.stringify({ model, prompt: fullPrompt, task_id: taskId }),
|
body: JSON.stringify({ model, prompt: fullPrompt, task_id: taskId, ...(maxTokens ? { max_tokens: maxTokens } : {}) }),
|
||||||
});
|
});
|
||||||
|
|
||||||
if (!res.ok) {
|
if (!res.ok) {
|
||||||
@@ -2066,7 +2066,7 @@ CONSTRAINTS — the coder can only generate ~400 tokens per file:
|
|||||||
- Prefer fewer, focused files over many small ones
|
- Prefer fewer, focused files over many small ones
|
||||||
|
|
||||||
Project: ${task}`;
|
Project: ${task}`;
|
||||||
const plan = await kpnRun(agentPrompts.manager.model, managerPrompt);
|
const plan = await kpnRun(agentPrompts.manager.model, managerPrompt, false, 200);
|
||||||
if (!plan) { termLog(' ✗ Pipeline keskeytyi (manageri)', '#f85149'); return; }
|
if (!plan) { termLog(' ✗ Pipeline keskeytyi (manageri)', '#f85149'); return; }
|
||||||
pipelineStep('manager', 'Suunnittelu', 'done', task, plan);
|
pipelineStep('manager', 'Suunnittelu', 'done', task, plan);
|
||||||
|
|
||||||
@@ -2155,7 +2155,7 @@ IMPORTANT: Keep the code SHORT and focused. Max ~50 lines. No comments, no docst
|
|||||||
If the code is correct, say "LGTM".
|
If the code is correct, say "LGTM".
|
||||||
|
|
||||||
${allCode}`;
|
${allCode}`;
|
||||||
const review = await kpnRun(agentPrompts.tester.model, reviewPrompt);
|
const review = await kpnRun(agentPrompts.tester.model, reviewPrompt, false, 200);
|
||||||
pipelineStep('tester', 'Review', 'done', `${Object.keys(generatedFiles).length} tiedostoa`, review);
|
pipelineStep('tester', 'Review', 'done', `${Object.keys(generatedFiles).length} tiedostoa`, review);
|
||||||
|
|
||||||
// Vaihe 4: Korjausluuppi — jos testaaja löysi ongelmia
|
// Vaihe 4: Korjausluuppi — jos testaaja löysi ongelmia
|
||||||
@@ -2174,7 +2174,7 @@ Write the corrected code.`;
|
|||||||
if (fixedCode) {
|
if (fixedCode) {
|
||||||
termLog(`\n<span style="color:#58a6ff;font-weight:bold">[${fileList.length + 4}] Testaaja</span> — uudelleenarviointi`);
|
termLog(`\n<span style="color:#58a6ff;font-weight:bold">[${fileList.length + 4}] Testaaja</span> — uudelleenarviointi`);
|
||||||
pipelineStep('tester', 'Re-review', 'active', fixedCode);
|
pipelineStep('tester', 'Re-review', 'active', fixedCode);
|
||||||
const reReview = await kpnRun(agentPrompts.tester.model, `Review the corrected code briefly:\n${fixedCode}`);
|
const reReview = await kpnRun(agentPrompts.tester.model, `Review the corrected code briefly:\n${fixedCode}`, false, 128);
|
||||||
pipelineStep('tester', 'Re-review', 'done', fixedCode, reReview);
|
pipelineStep('tester', 'Re-review', 'done', fixedCode, reReview);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2186,7 +2186,7 @@ Write the corrected code.`;
|
|||||||
const qaPrompt = `Write a short test file (test_app.py) for this project. Use pytest. Max 3 test functions. Keep it minimal.
|
const qaPrompt = `Write a short test file (test_app.py) for this project. Use pytest. Max 3 test functions. Keep it minimal.
|
||||||
|
|
||||||
${Object.entries(generatedFiles).map(([n, c]) => `--- ${n} ---\n${c}`).join('\n\n')}`;
|
${Object.entries(generatedFiles).map(([n, c]) => `--- ${n} ---\n${c}`).join('\n\n')}`;
|
||||||
const tests = await kpnRun(agentPrompts.qa.model, qaPrompt);
|
const tests = await kpnRun(agentPrompts.qa.model, qaPrompt, false, 512);
|
||||||
if (tests) generatedFiles['test_app.py'] = tests;
|
if (tests) generatedFiles['test_app.py'] = tests;
|
||||||
pipelineStep('qa', 'Testit', 'done', 'test_app.py', tests);
|
pipelineStep('qa', 'Testit', 'done', 'test_app.py', tests);
|
||||||
|
|
||||||
@@ -2202,7 +2202,7 @@ ${Object.entries(generatedFiles).map(([n, c]) => `--- ${n} ---\n${c}`).join('\n\
|
|||||||
Max 15 lines. No badges, no license, no contributing section.
|
Max 15 lines. No badges, no license, no contributing section.
|
||||||
|
|
||||||
Files: ${Object.keys(generatedFiles).join(', ')}`;
|
Files: ${Object.keys(generatedFiles).join(', ')}`;
|
||||||
const readme = await kpnRun(agentPrompts.tester.model, devopsPrompt);
|
const readme = await kpnRun(agentPrompts.tester.model, devopsPrompt, false, 256);
|
||||||
if (readme) generatedFiles['README.md'] = readme;
|
if (readme) generatedFiles['README.md'] = readme;
|
||||||
pipelineStep('tester', 'DevOps', 'done', 'README.md', readme);
|
pipelineStep('tester', 'DevOps', 'done', 'README.md', readme);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user