Benchmark: näytä kontekstin koko (promptin token-arvio) tuloksissa

This commit is contained in:
2026-04-14 08:05:59 +03:00
parent 8efbf96295
commit 4a811e4171

View File

@@ -272,6 +272,7 @@ async function runPipeline(model, scenario) {
validationIssues: 0, fixRounds: 0, validationIssues: 0, fixRounds: 0,
testsTotal: 0, testsPassed: 0, testsFailed: 0, testsTotal: 0, testsPassed: 0, testsFailed: 0,
totalDurationMs: 0, totalTokens: 0, avgTokPerSec: 0, totalDurationMs: 0, totalTokens: 0, avgTokPerSec: 0,
promptChars: 0, promptTokensEst: 0,
error: null, error: null,
}; };
const timings = []; const timings = [];
@@ -300,6 +301,8 @@ async function runPipeline(model, scenario) {
// 3. LLM-koodigenerointi // 3. LLM-koodigenerointi
console.log(` [3/5] Koodigenerointi (LLM)...`); console.log(` [3/5] Koodigenerointi (LLM)...`);
const codePrompt = `${GOLDEN_EXAMPLE}\n---\n\nPROJECT REQUIREMENTS:\n${req.text}\n\nJSON SPECIFICATION:\n${JSON.stringify(spec, null, 2)}\n\nGenerate the complete project with all 5 files. Follow the reference implementation patterns exactly.`; const codePrompt = `${GOLDEN_EXAMPLE}\n---\n\nPROJECT REQUIREMENTS:\n${req.text}\n\nJSON SPECIFICATION:\n${JSON.stringify(spec, null, 2)}\n\nGenerate the complete project with all 5 files. Follow the reference implementation patterns exactly.`;
result.promptChars = CODE_SYSTEM.length + codePrompt.length;
result.promptTokensEst = Math.round(result.promptChars / 4);
const codeResp = await ollamaChat(model, codePrompt, CODE_SYSTEM, 8192); const codeResp = await ollamaChat(model, codePrompt, CODE_SYSTEM, 8192);
timings.push(codeResp); timings.push(codeResp);
writeFileSync(`${dir}/_code_raw.txt`, codeResp.text); writeFileSync(`${dir}/_code_raw.txt`, codeResp.text);
@@ -420,7 +423,8 @@ async function main() {
const status = r.error ? `${r.error}` : const status = r.error ? `${r.error}` :
r.testsPassed === r.testsTotal && r.testsTotal > 0 ? `${r.testsPassed}/${r.testsTotal}` : r.testsPassed === r.testsTotal && r.testsTotal > 0 ? `${r.testsPassed}/${r.testsTotal}` :
`${r.testsPassed}/${r.testsTotal}`; `${r.testsPassed}/${r.testsTotal}`;
console.log(`${status} | ${(r.totalDurationMs/1000).toFixed(1)}s | ${r.totalTokens} tok | ${r.avgTokPerSec.toFixed(1)} tok/s`); const ctxInfo = r.promptTokensEst > 0 ? ` | ctx ~${(r.promptTokensEst/1000).toFixed(1)}K` : '';
console.log(`${status} | ${(r.totalDurationMs/1000).toFixed(1)}s | ${r.totalTokens} tok | ${r.avgTokPerSec.toFixed(1)} tok/s${ctxInfo}`);
} }
} }
@@ -435,6 +439,7 @@ async function main() {
'Speksi'.padEnd(8), 'Speksi'.padEnd(8),
'Testit'.padEnd(10), 'Testit'.padEnd(10),
'Korjaus'.padEnd(8), 'Korjaus'.padEnd(8),
'Ctx'.padEnd(7),
'Aika'.padEnd(8), 'Aika'.padEnd(8),
'tok/s'.padEnd(8), 'tok/s'.padEnd(8),
'Tulos', 'Tulos',
@@ -446,6 +451,7 @@ async function main() {
const specStatus = r.specOk ? `${r.specEntities}e` : '✗'; const specStatus = r.specOk ? `${r.specEntities}e` : '✗';
const testStatus = r.testsTotal > 0 ? `${r.testsPassed}/${r.testsTotal}` : '-'; const testStatus = r.testsTotal > 0 ? `${r.testsPassed}/${r.testsTotal}` : '-';
const fixStatus = r.fixRounds > 0 ? `${r.fixRounds}×` : '-'; const fixStatus = r.fixRounds > 0 ? `${r.fixRounds}×` : '-';
const ctx = r.promptTokensEst > 0 ? `~${(r.promptTokensEst/1000).toFixed(1)}K` : '-';
const time = `${(r.totalDurationMs/1000).toFixed(0)}s`; const time = `${(r.totalDurationMs/1000).toFixed(0)}s`;
const speed = `${r.avgTokPerSec.toFixed(0)}`; const speed = `${r.avgTokPerSec.toFixed(0)}`;
const verdict = r.error ? '✗ FAIL' : r.testsPassed === r.testsTotal && r.testsTotal > 0 ? '✓ PASS' : '◐ PARTIAL'; const verdict = r.error ? '✗ FAIL' : r.testsPassed === r.testsTotal && r.testsTotal > 0 ? '✓ PASS' : '◐ PARTIAL';
@@ -456,6 +462,7 @@ async function main() {
specStatus.padEnd(8), specStatus.padEnd(8),
testStatus.padEnd(10), testStatus.padEnd(10),
fixStatus.padEnd(8), fixStatus.padEnd(8),
ctx.padEnd(7),
time.padEnd(8), time.padEnd(8),
speed.padEnd(8), speed.padEnd(8),
verdict, verdict,