Benchmark: näytä kontekstin koko (promptin token-arvio) tuloksissa

2026-04-14 08:05:59 +03:00
parent 8efbf96295
commit 4a811e4171
1 changed file with 8 additions and 1 deletion
--- a/network-poc/tests/model-benchmark.mjs
+++ b/network-poc/tests/model-benchmark.mjs
@@ -272,6 +272,7 @@ async function runPipeline(model, scenario) {
        validationIssues: 0, fixRounds: 0,
        testsTotal: 0, testsPassed: 0, testsFailed: 0,
        totalDurationMs: 0, totalTokens: 0, avgTokPerSec: 0,
        promptChars: 0, promptTokensEst: 0,
        error: null,
    };
    const timings = [];
@@ -300,6 +301,8 @@ async function runPipeline(model, scenario) {
        // 3. LLM-koodigenerointi
        console.log(`    [3/5] Koodigenerointi (LLM)...`);
        const codePrompt = `${GOLDEN_EXAMPLE}\n---\n\nPROJECT REQUIREMENTS:\n${req.text}\n\nJSON SPECIFICATION:\n${JSON.stringify(spec, null, 2)}\n\nGenerate the complete project with all 5 files. Follow the reference implementation patterns exactly.`;
        result.promptChars = CODE_SYSTEM.length + codePrompt.length;
        result.promptTokensEst = Math.round(result.promptChars / 4);
        const codeResp = await ollamaChat(model, codePrompt, CODE_SYSTEM, 8192);
        timings.push(codeResp);
        writeFileSync(`${dir}/_code_raw.txt`, codeResp.text);
@@ -420,7 +423,8 @@ async function main() {
            const status = r.error ? `✗ ${r.error}` :
                r.testsPassed === r.testsTotal && r.testsTotal > 0 ? `✓ ${r.testsPassed}/${r.testsTotal}` :
                `◐ ${r.testsPassed}/${r.testsTotal}`;
-            console.log(`    → ${status} | ${(r.totalDurationMs/1000).toFixed(1)}s | ${r.totalTokens} tok | ${r.avgTokPerSec.toFixed(1)} tok/s`);
+            const ctxInfo = r.promptTokensEst > 0 ? ` | ctx ~${(r.promptTokensEst/1000).toFixed(1)}K` : '';
+            console.log(`    → ${status} | ${(r.totalDurationMs/1000).toFixed(1)}s | ${r.totalTokens} tok | ${r.avgTokPerSec.toFixed(1)} tok/s${ctxInfo}`);
        }
    }
@@ -435,6 +439,7 @@ async function main() {
        'Speksi'.padEnd(8),
        'Testit'.padEnd(10),
        'Korjaus'.padEnd(8),
        'Ctx'.padEnd(7),
        'Aika'.padEnd(8),
        'tok/s'.padEnd(8),
        'Tulos',
@@ -446,6 +451,7 @@ async function main() {
        const specStatus = r.specOk ? `✓ ${r.specEntities}e` : '✗';
        const testStatus = r.testsTotal > 0 ? `${r.testsPassed}/${r.testsTotal}` : '-';
        const fixStatus = r.fixRounds > 0 ? `${r.fixRounds}×` : '-';
        const ctx = r.promptTokensEst > 0 ? `~${(r.promptTokensEst/1000).toFixed(1)}K` : '-';
        const time = `${(r.totalDurationMs/1000).toFixed(0)}s`;
        const speed = `${r.avgTokPerSec.toFixed(0)}`;
        const verdict = r.error ? '✗ FAIL' : r.testsPassed === r.testsTotal && r.testsTotal > 0 ? '✓ PASS' : '◐ PARTIAL';
@@ -456,6 +462,7 @@ async function main() {
            specStatus.padEnd(8),
            testStatus.padEnd(10),
            fixStatus.padEnd(8),
            ctx.padEnd(7),
            time.padEnd(8),
            speed.padEnd(8),
            verdict,