Benchmark: näytä kontekstin koko (promptin token-arvio) tuloksissa
This commit is contained in:
@@ -272,6 +272,7 @@ async function runPipeline(model, scenario) {
|
||||
validationIssues: 0, fixRounds: 0,
|
||||
testsTotal: 0, testsPassed: 0, testsFailed: 0,
|
||||
totalDurationMs: 0, totalTokens: 0, avgTokPerSec: 0,
|
||||
+ promptChars: 0, promptTokensEst: 0,
|
||||
error: null,
|
||||
};
|
||||
const timings = [];
|
||||
@@ -300,6 +301,8 @@ async function runPipeline(model, scenario) {
|
||||
// 3. LLM-koodigenerointi
|
||||
console.log(` [3/5] Koodigenerointi (LLM)...`);
|
||||
const codePrompt = `${GOLDEN_EXAMPLE}\n---\n\nPROJECT REQUIREMENTS:\n${req.text}\n\nJSON SPECIFICATION:\n${JSON.stringify(spec, null, 2)}\n\nGenerate the complete project with all 5 files. Follow the reference implementation patterns exactly.`;
|
||||
+ result.promptChars = CODE_SYSTEM.length + codePrompt.length;
|
||||
+ result.promptTokensEst = Math.round(result.promptChars / 4);
|
||||
const codeResp = await ollamaChat(model, codePrompt, CODE_SYSTEM, 8192);
|
||||
timings.push(codeResp);
|
||||
writeFileSync(`${dir}/_code_raw.txt`, codeResp.text);
|
||||
@@ -420,7 +423,8 @@ async function main() {
|
||||
const status = r.error ? `✗ ${r.error}` :
|
||||
r.testsPassed === r.testsTotal && r.testsTotal > 0 ? `✓ ${r.testsPassed}/${r.testsTotal}` :
|
||||
`◐ ${r.testsPassed}/${r.testsTotal}`;
|
||||
- console.log(` → ${status} | ${(r.totalDurationMs/1000).toFixed(1)}s | ${r.totalTokens} tok | ${r.avgTokPerSec.toFixed(1)} tok/s`);
|
||||
+ const ctxInfo = r.promptTokensEst > 0 ? ` | ctx ~${(r.promptTokensEst/1000).toFixed(1)}K` : '';
|
||||
+ console.log(` → ${status} | ${(r.totalDurationMs/1000).toFixed(1)}s | ${r.totalTokens} tok | ${r.avgTokPerSec.toFixed(1)} tok/s${ctxInfo}`);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -435,6 +439,7 @@ async function main() {
|
||||
'Speksi'.padEnd(8),
|
||||
'Testit'.padEnd(10),
|
||||
'Korjaus'.padEnd(8),
|
||||
+ 'Ctx'.padEnd(7),
|
||||
'Aika'.padEnd(8),
|
||||
'tok/s'.padEnd(8),
|
||||
'Tulos',
|
||||
@@ -446,6 +451,7 @@ async function main() {
|
||||
const specStatus = r.specOk ? `✓ ${r.specEntities}e` : '✗';
|
||||
const testStatus = r.testsTotal > 0 ? `${r.testsPassed}/${r.testsTotal}` : '-';
|
||||
const fixStatus = r.fixRounds > 0 ? `${r.fixRounds}×` : '-';
|
||||
+ const ctx = r.promptTokensEst > 0 ? `~${(r.promptTokensEst/1000).toFixed(1)}K` : '-';
|
||||
const time = `${(r.totalDurationMs/1000).toFixed(0)}s`;
|
||||
const speed = `${r.avgTokPerSec.toFixed(0)}`;
|
||||
const verdict = r.error ? '✗ FAIL' : r.testsPassed === r.testsTotal && r.testsTotal > 0 ? '✓ PASS' : '◐ PARTIAL';
|
||||
@@ -456,6 +462,7 @@ async function main() {
|
||||
specStatus.padEnd(8),
|
||||
testStatus.padEnd(10),
|
||||
fixStatus.padEnd(8),
|
||||
+ ctx.padEnd(7),
|
||||
time.padEnd(8),
|
||||
speed.padEnd(8),
|
||||
verdict,
|
||||
|
||||
Reference in New Issue
Block a user