Benchmark: näytä kontekstin koko (promptin token-arvio) tuloksissa
This commit is contained in:
@@ -272,6 +272,7 @@ async function runPipeline(model, scenario) {
|
|||||||
validationIssues: 0, fixRounds: 0,
|
validationIssues: 0, fixRounds: 0,
|
||||||
testsTotal: 0, testsPassed: 0, testsFailed: 0,
|
testsTotal: 0, testsPassed: 0, testsFailed: 0,
|
||||||
totalDurationMs: 0, totalTokens: 0, avgTokPerSec: 0,
|
totalDurationMs: 0, totalTokens: 0, avgTokPerSec: 0,
|
||||||
|
promptChars: 0, promptTokensEst: 0,
|
||||||
error: null,
|
error: null,
|
||||||
};
|
};
|
||||||
const timings = [];
|
const timings = [];
|
||||||
@@ -300,6 +301,8 @@ async function runPipeline(model, scenario) {
|
|||||||
// 3. LLM-koodigenerointi
|
// 3. LLM-koodigenerointi
|
||||||
console.log(` [3/5] Koodigenerointi (LLM)...`);
|
console.log(` [3/5] Koodigenerointi (LLM)...`);
|
||||||
const codePrompt = `${GOLDEN_EXAMPLE}\n---\n\nPROJECT REQUIREMENTS:\n${req.text}\n\nJSON SPECIFICATION:\n${JSON.stringify(spec, null, 2)}\n\nGenerate the complete project with all 5 files. Follow the reference implementation patterns exactly.`;
|
const codePrompt = `${GOLDEN_EXAMPLE}\n---\n\nPROJECT REQUIREMENTS:\n${req.text}\n\nJSON SPECIFICATION:\n${JSON.stringify(spec, null, 2)}\n\nGenerate the complete project with all 5 files. Follow the reference implementation patterns exactly.`;
|
||||||
|
result.promptChars = CODE_SYSTEM.length + codePrompt.length;
|
||||||
|
result.promptTokensEst = Math.round(result.promptChars / 4);
|
||||||
const codeResp = await ollamaChat(model, codePrompt, CODE_SYSTEM, 8192);
|
const codeResp = await ollamaChat(model, codePrompt, CODE_SYSTEM, 8192);
|
||||||
timings.push(codeResp);
|
timings.push(codeResp);
|
||||||
writeFileSync(`${dir}/_code_raw.txt`, codeResp.text);
|
writeFileSync(`${dir}/_code_raw.txt`, codeResp.text);
|
||||||
@@ -420,7 +423,8 @@ async function main() {
|
|||||||
const status = r.error ? `✗ ${r.error}` :
|
const status = r.error ? `✗ ${r.error}` :
|
||||||
r.testsPassed === r.testsTotal && r.testsTotal > 0 ? `✓ ${r.testsPassed}/${r.testsTotal}` :
|
r.testsPassed === r.testsTotal && r.testsTotal > 0 ? `✓ ${r.testsPassed}/${r.testsTotal}` :
|
||||||
`◐ ${r.testsPassed}/${r.testsTotal}`;
|
`◐ ${r.testsPassed}/${r.testsTotal}`;
|
||||||
console.log(` → ${status} | ${(r.totalDurationMs/1000).toFixed(1)}s | ${r.totalTokens} tok | ${r.avgTokPerSec.toFixed(1)} tok/s`);
|
const ctxInfo = r.promptTokensEst > 0 ? ` | ctx ~${(r.promptTokensEst/1000).toFixed(1)}K` : '';
|
||||||
|
console.log(` → ${status} | ${(r.totalDurationMs/1000).toFixed(1)}s | ${r.totalTokens} tok | ${r.avgTokPerSec.toFixed(1)} tok/s${ctxInfo}`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -435,6 +439,7 @@ async function main() {
|
|||||||
'Speksi'.padEnd(8),
|
'Speksi'.padEnd(8),
|
||||||
'Testit'.padEnd(10),
|
'Testit'.padEnd(10),
|
||||||
'Korjaus'.padEnd(8),
|
'Korjaus'.padEnd(8),
|
||||||
|
'Ctx'.padEnd(7),
|
||||||
'Aika'.padEnd(8),
|
'Aika'.padEnd(8),
|
||||||
'tok/s'.padEnd(8),
|
'tok/s'.padEnd(8),
|
||||||
'Tulos',
|
'Tulos',
|
||||||
@@ -446,6 +451,7 @@ async function main() {
|
|||||||
const specStatus = r.specOk ? `✓ ${r.specEntities}e` : '✗';
|
const specStatus = r.specOk ? `✓ ${r.specEntities}e` : '✗';
|
||||||
const testStatus = r.testsTotal > 0 ? `${r.testsPassed}/${r.testsTotal}` : '-';
|
const testStatus = r.testsTotal > 0 ? `${r.testsPassed}/${r.testsTotal}` : '-';
|
||||||
const fixStatus = r.fixRounds > 0 ? `${r.fixRounds}×` : '-';
|
const fixStatus = r.fixRounds > 0 ? `${r.fixRounds}×` : '-';
|
||||||
|
const ctx = r.promptTokensEst > 0 ? `~${(r.promptTokensEst/1000).toFixed(1)}K` : '-';
|
||||||
const time = `${(r.totalDurationMs/1000).toFixed(0)}s`;
|
const time = `${(r.totalDurationMs/1000).toFixed(0)}s`;
|
||||||
const speed = `${r.avgTokPerSec.toFixed(0)}`;
|
const speed = `${r.avgTokPerSec.toFixed(0)}`;
|
||||||
const verdict = r.error ? '✗ FAIL' : r.testsPassed === r.testsTotal && r.testsTotal > 0 ? '✓ PASS' : '◐ PARTIAL';
|
const verdict = r.error ? '✗ FAIL' : r.testsPassed === r.testsTotal && r.testsTotal > 0 ? '✓ PASS' : '◐ PARTIAL';
|
||||||
@@ -456,6 +462,7 @@ async function main() {
|
|||||||
specStatus.padEnd(8),
|
specStatus.padEnd(8),
|
||||||
testStatus.padEnd(10),
|
testStatus.padEnd(10),
|
||||||
fixStatus.padEnd(8),
|
fixStatus.padEnd(8),
|
||||||
|
ctx.padEnd(7),
|
||||||
time.padEnd(8),
|
time.padEnd(8),
|
||||||
speed.padEnd(8),
|
speed.padEnd(8),
|
||||||
verdict,
|
verdict,
|
||||||
|
|||||||
Reference in New Issue
Block a user