Benchmark: mallikohtainen yhteenvetotaulu + kokonaisaika
Näyttää per malli: testit ja aika per skenaario, kokonaisläpäisy, kokonaisaika, keskimääräinen tok/s ja keskimääräiset pisteet.
This commit is contained in:
@@ -495,6 +495,48 @@ async function main() {
|
|||||||
}
|
}
|
||||||
console.log('╚' + '═'.repeat(header.length + 2) + '╝');
|
console.log('╚' + '═'.repeat(header.length + 2) + '╝');
|
||||||
|
|
||||||
|
// === Mallikohtainen yhteenveto ===
|
||||||
|
const modelNames = [...new Set(results.map(r => r.model))];
|
||||||
|
const scenarioIds = scenarios.map(s => s.id);
|
||||||
|
|
||||||
|
console.log('\n');
|
||||||
|
const mHeader = [
|
||||||
|
'Malli'.padEnd(35),
|
||||||
|
...scenarioIds.map(s => s.padEnd(18)),
|
||||||
|
'Yht.'.padEnd(8),
|
||||||
|
'Aika'.padEnd(8),
|
||||||
|
'tok/s'.padEnd(7),
|
||||||
|
'Pisteet',
|
||||||
|
].join(' │ ');
|
||||||
|
console.log(mHeader);
|
||||||
|
console.log('─'.repeat(mHeader.length));
|
||||||
|
|
||||||
|
for (const model of modelNames) {
|
||||||
|
const mrs = results.filter(r => r.model === model);
|
||||||
|
const cols = scenarioIds.map(sid => {
|
||||||
|
const r = mrs.find(r => r.scenario === sid);
|
||||||
|
if (!r) return '-'.padEnd(18);
|
||||||
|
const t = r.testsTotal > 0 ? `${r.testsPassed}/${r.testsTotal}` : '-';
|
||||||
|
const s = `${(r.totalDurationMs/1000).toFixed(0)}s`;
|
||||||
|
return `${t} ${s}`.padEnd(18);
|
||||||
|
});
|
||||||
|
const totalPassed = mrs.reduce((s, r) => s + r.testsPassed, 0);
|
||||||
|
const totalTests = mrs.reduce((s, r) => s + r.testsTotal, 0);
|
||||||
|
const totalTime = mrs.reduce((s, r) => s + r.totalDurationMs, 0);
|
||||||
|
const avgSpeed = mrs.length > 0 ? Math.round(mrs.reduce((s, r) => s + r.avgTokPerSec, 0) / mrs.length) : 0;
|
||||||
|
const avgScoreModel = mrs.length > 0 ? Math.round(mrs.reduce((s, r) => s + r.score, 0) / mrs.length) : 0;
|
||||||
|
const pct = totalTests > 0 ? Math.round(totalPassed / totalTests * 100) : 0;
|
||||||
|
const row = [
|
||||||
|
model.padEnd(35),
|
||||||
|
...cols,
|
||||||
|
`${totalPassed}/${totalTests}`.padEnd(8),
|
||||||
|
`${(totalTime/1000).toFixed(0)}s`.padEnd(8),
|
||||||
|
`${avgSpeed}`.padEnd(7),
|
||||||
|
`${starsForScore(avgScoreModel)} ${avgScoreModel}p (${pct}%)`,
|
||||||
|
].join(' │ ');
|
||||||
|
console.log(row);
|
||||||
|
}
|
||||||
|
|
||||||
// Tallenna JSON
|
// Tallenna JSON
|
||||||
writeFileSync(`${OUTPUT_DIR}/results.json`, JSON.stringify(results, null, 2));
|
writeFileSync(`${OUTPUT_DIR}/results.json`, JSON.stringify(results, null, 2));
|
||||||
console.log(`\nJSON: ${OUTPUT_DIR}/results.json`);
|
console.log(`\nJSON: ${OUTPUT_DIR}/results.json`);
|
||||||
@@ -504,7 +546,8 @@ async function main() {
|
|||||||
const partial = results.filter(r => !r.error && r.testsPassed < r.testsTotal && r.testsTotal > 0);
|
const partial = results.filter(r => !r.error && r.testsPassed < r.testsTotal && r.testsTotal > 0);
|
||||||
const failed = results.filter(r => r.error || r.testsTotal === 0);
|
const failed = results.filter(r => r.error || r.testsTotal === 0);
|
||||||
const avgScore = results.length > 0 ? Math.round(results.reduce((s, r) => s + r.score, 0) / results.length) : 0;
|
const avgScore = results.length > 0 ? Math.round(results.reduce((s, r) => s + r.score, 0) / results.length) : 0;
|
||||||
console.log(`\n${starsForScore(avgScore)} Keskiarvo: ${avgScore}p | ✓ PASS: ${passed.length} | ◐ PARTIAL: ${partial.length} | ✗ FAIL: ${failed.length} | Yhteensä: ${results.length}`);
|
const totalTime = results.reduce((s, r) => s + r.totalDurationMs, 0);
|
||||||
|
console.log(`\n${starsForScore(avgScore)} Keskiarvo: ${avgScore}p | ✓ PASS: ${passed.length} | ◐ PARTIAL: ${partial.length} | ✗ FAIL: ${failed.length} | Yhteensä: ${results.length} | Kokonaisaika: ${(totalTime/1000/60).toFixed(1)} min`);
|
||||||
}
|
}
|
||||||
|
|
||||||
main().catch(e => { console.error(e); process.exit(1); });
|
main().catch(e => { console.error(e); process.exit(1); });
|
||||||
|
|||||||
Reference in New Issue
Block a user