Benchmark: mallikohtainen yhteenvetotaulu + kokonaisaika
Näyttää mallikohtaisesti: testit ja aika skenaarioittain, läpäistyjen testien kokonaismäärä, kokonaisaika, keskimääräinen tok/s ja pisteiden keskiarvo.
This commit is contained in:
@@ -495,6 +495,48 @@ async function main() {
|
||||
}
|
||||
console.log('╚' + '═'.repeat(header.length + 2) + '╝');
|
||||
|
||||
// === Mallikohtainen yhteenveto ===
|
||||
const modelNames = [...new Set(results.map(r => r.model))];
|
||||
const scenarioIds = scenarios.map(s => s.id);
|
||||
|
||||
console.log('\n');
|
||||
const mHeader = [
|
||||
'Malli'.padEnd(35),
|
||||
...scenarioIds.map(s => s.padEnd(18)),
|
||||
'Yht.'.padEnd(8),
|
||||
'Aika'.padEnd(8),
|
||||
'tok/s'.padEnd(7),
|
||||
'Pisteet',
|
||||
].join(' │ ');
|
||||
console.log(mHeader);
|
||||
console.log('─'.repeat(mHeader.length));
|
||||
|
||||
for (const model of modelNames) {
|
||||
const mrs = results.filter(r => r.model === model);
|
||||
const cols = scenarioIds.map(sid => {
|
||||
const r = mrs.find(r => r.scenario === sid);
|
||||
if (!r) return '-'.padEnd(18);
|
||||
const t = r.testsTotal > 0 ? `${r.testsPassed}/${r.testsTotal}` : '-';
|
||||
const s = `${(r.totalDurationMs/1000).toFixed(0)}s`;
|
||||
return `${t} ${s}`.padEnd(18);
|
||||
});
|
||||
const totalPassed = mrs.reduce((s, r) => s + r.testsPassed, 0);
|
||||
const totalTests = mrs.reduce((s, r) => s + r.testsTotal, 0);
|
||||
const totalTime = mrs.reduce((s, r) => s + r.totalDurationMs, 0);
|
||||
const avgSpeed = mrs.length > 0 ? Math.round(mrs.reduce((s, r) => s + r.avgTokPerSec, 0) / mrs.length) : 0;
|
||||
const avgScoreModel = mrs.length > 0 ? Math.round(mrs.reduce((s, r) => s + r.score, 0) / mrs.length) : 0;
|
||||
const pct = totalTests > 0 ? Math.round(totalPassed / totalTests * 100) : 0;
|
||||
const row = [
|
||||
model.padEnd(35),
|
||||
...cols,
|
||||
`${totalPassed}/${totalTests}`.padEnd(8),
|
||||
`${(totalTime/1000).toFixed(0)}s`.padEnd(8),
|
||||
`${avgSpeed}`.padEnd(7),
|
||||
`${starsForScore(avgScoreModel)} ${avgScoreModel}p (${pct}%)`,
|
||||
].join(' │ ');
|
||||
console.log(row);
|
||||
}
|
||||
|
||||
// Tallenna JSON
|
||||
writeFileSync(`${OUTPUT_DIR}/results.json`, JSON.stringify(results, null, 2));
|
||||
console.log(`\nJSON: ${OUTPUT_DIR}/results.json`);
|
||||
@@ -504,7 +546,8 @@ async function main() {
|
||||
const partial = results.filter(r => !r.error && r.testsPassed < r.testsTotal && r.testsTotal > 0);
|
||||
const failed = results.filter(r => r.error || r.testsTotal === 0);
|
||||
const avgScore = results.length > 0 ? Math.round(results.reduce((s, r) => s + r.score, 0) / results.length) : 0;
|
||||
console.log(`\n${starsForScore(avgScore)} Keskiarvo: ${avgScore}p | ✓ PASS: ${passed.length} | ◐ PARTIAL: ${partial.length} | ✗ FAIL: ${failed.length} | Yhteensä: ${results.length}`);
|
||||
const totalTime = results.reduce((s, r) => s + r.totalDurationMs, 0);
|
||||
console.log(`\n${starsForScore(avgScore)} Keskiarvo: ${avgScore}p | ✓ PASS: ${passed.length} | ◐ PARTIAL: ${partial.length} | ✗ FAIL: ${failed.length} | Yhteensä: ${results.length} | Kokonaisaika: ${(totalTime/1000/60).toFixed(1)} min`);
|
||||
}
|
||||
|
||||
// Script entry point: run the benchmark and, if it rejects, log the error
// and terminate the process with a non-zero exit status.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});
|
||||
|
||||
Reference in New Issue
Block a user