Files
agentic-studio/kipina-codebench/results/2026-04-14T08-18.html
jaakko 6a40ca5730 CodeBench: golden example markdown-muodossa (koodi + selitykset)
todo.md yhdistää koodin ja annotaatiot: miksi pattern on valittu,
mitä EI saa tehdä. 1567 tokenia (vs raaka 1340, compact 335).
Benchmark lataa .md-version oletuksena, fallback erillisiin tiedostoihin.
2026-04-14 12:38:25 +03:00

184 lines
25 KiB
HTML
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<!DOCTYPE html>
<html lang="fi">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Kipina Model Benchmark</title>
<style>
:root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
* { box-sizing: border-box; margin: 0; padding: 0; }
body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
.meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
.card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
.card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
.card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
.card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
th:hover { color: var(--text); }
th.sorted-asc::after { content: ' ▲'; }
th.sorted-desc::after { content: ' ▼'; }
td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
tr:hover td { background: #1c2128; }
.pass { color: var(--green); }
.partial { color: var(--yellow); }
.fail { color: var(--red); }
.stars { letter-spacing: 1px; }
.bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
.bar-bg { background: var(--border); }
.bar-fill { background: var(--green); }
.bar-partial { background: var(--yellow); }
.model-name { font-weight: 600; }
h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
.summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
</style>
</head>
<body>
<h1>Kipina Model Benchmark</h1>
<div class="meta" id="meta"></div>
<div class="cards" id="cards"></div>
<h2>Mallikohtainen yhteenveto</h2>
<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
<h2>Kaikki tulokset</h2>
<table id="results-table"><thead></thead><tbody></tbody></table>
<script>
const RAW = [{"model":"qwen3:14b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":9,"testsPassed":6,"testsFailed":3,"totalDurationMs":33892,"totalTokens":2675,"avgTokPerSec":88.07409036121237,"promptChars":9688,"promptTokensEst":2422,"score":80,"stars":"★★★★☆","error":null,"round":1},{"model":"qwen3:14b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":8,"testsPassed":6,"testsFailed":2,"totalDurationMs":30647,"totalTokens":2549,"avgTokPerSec":88.4488185974085,"promptChars":9594,"promptTokensEst":2399,"score":85,"stars":"★★★★☆","error":null,"round":1},{"model":"qwen3:14b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":13,"testsPassed":6,"testsFailed":7,"totalDurationMs":44371,"totalTokens":3678,"avgTokPerSec":88.172616246191,"promptChars":10432,"promptTokensEst":2608,"score":68,"stars":"★★★☆☆","error":null,"round":1},{"model":"qwen3:8b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":7,"testsPassed":6,"testsFailed":1,"totalDurationMs":18385,"totalTokens":2375,"avgTokPerSec":147.62230806597154,"promptChars":9478,"promptTokensEst":2370,"score":91,"stars":"★★★★★","error":null,"round":1},{"model":"qwen3:8b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":7,"testsPassed":7,"testsFailed":0,"totalDurationMs":13968,"totalTokens":1904,"avgTokPerSec":148.3084817167518,"promptChars":8837,"promptTokensEst":2209,"score":100,"stars":"★★★★★","error":null,"round":1},{"model":"qwen3:8b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":25642,"totalTokens":3476,"avgTokPerSec":146.49556892944076,"promptChars":10734,"promptTokensEst":2684,"score":0,"stars":"☆☆☆☆☆","error":"Testit kaatuivat","round":1},{"model":"qwen3-coder:30b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":8,"testsPassed":6,"testsFailed":2,"totalDurationMs":19982,"totalTokens":2937,"avgTokPerSec":191.2786317674431,"promptChars":10281,"promptTokensEst":2570,"score":85,"stars":"★★★★☆","error":null,"round":1},{"model":"qwen3-coder:30b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":7,"testsPassed":7,"testsFailed":0,"totalDurationMs":17114,"totalTokens":2903,"avgTokPerSec":190.51221206765385,"promptChars":9654,"promptTokensEst":2414,"score":100,"stars":"★★★★★","error":null,"round":1},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":11,"testsPassed":11,"testsFailed":0,"totalDurationMs":22352,"totalTokens":3776,"avgTokPerSec":190.56628728306987,"promptChars":11134,"promptTokensEst":2784,"score":100,"stars":"★★★★★","error":null,"round":1},{"model":"qwen3:14b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":8,"testsPassed":6,"testsFailed":2,"totalDurationMs":31217,"totalTokens":2463,"avgTokPerSec":88.6684646675098,"promptChars":9598,"promptTokensEst":2400,"score":85,"stars":"★★★★☆","error":null,"round":2},{"model":"qwen3:14b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":7,"testsPassed":7,"testsFailed":0,"totalDurationMs":27520,"totalTokens":2288,"avgTokPerSec":88.64765360012593,"promptChars":9612,"promptTokensEst":2403,"score":100,"stars":"★★★★★","error":null,"round":2},{"model":"qwen3:14b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":12,"testsPassed":3,"testsFailed":9,"totalDurationMs":41874,"totalTokens":3474,"avgTokPerSec":88.22266853318554,"promptChars":10408,"promptTokensEst":2602,"score":55,"stars":"★★★☆☆","error":null,"round":2},{"model":"qwen3:8b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":11,"testsPassed":11,"testsFailed":0,"totalDurationMs":24781,"totalTokens":3240,"avgTokPerSec":146.89167309934365,"promptChars":10179,"promptTokensEst":2545,"score":100,"stars":"★★★★★","error":null,"round":2},{"model":"qwen3:8b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":9,"testsPassed":6,"testsFailed":3,"totalDurationMs":19148,"totalTokens":2605,"avgTokPerSec":147.55250620481297,"promptChars":9634,"promptTokensEst":2409,"score":80,"stars":"★★★★☆","error":null,"round":2},{"model":"qwen3:8b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":11,"testsPassed":11,"testsFailed":0,"totalDurationMs":23816,"totalTokens":3232,"avgTokPerSec":147.25857324533817,"promptChars":9226,"promptTokensEst":2307,"score":100,"stars":"★★★★★","error":null,"round":2},{"model":"qwen3-coder:30b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":16639,"totalTokens":2369,"avgTokPerSec":191.61273045157245,"promptChars":10048,"promptTokensEst":2512,"score":100,"stars":"★★★★★","error":null,"round":2},{"model":"qwen3-coder:30b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":9,"testsPassed":8,"testsFailed":1,"totalDurationMs":18588,"totalTokens":3163,"avgTokPerSec":190.86975006725547,"promptChars":10048,"promptTokensEst":2512,"score":93,"stars":"★★★★★","error":null,"round":2},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":10,"testsPassed":10,"testsFailed":0,"totalDurationMs":22677,"totalTokens":3828,"avgTokPerSec":190.15611016906482,"promptChars":11090,"promptTokensEst":2773,"score":100,"stars":"★★★★★","error":null,"round":2},{"model":"qwen3:14b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":26449,"totalTokens":2063,"avgTokPerSec":88.77498453063184,"promptChars":9608,"promptTokensEst":2402,"score":100,"stars":"★★★★★","error":null,"round":3},{"model":"qwen3:14b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":7,"testsPassed":7,"testsFailed":0,"totalDurationMs":27510,"totalTokens":2289,"avgTokPerSec":88.74699253414485,"promptChars":9418,"promptTokensEst":2355,"score":100,"stars":"★★★★★","error":null,"round":3},{"model":"qwen3:14b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":12,"testsPassed":3,"testsFailed":9,"totalDurationMs":45105,"totalTokens":3738,"avgTokPerSec":88.04788102995212,"promptChars":10564,"promptTokensEst":2641,"score":55,"stars":"★★★☆☆","error":null,"round":3},{"model":"qwen3:8b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":8,"testsPassed":7,"testsFailed":1,"totalDurationMs":19204,"totalTokens":2480,"avgTokPerSec":147.91758782382294,"promptChars":9391,"promptTokensEst":2348,"score":93,"stars":"★★★★★","error":null,"round":3},{"model":"qwen3:8b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":12990,"totalTokens":1769,"avgTokPerSec":148.2616673700717,"promptChars":8898,"promptTokensEst":2225,"score":100,"stars":"★★★★★","error":null,"round":3},{"model":"qwen3:8b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":12,"testsPassed":10,"testsFailed":2,"totalDurationMs":25831,"totalTokens":3500,"avgTokPerSec":146.86924785880186,"promptChars":9465,"promptTokensEst":2366,"score":90,"stars":"★★★★★","error":null,"round":3},{"model":"qwen3-coder:30b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":19453,"totalTokens":2845,"avgTokPerSec":191.37382231956113,"promptChars":10157,"promptTokensEst":2539,"score":100,"stars":"★★★★★","error":null,"round":3},{"model":"qwen3-coder:30b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":9,"testsPassed":9,"testsFailed":0,"totalDurationMs":21570,"totalTokens":3529,"avgTokPerSec":190.65454623497536,"promptChars":9732,"promptTokensEst":2433,"score":100,"stars":"★★★★★","error":null,"round":3},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":11,"testsPassed":11,"testsFailed":0,"totalDurationMs":25537,"totalTokens":4300,"avgTokPerSec":189.94521619124598,"promptChars":11127,"promptTokensEst":2782,"score":100,"stars":"★★★★★","error":null,"round":3},{"model":"qwen3:14b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":9,"testsPassed":7,"testsFailed":2,"totalDurationMs":31923,"totalTokens":2522,"avgTokPerSec":88.62182881661799,"promptChars":9700,"promptTokensEst":2425,"score":87,"stars":"★★★★☆","error":null,"round":4},{"model":"qwen3:14b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":7,"testsPassed":7,"testsFailed":0,"totalDurationMs":26000,"totalTokens":2163,"avgTokPerSec":88.86878707672254,"promptChars":9288,"promptTokensEst":2322,"score":100,"stars":"★★★★★","error":null,"round":4},{"model":"qwen3:14b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":10,"testsPassed":10,"testsFailed":0,"totalDurationMs":43275,"totalTokens":3588,"avgTokPerSec":88.24995936347965,"promptChars":10173,"promptTokensEst":2543,"score":100,"stars":"★★★★★","error":null,"round":4},{"model":"qwen3:8b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":14,"testsPassed":0,"testsFailed":14,"totalDurationMs":30045,"totalTokens":3913,"avgTokPerSec":146.51683619371713,"promptChars":10334,"promptTokensEst":2584,"score":40,"stars":"★★☆☆☆","error":null,"round":4},{"model":"qwen3:8b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":9,"testsPassed":5,"testsFailed":4,"totalDurationMs":17076,"totalTokens":2321,"avgTokPerSec":147.99547121069506,"promptChars":9451,"promptTokensEst":2363,"score":73,"stars":"★★★★☆","error":null,"round":4},{"model":"qwen3:8b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":11,"testsPassed":11,"testsFailed":0,"totalDurationMs":23890,"totalTokens":3243,"avgTokPerSec":147.20125507974117,"promptChars":9217,"promptTokensEst":2304,"score":100,"stars":"★★★★★","error":null,"round":4},{"model":"qwen3-coder:30b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":8,"testsPassed":6,"testsFailed":2,"totalDurationMs":21812,"totalTokens":3246,"avgTokPerSec":191.07801335688654,"promptChars":10249,"promptTokensEst":2562,"score":85,"stars":"★★★★☆","error":null,"round":4},{"model":"qwen3-coder:30b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":9,"testsPassed":8,"testsFailed":1,"totalDurationMs":20325,"totalTokens":3441,"avgTokPerSec":190.10241840094508,"promptChars":9930,"promptTokensEst":2483,"score":93,"stars":"★★★★★","error":null,"round":4},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":12,"testsPassed":12,"testsFailed":0,"totalDurationMs":26087,"totalTokens":4387,"avgTokPerSec":189.8005689388054,"promptChars":11109,"promptTokensEst":2777,"score":100,"stars":"★★★★★","error":null,"round":4},{"model":"qwen3:14b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":7,"testsPassed":7,"testsFailed":0,"totalDurationMs":30287,"totalTokens":2388,"avgTokPerSec":88.72243320918638,"promptChars":9695,"promptTokensEst":2424,"score":100,"stars":"★★★★★","error":null,"round":5},{"model":"qwen3:14b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":9,"testsPassed":6,"testsFailed":3,"totalDurationMs":31212,"totalTokens":2601,"avgTokPerSec":88.71289036919063,"promptChars":9619,"promptTokensEst":2405,"score":80,"stars":"★★★★☆","error":null,"round":5},{"model":"qwen3:14b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":15,"testsPassed":3,"testsFailed":12,"totalDurationMs":50939,"totalTokens":4217,"avgTokPerSec":88.06125722020734,"promptChars":10743,"promptTokensEst":2686,"score":52,"stars":"★★★☆☆","error":null,"round":5},{"model":"qwen3:8b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":7,"testsPassed":6,"testsFailed":1,"totalDurationMs":17913,"totalTokens":2310,"avgTokPerSec":148.0291268001691,"promptChars":9357,"promptTokensEst":2339,"score":91,"stars":"★★★★★","error":null,"round":5},{"model":"qwen3:8b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":8,"testsPassed":8,"testsFailed":0,"totalDurationMs":13948,"totalTokens":1898,"avgTokPerSec":148.37907379944423,"promptChars":8725,"promptTokensEst":2181,"score":100,"stars":"★★★★★","error":null,"round":5},{"model":"qwen3:8b","scenario":"blog","reqOk":true,"specOk":false,"specEntities":0,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":0,"totalTokens":0,"avgTokPerSec":0,"promptChars":0,"promptTokensEst":0,"score":0,"stars":"","error":"JSON-speksi epäonnistui","round":5},{"model":"qwen3-coder:30b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":1,"testsFailed":5,"totalDurationMs":15229,"totalTokens":2119,"avgTokPerSec":192.33007410215646,"promptChars":9827,"promptTokensEst":2457,"score":50,"stars":"★★★☆☆","error":null,"round":5},{"model":"qwen3-coder:30b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":18223,"totalTokens":3093,"avgTokPerSec":190.71372054282037,"promptChars":9641,"promptTokensEst":2410,"score":100,"stars":"★★★★★","error":null,"round":5},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":10,"testsPassed":1,"testsFailed":9,"totalDurationMs":21215,"totalTokens":3589,"avgTokPerSec":190.49493540345176,"promptChars":11180,"promptTokensEst":2795,"score":46,"stars":"★★☆☆☆","error":null,"round":5}];
const starsFor = s => s >= 90 ? '★★★★★' : s >= 70 ? '★★★★☆' : s >= 50 ? '★★★☆☆' : s >= 25 ? '★★☆☆☆' : s > 0 ? '★☆☆☆☆' : '☆☆☆☆☆';
function calcScore(r) {
if (r.error && r.testsTotal === 0) return 0;
let s = 0;
if (r.specOk) s += 10;
if (!r.error || r.testsTotal > 0) s += 10;
if (r.testsTotal > 0) s += Math.round((r.testsPassed / r.testsTotal) * 60);
s += Math.max(0, 20 - (r.fixRounds || 0) * 10);
return Math.min(100, s);
}
// Laske pisteet jos puuttuvat
const DATA = RAW.map(r => {
if (r.score == null) r.score = calcScore(r);
if (!r.stars) r.stars = starsFor(r.score);
if (!r.promptTokensEst) r.promptTokensEst = r.promptChars ? Math.round(r.promptChars / 4) : 0;
return r;
});
const cls = r => (!r.error && r.testsPassed === r.testsTotal && r.testsTotal > 0) ? 'pass' : (r.testsTotal > 0 && r.testsPassed > 0) ? 'partial' : 'fail';
const pctBar = (passed, total, w=80) => {
if (total === 0) return '-';
const pct = passed/total*100;
const c = pct === 100 ? 'bar-fill' : 'bar-partial';
return `<span class="bar bar-bg" style="width:${w}px"><span class="bar ${c}" style="width:${Math.round(pct/100*w)}px"></span></span> ${passed}/${total}`;
};
// Meta
const totalTime = DATA.reduce((s,r) => s + r.totalDurationMs, 0);
document.getElementById('meta').textContent = `${new Date().toLocaleDateString('fi-FI')}${DATA.length} ajoa — ${(totalTime/1000/60).toFixed(1)} min`;
// Cards
const models = [...new Set(DATA.map(r => r.model))];
const scenarios = [...new Set(DATA.map(r => r.scenario))];
const avgScore = DATA.length ? Math.round(DATA.reduce((s,r) => s + r.score, 0) / DATA.length) : 0;
const totalPassed = DATA.reduce((s,r) => s + r.testsPassed, 0);
const totalTests = DATA.reduce((s,r) => s + r.testsTotal, 0);
const passRate = totalTests ? Math.round(totalPassed/totalTests*100) : 0;
const bestModel = models.map(m => {
const mrs = DATA.filter(r => r.model === m);
return { model: m, avg: Math.round(mrs.reduce((s,r) => s + r.score, 0) / mrs.length) };
}).sort((a,b) => b.avg - a.avg)[0];
const fastestModel = models.map(m => {
const mrs = DATA.filter(r => r.model === m);
return { model: m, speed: Math.round(mrs.reduce((s,r) => s + r.avgTokPerSec, 0) / mrs.length) };
}).sort((a,b) => b.speed - a.speed)[0];
document.getElementById('cards').innerHTML = `
<div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistetta</div></div>
<div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
<div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${bestModel?.model || '-'}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
<div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${fastestModel?.model || '-'}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
<div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
<div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime/1000/60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
`;
// Summary table
const sumHead = document.querySelector('#summary-table thead');
const sumBody = document.querySelector('#summary-table tbody');
sumHead.innerHTML = '<tr><th>Malli</th>' + scenarios.map(s => `<th>${s}</th>`).join('') + '<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>';
const modelRows = models.map(m => {
const mrs = DATA.filter(r => r.model === m);
const tp = mrs.reduce((s,r) => s + r.testsPassed, 0);
const tt = mrs.reduce((s,r) => s + r.testsTotal, 0);
const tok = mrs.reduce((s,r) => s + r.totalTokens, 0);
const time = mrs.reduce((s,r) => s + r.totalDurationMs, 0);
const speed = Math.round(mrs.reduce((s,r) => s + r.avgTokPerSec, 0) / mrs.length);
const avg = Math.round(mrs.reduce((s,r) => s + r.score, 0) / mrs.length);
const scenCols = scenarios.map(s => {
const r = mrs.find(r => r.scenario === s);
if (!r) return '<td>-</td>';
return `<td class="${cls(r)}">${pctBar(r.testsPassed, r.testsTotal, 60)} <span style="color:var(--dim)">${(r.totalDurationMs/1000).toFixed(0)}s</span></td>`;
}).join('');
return { avg, html: `<tr><td class="model-name">${m}</td>${scenCols}<td>${pctBar(tp, tt)}</td><td>${(tok/1000).toFixed(1)}K</td><td>${(time/1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>` };
}).sort((a,b) => b.avg - a.avg);
sumBody.innerHTML = modelRows.map(r => r.html).join('');
// Results table
const resHead = document.querySelector('#results-table thead');
const resBody = document.querySelector('#results-table tbody');
const resCols = ['Malli','Skenaario','Speksi','Testit','Korjaus','Ctx','Out tok','Aika','tok/s','Pisteet'];
resHead.innerHTML = '<tr>' + resCols.map((c,i) => `<th data-col="${i}">${c}</th>`).join('') + '</tr>';
let sortCol = 9, sortAsc = false;
function renderResults() {
const sorted = [...DATA].sort((a,b) => {
const vals = [
[a.model, b.model],
[a.scenario, b.scenario],
[a.specEntities, b.specEntities],
[a.testsPassed/Math.max(a.testsTotal,1), b.testsPassed/Math.max(b.testsTotal,1)],
[a.fixRounds, b.fixRounds],
[a.promptTokensEst, b.promptTokensEst],
[a.totalTokens, b.totalTokens],
[a.totalDurationMs, b.totalDurationMs],
[a.avgTokPerSec, b.avgTokPerSec],
[a.score, b.score],
][sortCol];
const cmp = typeof vals[0] === 'string' ? vals[0].localeCompare(vals[1]) : vals[0] - vals[1];
return sortAsc ? cmp : -cmp;
});
resBody.innerHTML = sorted.map(r => {
const c = cls(r);
return `<tr>
<td class="model-name">${r.model}</td>
<td>${r.scenario}</td>
<td>${r.specOk ? `${r.specEntities}e` : '<span class="fail">✗</span>'}</td>
<td class="${c}">${pctBar(r.testsPassed, r.testsTotal)}</td>
<td>${r.fixRounds > 0 ? r.fixRounds + '×' : '-'}</td>
<td>${r.promptTokensEst > 0 ? '~'+(r.promptTokensEst/1000).toFixed(1)+'K' : '-'}</td>
<td>${r.totalTokens > 0 ? (r.totalTokens/1000).toFixed(1)+'K' : '-'}</td>
<td>${(r.totalDurationMs/1000).toFixed(0)}s</td>
<td>${r.avgTokPerSec.toFixed(0)}</td>
<td><span class="stars">${r.stars}</span> ${r.score}p</td>
</tr>`;
}).join('');
document.querySelectorAll('#results-table th').forEach((th,i) => {
th.className = i === sortCol ? (sortAsc ? 'sorted-asc' : 'sorted-desc') : '';
});
}
document.querySelector('#results-table thead').addEventListener('click', e => {
const col = parseInt(e.target.dataset.col);
if (isNaN(col)) return;
if (sortCol === col) sortAsc = !sortAsc;
else { sortCol = col; sortAsc = false; }
renderResults();
});
renderResults();
</script>
</body>
</html>