todo.md yhdistää koodin ja annotaatiot: miksi pattern on valittu, mitä EI saa tehdä. 1567 tokenia (vs raaka 1340, compact 335). Benchmark lataa .md-version oletuksena, fallback erillisiin tiedostoihin.
184 lines
25 KiB
HTML
184 lines
25 KiB
HTML
<!DOCTYPE html>
|
||
<html lang="fi">
|
||
<head>
|
||
<meta charset="UTF-8">
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||
<title>Kipina Model Benchmark</title>
|
||
<style>
|
||
:root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
|
||
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||
body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
|
||
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
|
||
.meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
|
||
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
|
||
.card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
|
||
.card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
|
||
.card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
|
||
.card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
|
||
table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
|
||
th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
|
||
th:hover { color: var(--text); }
|
||
th.sorted-asc::after { content: ' ▲'; }
|
||
th.sorted-desc::after { content: ' ▼'; }
|
||
td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
|
||
tr:hover td { background: #1c2128; }
|
||
.pass { color: var(--green); }
|
||
.partial { color: var(--yellow); }
|
||
.fail { color: var(--red); }
|
||
.stars { letter-spacing: 1px; }
|
||
.bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
|
||
.bar-bg { background: var(--border); }
|
||
.bar-fill { background: var(--green); }
|
||
.bar-partial { background: var(--yellow); }
|
||
.model-name { font-weight: 600; }
|
||
h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
|
||
.summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
|
||
</style>
|
||
</head>
|
||
<body>
|
||
|
||
<h1>Kipina Model Benchmark</h1>
|
||
<div class="meta" id="meta"></div>
|
||
|
||
<div class="cards" id="cards"></div>
|
||
|
||
<h2>Mallikohtainen yhteenveto</h2>
|
||
<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
|
||
|
||
<h2>Kaikki tulokset</h2>
|
||
<table id="results-table"><thead></thead><tbody></tbody></table>
|
||
|
||
<script>
|
||
const RAW = [{"model":"qwen3-coder:30b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":1,"testsFailed":5,"totalDurationMs":30801,"totalTokens":2333,"avgTokPerSec":122.77922150989748,"promptChars":10015,"promptTokensEst":2504,"score":50,"stars":"★★★☆☆","error":null,"round":1},{"model":"qwen3-coder:30b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":7,"testsPassed":6,"testsFailed":1,"totalDurationMs":25495,"totalTokens":2714,"avgTokPerSec":122.70970007652487,"promptChars":9891,"promptTokensEst":2473,"score":91,"stars":"★★★★★","error":null,"round":1},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":11,"testsPassed":10,"testsFailed":1,"totalDurationMs":37153,"totalTokens":3979,"avgTokPerSec":121.9183958236036,"promptChars":11158,"promptTokensEst":2790,"score":95,"stars":"★★★★★","error":null,"round":1},{"model":"qwen3:14b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":7,"testsPassed":6,"testsFailed":1,"totalDurationMs":43456,"totalTokens":2411,"avgTokPerSec":60.89226084568145,"promptChars":9831,"promptTokensEst":2458,"score":91,"stars":"★★★★★","error":null,"round":1},{"model":"qwen3:14b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":8,"testsPassed":8,"testsFailed":0,"totalDurationMs":40376,"totalTokens":2237,"avgTokPerSec":61.028627032662456,"promptChars":9343,"promptTokensEst":2336,"score":100,"stars":"★★★★★","error":null,"round":1},{"model":"qwen3:14b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":12,"testsPassed":2,"testsFailed":10,"totalDurationMs":68620,"totalTokens":3796,"avgTokPerSec":60.47793268944476,"promptChars":10497,"promptTokensEst":2624,"score":50,"stars":"★★★☆☆","error":null,"round":1},{"model":"qwen3:8b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":7,"testsPassed":7,"testsFailed":0,"totalDurationMs":25235,"totalTokens":2269,"avgTokPerSec":101.24212769079884,"promptChars":9294,"promptTokensEst":2324,"score":100,"stars":"★★★★★","error":null,"round":1},{"model":"qwen3:8b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":8,"testsPassed":6,"testsFailed":2,"totalDurationMs":21720,"totalTokens":1942,"avgTokPerSec":101.65074583709965,"promptChars":9020,"promptTokensEst":2255,"score":85,"stars":"★★★★☆","error":null,"round":1},{"model":"qwen3:8b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":11,"testsPassed":10,"testsFailed":1,"totalDurationMs":39006,"totalTokens":3509,"avgTokPerSec":100.43593706181406,"promptChars":10372,"promptTokensEst":2593,"score":95,"stars":"★★★★★","error":null,"round":1},{"model":"qwen3-coder:30b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":21989,"totalTokens":2339,"avgTokPerSec":122.8454095677367,"promptChars":10052,"promptTokensEst":2513,"score":100,"stars":"★★★★★","error":null,"round":2},{"model":"qwen3-coder:30b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":23997,"totalTokens":2551,"avgTokPerSec":122.23722733560855,"promptChars":9973,"promptTokensEst":2493,"score":100,"stars":"★★★★★","error":null,"round":2},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":8,"testsPassed":8,"testsFailed":0,"totalDurationMs":30169,"totalTokens":3249,"avgTokPerSec":123.04696524796096,"promptChars":11097,"promptTokensEst":2774,"score":100,"stars":"★★★★★","error":null,"round":2},{"model":"qwen3:14b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":9,"testsPassed":6,"testsFailed":3,"totalDurationMs":47091,"totalTokens":2602,"avgTokPerSec":60.962687726457375,"promptChars":9633,"promptTokensEst":2408,"score":80,"stars":"★★★★☆","error":null,"round":2},{"model":"qwen3:14b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":7,"testsPassed":7,"testsFailed":0,"totalDurationMs":41747,"totalTokens":2313,"avgTokPerSec":60.949025583617605,"promptChars":9373,"promptTokensEst":2343,"score":100,"stars":"★★★★★","error":null,"round":2},{"model":"qwen3:14b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":12,"testsPassed":2,"testsFailed":10,"totalDurationMs":66888,"totalTokens":3699,"avgTokPerSec":60.49540514685331,"promptChars":10323,"promptTokensEst":2581,"score":50,"stars":"★★★☆☆","error":null,"round":2},{"model":"qwen3:8b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":8,"testsPassed":7,"testsFailed":1,"totalDurationMs":27036,"totalTokens":2434,"avgTokPerSec":101.01399069228444,"promptChars":9513,"promptTokensEst":2378,"score":93,"stars":"★★★★★","error":null,"round":2},{"model":"qwen3:8b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":7,"testsPassed":6,"testsFailed":1,"totalDurationMs":20927,"totalTokens":1872,"avgTokPerSec":101.45096098956486,"promptChars":8881,"promptTokensEst":2220,"score":91,"stars":"★★★★★","error":null,"round":2},{"model":"qwen3:8b","scenario":"blog","reqOk":true,"specOk":false,"specEntities":0,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":0,"totalTokens":0,"avgTokPerSec":0,"promptChars":0,"promptTokensEst":0,"score":0,"stars":"","error":"JSON-speksi epäonnistui","round":2},{"model":"qwen3-coder:30b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":8,"testsPassed":6,"testsFailed":2,"totalDurationMs":26919,"totalTokens":2889,"avgTokPerSec":123.63666629145064,"promptChars":10162,"promptTokensEst":2541,"score":85,"stars":"★★★★☆","error":null,"round":3},{"model":"qwen3-coder:30b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":8,"testsPassed":8,"testsFailed":0,"totalDurationMs":27592,"totalTokens":2946,"avgTokPerSec":122.33273400152825,"promptChars":9469,"promptTokensEst":2367,"score":100,"stars":"★★★★★","error":null,"round":3},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":11,"testsPassed":11,"testsFailed":0,"totalDurationMs":35734,"totalTokens":3827,"avgTokPerSec":122.65156559717951,"promptChars":11086,"promptTokensEst":2772,"score":100,"stars":"★★★★★","error":null,"round":3},{"model":"qwen3:14b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":9,"testsPassed":6,"testsFailed":3,"totalDurationMs":50372,"totalTokens":2795,"avgTokPerSec":60.91611850918806,"promptChars":9758,"promptTokensEst":2440,"score":80,"stars":"★★★★☆","error":null,"round":3},{"model":"qwen3:14b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":1,"testsFailed":5,"totalDurationMs":38716,"totalTokens":2144,"avgTokPerSec":61.0412890406478,"promptChars":9415,"promptTokensEst":2354,"score":50,"stars":"★★★☆☆","error":null,"round":3},{"model":"qwen3:14b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":14,"testsPassed":7,"testsFailed":7,"totalDurationMs":74882,"totalTokens":4130,"avgTokPerSec":60.32640855026445,"promptChars":10506,"promptTokensEst":2627,"score":70,"stars":"★★★★☆","error":null,"round":3},{"model":"qwen3:8b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":3,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":35913,"totalTokens":3218,"avgTokPerSec":100.38516205100154,"promptChars":11338,"promptTokensEst":2835,"score":0,"stars":"☆☆☆☆☆","error":"Testit kaatuivat","round":3},{"model":"qwen3:8b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":7,"testsPassed":7,"testsFailed":0,"totalDurationMs":20974,"totalTokens":1880,"avgTokPerSec":101.52450928280543,"promptChars":8803,"promptTokensEst":2201,"score":100,"stars":"★★★★★","error":null,"round":3},{"model":"qwen3:8b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":11,"testsPassed":9,"testsFailed":2,"totalDurationMs":36005,"totalTokens":3243,"avgTokPerSec":100.44301406462307,"promptChars":10414,"promptTokensEst":2604,"score":89,"stars":"★★★★☆","error":null,"round":3},{"model":"qwen3-coder:30b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":7,"testsPassed":1,"testsFailed":6,"totalDurationMs":23071,"totalTokens":2469,"avgTokPerSec":124.09643322620661,"promptChars":9960,"promptTokensEst":2490,"score":49,"stars":"★★☆☆☆","error":null,"round":4},{"model":"qwen3-coder:30b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":8,"testsPassed":2,"testsFailed":6,"totalDurationMs":27062,"totalTokens":2907,"avgTokPerSec":123.35530975346687,"promptChars":9558,"promptTokensEst":2390,"score":55,"stars":"★★★☆☆","error":null,"round":4},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":9,"testsPassed":9,"testsFailed":0,"totalDurationMs":29395,"totalTokens":3156,"avgTokPerSec":123.22575073561812,"promptChars":10574,"promptTokensEst":2644,"score":100,"stars":"★★★★★","error":null,"round":4},{"model":"qwen3:14b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":39590,"totalTokens":2198,"avgTokPerSec":61.051945510465806,"promptChars":9664,"promptTokensEst":2416,"score":100,"stars":"★★★★★","error":null,"round":4},{"model":"qwen3:14b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":1,"testsFailed":5,"totalDurationMs":36950,"totalTokens":2042,"avgTokPerSec":61.01436784429489,"promptChars":9225,"promptTokensEst":2306,"score":50,"stars":"★★★☆☆","error":null,"round":4},{"model":"qwen3:14b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":14,"testsPassed":2,"testsFailed":12,"totalDurationMs":80600,"totalTokens":4437,"avgTokPerSec":60.29371170543078,"promptChars":10688,"promptTokensEst":2672,"score":49,"stars":"★★☆☆☆","error":null,"round":4},{"model":"qwen3:8b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":29125,"totalTokens":2619,"avgTokPerSec":100.90587777586212,"promptChars":9899,"promptTokensEst":2475,"score":0,"stars":"☆☆☆☆☆","error":"Testit kaatuivat","round":4},{"model":"qwen3:8b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":8,"testsPassed":8,"testsFailed":0,"totalDurationMs":21847,"totalTokens":1957,"avgTokPerSec":101.44111070734304,"promptChars":8946,"promptTokensEst":2237,"score":100,"stars":"★★★★★","error":null,"round":4},{"model":"qwen3:8b","scenario":"blog","reqOk":true,"specOk":false,"specEntities":0,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":0,"totalTokens":0,"avgTokPerSec":0,"promptChars":0,"promptTokensEst":0,"score":0,"stars":"","error":"JSON-speksi epäonnistui","round":4},{"model":"qwen3-coder:30b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":1,"testsFailed":5,"totalDurationMs":21127,"totalTokens":2245,"avgTokPerSec":124.22714049663371,"promptChars":9972,"promptTokensEst":2493,"score":50,"stars":"★★★☆☆","error":null,"round":5},{"model":"qwen3-coder:30b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":9,"testsPassed":7,"testsFailed":2,"totalDurationMs":30281,"totalTokens":3079,"avgTokPerSec":123.00254714651271,"promptChars":9562,"promptTokensEst":2391,"score":87,"stars":"★★★★☆","error":null,"round":5},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":12,"testsPassed":12,"testsFailed":0,"totalDurationMs":39630,"totalTokens":4274,"avgTokPerSec":123.08303937451802,"promptChars":11119,"promptTokensEst":2780,"score":100,"stars":"★★★★★","error":null,"round":5},{"model":"qwen3:14b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":38032,"totalTokens":2104,"avgTokPerSec":61.05445464163662,"promptChars":9455,"promptTokensEst":2364,"score":100,"stars":"★★★★★","error":null,"round":5},{"model":"qwen3:14b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":39620,"totalTokens":2193,"avgTokPerSec":61.04565233675101,"promptChars":9481,"promptTokensEst":2370,"score":0,"stars":"☆☆☆☆☆","error":"Testit kaatuivat","round":5},{"model":"qwen3:14b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":9,"testsPassed":7,"testsFailed":2,"totalDurationMs":63579,"totalTokens":3520,"avgTokPerSec":60.51513453009977,"promptChars":10493,"promptTokensEst":2623,"score":87,"stars":"★★★★☆","error":null,"round":5},{"model":"qwen3:8b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":9,"testsPassed":6,"testsFailed":3,"totalDurationMs":30845,"totalTokens":2777,"avgTokPerSec":100.79046137130972,"promptChars":9507,"promptTokensEst":2377,"score":80,"stars":"★★★★☆","error":null,"round":5},{"model":"qwen3:8b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":8,"testsPassed":6,"testsFailed":2,"totalDurationMs":21413,"totalTokens":1914,"avgTokPerSec":101.25525436264132,"promptChars":8804,"promptTokensEst":2201,"score":85,"stars":"★★★★☆","error":null,"round":5},{"model":"qwen3:8b","scenario":"blog","reqOk":true,"specOk":false,"specEntities":0,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":0,"totalTokens":0,"avgTokPerSec":0,"promptChars":0,"promptTokensEst":0,"score":0,"stars":"","error":"JSON-speksi epäonnistui","round":5}];
|
||
|
||
const starsFor = s => s >= 90 ? '★★★★★' : s >= 70 ? '★★★★☆' : s >= 50 ? '★★★☆☆' : s >= 25 ? '★★☆☆☆' : s > 0 ? '★☆☆☆☆' : '☆☆☆☆☆';
|
||
function calcScore(r) {
|
||
if (r.error && r.testsTotal === 0) return 0;
|
||
let s = 0;
|
||
if (r.specOk) s += 10;
|
||
if (!r.error || r.testsTotal > 0) s += 10;
|
||
if (r.testsTotal > 0) s += Math.round((r.testsPassed / r.testsTotal) * 60);
|
||
s += Math.max(0, 20 - (r.fixRounds || 0) * 10);
|
||
return Math.min(100, s);
|
||
}
|
||
// Laske pisteet jos puuttuvat
|
||
const DATA = RAW.map(r => {
|
||
if (r.score == null) r.score = calcScore(r);
|
||
if (!r.stars) r.stars = starsFor(r.score);
|
||
if (!r.promptTokensEst) r.promptTokensEst = r.promptChars ? Math.round(r.promptChars / 4) : 0;
|
||
return r;
|
||
});
|
||
const cls = r => (!r.error && r.testsPassed === r.testsTotal && r.testsTotal > 0) ? 'pass' : (r.testsTotal > 0 && r.testsPassed > 0) ? 'partial' : 'fail';
|
||
const pctBar = (passed, total, w=80) => {
|
||
if (total === 0) return '-';
|
||
const pct = passed/total*100;
|
||
const c = pct === 100 ? 'bar-fill' : 'bar-partial';
|
||
return `<span class="bar bar-bg" style="width:${w}px"><span class="bar ${c}" style="width:${Math.round(pct/100*w)}px"></span></span> ${passed}/${total}`;
|
||
};
|
||
|
||
// Meta
|
||
const totalTime = DATA.reduce((s,r) => s + r.totalDurationMs, 0);
|
||
document.getElementById('meta').textContent = `${new Date().toLocaleDateString('fi-FI')} — ${DATA.length} ajoa — ${(totalTime/1000/60).toFixed(1)} min`;
|
||
|
||
// Cards
|
||
const models = [...new Set(DATA.map(r => r.model))];
|
||
const scenarios = [...new Set(DATA.map(r => r.scenario))];
|
||
const avgScore = DATA.length ? Math.round(DATA.reduce((s,r) => s + r.score, 0) / DATA.length) : 0;
|
||
const totalPassed = DATA.reduce((s,r) => s + r.testsPassed, 0);
|
||
const totalTests = DATA.reduce((s,r) => s + r.testsTotal, 0);
|
||
const passRate = totalTests ? Math.round(totalPassed/totalTests*100) : 0;
|
||
const bestModel = models.map(m => {
|
||
const mrs = DATA.filter(r => r.model === m);
|
||
return { model: m, avg: Math.round(mrs.reduce((s,r) => s + r.score, 0) / mrs.length) };
|
||
}).sort((a,b) => b.avg - a.avg)[0];
|
||
const fastestModel = models.map(m => {
|
||
const mrs = DATA.filter(r => r.model === m);
|
||
return { model: m, speed: Math.round(mrs.reduce((s,r) => s + r.avgTokPerSec, 0) / mrs.length) };
|
||
}).sort((a,b) => b.speed - a.speed)[0];
|
||
|
||
document.getElementById('cards').innerHTML = `
|
||
<div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistetta</div></div>
|
||
<div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
|
||
<div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${bestModel?.model || '-'}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
|
||
<div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${fastestModel?.model || '-'}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
|
||
<div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
|
||
<div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime/1000/60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
|
||
`;
|
||
|
||
// Summary table
|
||
const sumHead = document.querySelector('#summary-table thead');
|
||
const sumBody = document.querySelector('#summary-table tbody');
|
||
sumHead.innerHTML = '<tr><th>Malli</th>' + scenarios.map(s => `<th>${s}</th>`).join('') + '<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>';
|
||
|
||
const modelRows = models.map(m => {
|
||
const mrs = DATA.filter(r => r.model === m);
|
||
const tp = mrs.reduce((s,r) => s + r.testsPassed, 0);
|
||
const tt = mrs.reduce((s,r) => s + r.testsTotal, 0);
|
||
const tok = mrs.reduce((s,r) => s + r.totalTokens, 0);
|
||
const time = mrs.reduce((s,r) => s + r.totalDurationMs, 0);
|
||
const speed = Math.round(mrs.reduce((s,r) => s + r.avgTokPerSec, 0) / mrs.length);
|
||
const avg = Math.round(mrs.reduce((s,r) => s + r.score, 0) / mrs.length);
|
||
const scenCols = scenarios.map(s => {
|
||
const r = mrs.find(r => r.scenario === s);
|
||
if (!r) return '<td>-</td>';
|
||
return `<td class="${cls(r)}">${pctBar(r.testsPassed, r.testsTotal, 60)} <span style="color:var(--dim)">${(r.totalDurationMs/1000).toFixed(0)}s</span></td>`;
|
||
}).join('');
|
||
return { avg, html: `<tr><td class="model-name">${m}</td>${scenCols}<td>${pctBar(tp, tt)}</td><td>${(tok/1000).toFixed(1)}K</td><td>${(time/1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>` };
|
||
}).sort((a,b) => b.avg - a.avg);
|
||
sumBody.innerHTML = modelRows.map(r => r.html).join('');
|
||
|
||
// Results table
|
||
const resHead = document.querySelector('#results-table thead');
|
||
const resBody = document.querySelector('#results-table tbody');
|
||
const resCols = ['Malli','Skenaario','Speksi','Testit','Korjaus','Ctx','Out tok','Aika','tok/s','Pisteet'];
|
||
resHead.innerHTML = '<tr>' + resCols.map((c,i) => `<th data-col="${i}">${c}</th>`).join('') + '</tr>';
|
||
|
||
let sortCol = 9, sortAsc = false;
|
||
function renderResults() {
|
||
const sorted = [...DATA].sort((a,b) => {
|
||
const vals = [
|
||
[a.model, b.model],
|
||
[a.scenario, b.scenario],
|
||
[a.specEntities, b.specEntities],
|
||
[a.testsPassed/Math.max(a.testsTotal,1), b.testsPassed/Math.max(b.testsTotal,1)],
|
||
[a.fixRounds, b.fixRounds],
|
||
[a.promptTokensEst, b.promptTokensEst],
|
||
[a.totalTokens, b.totalTokens],
|
||
[a.totalDurationMs, b.totalDurationMs],
|
||
[a.avgTokPerSec, b.avgTokPerSec],
|
||
[a.score, b.score],
|
||
][sortCol];
|
||
const cmp = typeof vals[0] === 'string' ? vals[0].localeCompare(vals[1]) : vals[0] - vals[1];
|
||
return sortAsc ? cmp : -cmp;
|
||
});
|
||
resBody.innerHTML = sorted.map(r => {
|
||
const c = cls(r);
|
||
return `<tr>
|
||
<td class="model-name">${r.model}</td>
|
||
<td>${r.scenario}</td>
|
||
<td>${r.specOk ? `✓ ${r.specEntities}e` : '<span class="fail">✗</span>'}</td>
|
||
<td class="${c}">${pctBar(r.testsPassed, r.testsTotal)}</td>
|
||
<td>${r.fixRounds > 0 ? r.fixRounds + '×' : '-'}</td>
|
||
<td>${r.promptTokensEst > 0 ? '~'+(r.promptTokensEst/1000).toFixed(1)+'K' : '-'}</td>
|
||
<td>${r.totalTokens > 0 ? (r.totalTokens/1000).toFixed(1)+'K' : '-'}</td>
|
||
<td>${(r.totalDurationMs/1000).toFixed(0)}s</td>
|
||
<td>${r.avgTokPerSec.toFixed(0)}</td>
|
||
<td><span class="stars">${r.stars}</span> ${r.score}p</td>
|
||
</tr>`;
|
||
}).join('');
|
||
document.querySelectorAll('#results-table th').forEach((th,i) => {
|
||
th.className = i === sortCol ? (sortAsc ? 'sorted-asc' : 'sorted-desc') : '';
|
||
});
|
||
}
|
||
document.querySelector('#results-table thead').addEventListener('click', e => {
|
||
const col = parseInt(e.target.dataset.col);
|
||
if (isNaN(col)) return;
|
||
if (sortCol === col) sortAsc = !sortAsc;
|
||
else { sortCol = col; sortAsc = false; }
|
||
renderResults();
|
||
});
|
||
renderResults();
|
||
</script>
|
||
</body>
|
||
</html>
|