Files
agentic-studio/kipina-codebench/results/2026-04-14_top3.html

184 lines
13 KiB
HTML
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<!DOCTYPE html>
<html lang="fi">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Kipina Model Benchmark</title>
<style>
:root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
* { box-sizing: border-box; margin: 0; padding: 0; }
body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
.meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
.card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
.card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
.card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
.card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
th:hover { color: var(--text); }
th.sorted-asc::after { content: ' ▲'; }
th.sorted-desc::after { content: ' ▼'; }
td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
tr:hover td { background: #1c2128; }
.pass { color: var(--green); }
.partial { color: var(--yellow); }
.fail { color: var(--red); }
.stars { letter-spacing: 1px; }
.bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
.bar-bg { background: var(--border); }
.bar-fill { background: var(--green); }
.bar-partial { background: var(--yellow); }
.model-name { font-weight: 600; }
h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
.summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
</style>
</head>
<body>
<h1>Kipina Model Benchmark</h1>
<div class="meta" id="meta"></div>
<div class="cards" id="cards"></div>
<h2>Mallikohtainen yhteenveto</h2>
<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
<h2>Kaikki tulokset</h2>
<table id="results-table"><thead></thead><tbody></tbody></table>
<script>
// Embedded benchmark results: one record per (model, scenario) run.
// Each record carries the test counts, timing, token counts and a score/stars
// pair. The literal is kept one record per line; a string literal must never be
// wrapped across lines, because a raw newline inside "..." is a SyntaxError.
const RAW = [
  {"model":"qwen3.5:35b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":1,"testsPassed":0,"testsFailed":1,"totalDurationMs":63592,"totalTokens":4103,"avgTokPerSec":88.29857987765199,"promptChars":11310,"promptTokensEst":2828,"score":40,"stars":"★★☆☆☆","error":null},
  {"model":"qwen3.5:35b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":1,"testsPassed":0,"testsFailed":1,"totalDurationMs":35262,"totalTokens":2733,"avgTokPerSec":88.26749243915684,"promptChars":10165,"promptTokensEst":2541,"score":40,"stars":"★★☆☆☆","error":null},
  {"model":"qwen3.5:35b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":1,"testsPassed":0,"testsFailed":1,"totalDurationMs":60346,"totalTokens":4728,"avgTokPerSec":87.67792775342463,"promptChars":11661,"promptTokensEst":2915,"score":40,"stars":"★★☆☆☆","error":null},
  {"model":"codestral:22b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":5,"testsPassed":4,"testsFailed":1,"totalDurationMs":80515,"totalTokens":3081,"avgTokPerSec":43.828884806830445,"promptChars":10150,"promptTokensEst":2538,"score":88,"stars":"★★★★☆","error":null},
  {"model":"codestral:22b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":5,"testsPassed":3,"testsFailed":2,"totalDurationMs":61598,"totalTokens":2441,"avgTokPerSec":44.017116943523455,"promptChars":9288,"promptTokensEst":2322,"score":76,"stars":"★★★★☆","error":null},
  {"model":"codestral:22b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":81568,"totalTokens":3229,"avgTokPerSec":43.67638078062432,"promptChars":10475,"promptTokensEst":2619,"score":100,"stars":"★★★★★","error":null},
  {"model":"qwen3-coder:30b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":30315,"totalTokens":2379,"avgTokPerSec":123.42041099401449,"promptChars":10111,"promptTokensEst":2528,"score":100,"stars":"★★★★★","error":null},
  {"model":"qwen3-coder:30b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":7,"testsPassed":6,"testsFailed":1,"totalDurationMs":23071,"totalTokens":2443,"avgTokPerSec":123.11212122029796,"promptChars":9150,"promptTokensEst":2288,"score":91,"stars":"★★★★★","error":null},
  {"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":11,"testsPassed":11,"testsFailed":0,"totalDurationMs":40933,"totalTokens":4370,"avgTokPerSec":121.8144240305409,"promptChars":10789,"promptTokensEst":2697,"score":100,"stars":"★★★★★","error":null},
];
/**
 * Turns a numeric score into a five-character star rating.
 * Thresholds: >=90 five stars, >=70 four, >=50 three, >=25 two, >0 one,
 * anything else (zero, negative, NaN) no filled stars.
 * @param {number} s - score to rate
 * @returns {string} five-character string of filled/empty stars
 */
const starsFor = s => {
  if (s >= 90) return '★★★★★';
  if (s >= 70) return '★★★★☆';
  if (s >= 50) return '★★★☆☆';
  if (s >= 25) return '★★☆☆☆';
  if (s > 0) return '★☆☆☆☆';
  return '☆☆☆☆☆';
};
/**
 * Computes a 0-100 score for a single run record.
 *
 * Components: 10 points when the spec is OK, 10 points when the run either had
 * no error or still produced tests, up to 60 points proportional to the test
 * pass ratio, and up to 20 points that shrink by 10 per fix round.
 * A run that errored and has exactly zero tests scores 0 outright.
 *
 * @param {object} r - run record (reads error, specOk, testsTotal, testsPassed, fixRounds)
 * @returns {number} score capped at 100
 */
function calcScore(r) {
  const hasTests = r.testsTotal > 0;

  // Errored before any test existed: nothing to credit.
  if (r.error && r.testsTotal === 0) return 0;

  const specPoints = r.specOk ? 10 : 0;
  const ranPoints = (!r.error || hasTests) ? 10 : 0;
  const testPoints = hasTests
    ? Math.round((r.testsPassed / r.testsTotal) * 60)
    : 0;
  // Missing fixRounds counts as zero rounds, i.e. the full 20 points.
  const fixPoints = Math.max(0, 20 - (r.fixRounds || 0) * 10);

  return Math.min(100, specPoints + ranPoints + testPoints + fixPoints);
}
// Normalize the raw records: compute the score, the stars and the prompt-token
// estimate when they are missing. Each record is shallow-copied so the RAW
// input objects are not modified as a side effect of building DATA.
const DATA = RAW.map(r => {
  // `??` matches the original `== null` check: only null/undefined trigger a recompute.
  const score = r.score ?? calcScore(r);
  return {
    ...r,
    score,
    stars: r.stars || starsFor(score),
    // Falls back to a rough chars/4 estimate when no token estimate is stored.
    promptTokensEst: r.promptTokensEst || (r.promptChars ? Math.round(r.promptChars / 4) : 0),
  };
});
/**
 * Picks the CSS class for a run: 'pass' when it has tests, no error and every
 * test passed; 'partial' when it has tests and at least one passed; otherwise 'fail'.
 * @param {object} r - run record (reads error, testsPassed, testsTotal)
 * @returns {'pass'|'partial'|'fail'}
 */
const cls = r => {
  const ranTests = r.testsTotal > 0;
  if (ranTests && !r.error && r.testsPassed === r.testsTotal) return 'pass';
  if (ranTests && r.testsPassed > 0) return 'partial';
  return 'fail';
};
/**
 * Renders a small progress bar (as an HTML string) followed by "passed/total".
 * Returns '-' when total is exactly 0. The fill is green-styled ('bar-fill')
 * only at exactly 100 %, otherwise 'bar-partial'.
 * @param {number} passed - number of passed tests
 * @param {number} total - number of tests
 * @param {number} [w=80] - outer bar width in pixels
 * @returns {string} HTML snippet, or '-'
 */
const pctBar = (passed, total, w = 80) => {
  if (total === 0) {
    return '-';
  }
  const percent = passed / total * 100;
  const fillClass = percent === 100 ? 'bar-fill' : 'bar-partial';
  const fillWidth = Math.round(percent / 100 * w);
  const inner = `<span class="bar ${fillClass}" style="width:${fillWidth}px"></span>`;
  const outer = `<span class="bar bar-bg" style="width:${w}px">${inner}</span>`;
  return `${outer} ${passed}/${total}`;
};
// Meta line under the title: date, number of runs and summed run duration in minutes.
// NOTE(review): the date is `new Date()` at page load, not a timestamp stored with
// the results — confirm whether the benchmark date should be shown instead.
const totalTime = DATA.reduce((s,r) => s + r.totalDurationMs, 0);
// The date and the run count need a separator; without it they run together
// (e.g. "14.4.20269 ajoa"). Uses the same " — " that precedes the minutes.
document.getElementById('meta').textContent = `${new Date().toLocaleDateString('fi-FI')} — ${DATA.length} ajoa — ${(totalTime/1000/60).toFixed(1)} min`;
// Cards: headline figures aggregated over every run in DATA.
const models = [...new Set(DATA.map(r => r.model))];
const scenarios = [...new Set(DATA.map(r => r.scenario))];
const avgScore = DATA.length ? Math.round(DATA.reduce((s,r) => s + r.score, 0) / DATA.length) : 0;
const totalPassed = DATA.reduce((s,r) => s + r.testsPassed, 0);
const totalTests = DATA.reduce((s,r) => s + r.testsTotal, 0);
const passRate = totalTests ? Math.round(totalPassed/totalTests*100) : 0;

// Rounded mean of one numeric field over all runs of a model. Every model in
// `models` comes from DATA, so it always has at least one run.
const modelMean = (model, pick) => {
  const runs = DATA.filter(r => r.model === model);
  return Math.round(runs.reduce((sum, r) => sum + pick(r), 0) / runs.length);
};

// Highest mean score / highest mean tok/s; both are undefined when DATA is empty.
const bestModel = models
  .map(m => ({ model: m, avg: modelMean(m, r => r.score) }))
  .sort((a,b) => b.avg - a.avg)[0];
const fastestModel = models
  .map(m => ({ model: m, speed: modelMean(m, r => r.avgTokPerSec) }))
  .sort((a,b) => b.speed - a.speed)[0];

// `totalTime` is the summed run duration in ms computed for the meta line.
document.getElementById('cards').innerHTML = `
<div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistettä</div></div>
<div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
<div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${bestModel?.model || '-'}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
<div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${fastestModel?.model || '-'}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
<div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
<div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime/1000/60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
`;
// Per-model summary table: one row per model, one column per scenario plus totals.
const sumHead = document.querySelector('#summary-table thead');
const sumBody = document.querySelector('#summary-table tbody');
sumHead.innerHTML = `<tr><th>Malli</th>${scenarios.map(name => `<th>${name}</th>`).join('')}<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>`;

const modelRows = models
  .map(m => {
    const runs = DATA.filter(run => run.model === m);
    // Sums one numeric field over this model's runs.
    const total = field => runs.reduce((acc, run) => acc + run[field], 0);

    const passedSum = total('testsPassed');
    const testsSum = total('testsTotal');
    const tokenSum = total('totalTokens');
    const durationSum = total('totalDurationMs');
    const speed = Math.round(total('avgTokPerSec') / runs.length);
    const avg = Math.round(total('score') / runs.length);

    // One cell per scenario; '-' when the model has no run for that scenario.
    const cells = [];
    for (const s of scenarios) {
      const run = runs.find(candidate => candidate.scenario === s);
      if (!run) {
        cells.push('<td>-</td>');
        continue;
      }
      const seconds = (run.totalDurationMs/1000).toFixed(0);
      cells.push(`<td class="${cls(run)}">${pctBar(run.testsPassed, run.testsTotal, 60)} <span style="color:var(--dim)">${seconds}s</span></td>`);
    }

    const html = [
      '<tr>',
      `<td class="model-name">${m}</td>`,
      cells.join(''),
      `<td>${pctBar(passedSum, testsSum)}</td>`,
      `<td>${(tokenSum/1000).toFixed(1)}K</td>`,
      `<td>${(durationSum/1000).toFixed(0)}s</td>`,
      `<td>${speed}</td>`,
      `<td><span class="stars">${starsFor(avg)}</span> ${avg}p</td>`,
      '</tr>',
    ].join('');

    return { avg, html };
  })
  // Best average score first.
  .sort((a, b) => b.avg - a.avg);

sumBody.innerHTML = modelRows.map(row => row.html).join('');
// Results table: header row plus the sort state read by renderResults().
const resHead = document.querySelector('#results-table thead');
const resBody = document.querySelector('#results-table tbody');
const resCols = ['Malli','Skenaario','Speksi','Testit','Korjaus','Ctx','Out tok','Aika','tok/s','Pisteet'];
// Each header cell carries its column index in data-col for the click handler.
resHead.innerHTML = `<tr>${resCols.map((label, idx) => `<th data-col="${idx}">${label}</th>`).join('')}</tr>`;
// Initial order: column 9 (score), descending.
let sortCol = 9;
let sortAsc = false;
/**
 * Re-renders the body of the results table from DATA, ordered by the current
 * `sortCol` / `sortAsc` state, and marks the active header cell with the
 * 'sorted-asc' / 'sorted-desc' class.
 */
function renderResults() {
  // One accessor per column, in header order. The accessor for the active
  // column is picked once per render instead of being rebuilt per comparison.
  const sortKeys = [
    r => r.model,
    r => r.scenario,
    r => r.specEntities,
    r => r.testsPassed / Math.max(r.testsTotal, 1),
    r => r.fixRounds,
    r => r.promptTokensEst,
    r => r.totalTokens,
    r => r.totalDurationMs,
    r => r.avgTokPerSec,
    r => r.score,
  ];
  const key = sortKeys[sortCol];

  // Sort a copy so DATA keeps its original order.
  const sorted = [...DATA].sort((a, b) => {
    const va = key(a);
    const vb = key(b);
    const cmp = typeof va === 'string' ? va.localeCompare(vb) : va - vb;
    return sortAsc ? cmp : -cmp;
  });

  resBody.innerHTML = sorted.map(r => {
    const c = cls(r);
    // A record without a numeric speed renders '-' rather than throwing on
    // .toFixed and leaving the whole table empty.
    const speed = Number.isFinite(r.avgTokPerSec) ? r.avgTokPerSec.toFixed(0) : '-';
    return [
      '<tr>',
      `<td class="model-name">${r.model}</td>`,
      `<td>${r.scenario}</td>`,
      `<td>${r.specOk ? `${r.specEntities}e` : '<span class="fail">✗</span>'}</td>`,
      `<td class="${c}">${pctBar(r.testsPassed, r.testsTotal)}</td>`,
      `<td>${r.fixRounds > 0 ? r.fixRounds + '×' : '-'}</td>`,
      `<td>${r.promptTokensEst > 0 ? '~'+(r.promptTokensEst/1000).toFixed(1)+'K' : '-'}</td>`,
      `<td>${r.totalTokens > 0 ? (r.totalTokens/1000).toFixed(1)+'K' : '-'}</td>`,
      `<td>${(r.totalDurationMs/1000).toFixed(0)}s</td>`,
      `<td>${speed}</td>`,
      `<td><span class="stars">${r.stars}</span> ${r.score}p</td>`,
      '</tr>',
    ].join('\n');
  }).join('');

  // Only the active column keeps a sort-indicator class.
  document.querySelectorAll('#results-table th').forEach((th,i) => {
    th.className = i === sortCol ? (sortAsc ? 'sorted-asc' : 'sorted-desc') : '';
  });
}
// Header click: toggle direction on the active column, otherwise switch to the
// clicked column (descending first), then re-render.
document.querySelector('#results-table thead').addEventListener('click', e => {
  // Resolve the header cell even if the click landed on an element inside it.
  const th = e.target.closest('th');
  if (!th) return;
  const col = Number.parseInt(th.dataset.col, 10);
  if (Number.isNaN(col)) return;
  if (sortCol === col) sortAsc = !sortAsc;
  else { sortCol = col; sortAsc = false; }
  renderResults();
});
// Initial render with the default sort state.
renderResults();
</script>
</body>
</html>