initial commit: agentic office
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
183
kipina-codebench/results/2026-04-14T19-58-38.html
Normal file
183
kipina-codebench/results/2026-04-14T19-58-38.html
Normal file
@@ -0,0 +1,183 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="fi">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Kipina Model Benchmark</title>
|
||||
<style>
|
||||
:root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
|
||||
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||
body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
|
||||
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
|
||||
.meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
|
||||
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
|
||||
.card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
|
||||
.card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||
.card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
|
||||
.card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
|
||||
table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
|
||||
th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
|
||||
th:hover { color: var(--text); }
|
||||
th.sorted-asc::after { content: ' ▲'; }
|
||||
th.sorted-desc::after { content: ' ▼'; }
|
||||
td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
|
||||
tr:hover td { background: #1c2128; }
|
||||
.pass { color: var(--green); }
|
||||
.partial { color: var(--yellow); }
|
||||
.fail { color: var(--red); }
|
||||
.stars { letter-spacing: 1px; }
|
||||
.bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
|
||||
.bar-bg { background: var(--border); }
|
||||
.bar-fill { background: var(--green); }
|
||||
.bar-partial { background: var(--yellow); }
|
||||
.model-name { font-weight: 600; }
|
||||
h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
|
||||
.summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<h1>Kipina Model Benchmark</h1>
|
||||
<div class="meta" id="meta"></div>
|
||||
|
||||
<div class="cards" id="cards"></div>
|
||||
|
||||
<h2>Mallikohtainen yhteenveto</h2>
|
||||
<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
|
||||
|
||||
<h2>Kaikki tulokset</h2>
|
||||
<table id="results-table"><thead></thead><tbody></tbody></table>
|
||||
|
||||
<script>
|
||||
const RAW = [{"model":"qwen3:8b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":31870,"totalTokens":4024,"avgTokPerSec":143.80989267612117,"promptChars":11168,"promptTokensEst":2792,"score":40,"stars":"★★☆☆☆","error":null,"profile":"small","promptName":"code-go","round":1},{"model":"qwen3:8b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":50050,"totalTokens":6218,"avgTokPerSec":142.0357712618218,"promptChars":11624,"promptTokensEst":2906,"score":40,"stars":"★★☆☆☆","error":null,"profile":"small","promptName":"code-go","round":2},{"model":"qwen3:8b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":45209,"totalTokens":5571,"avgTokPerSec":139.512254185898,"promptChars":11544,"promptTokensEst":2886,"score":40,"stars":"★★☆☆☆","error":null,"profile":"small","promptName":"code-go","round":3}];
|
||||
|
||||
// Convert a numeric score into a five-character star rating string.
// Thresholds are checked from highest to lowest; anything that is not
// greater than 0 (including NaN) yields the empty rating.
const starsFor = s => {
  if (s >= 90) return '★★★★★';
  if (s >= 70) return '★★★★☆';
  if (s >= 50) return '★★★☆☆';
  if (s >= 25) return '★★☆☆☆';
  if (s > 0) return '★☆☆☆☆';
  return '☆☆☆☆☆';
};
|
||||
/**
 * Compute a 0–100 score for one benchmark run record.
 *
 * Points: 10 for a valid spec, 10 when the run had no error or still ran
 * tests, up to 60 in proportion to the test pass ratio, and 20 minus 10 per
 * fix round (never below 0).
 *
 * @param {object} r - Run record; reads error, specOk, testsTotal,
 *   testsPassed and fixRounds.
 * @returns {number} Integer score in the range 0..100.
 */
function calcScore(r) {
  // Missing or non-positive counts are treated as 0. A strict `=== 0` check
  // let an errored run with no testsTotal field fall through and collect the
  // fix-round bonus, and a missing testsPassed turned the score into NaN.
  const total = Number(r.testsTotal) > 0 ? Number(r.testsTotal) : 0;
  const passed = Number(r.testsPassed) > 0 ? Number(r.testsPassed) : 0;

  if (r.error && total === 0) return 0;

  let s = 0;
  if (r.specOk) s += 10;
  if (!r.error || total > 0) s += 10;
  // Cap the ratio at 1 so inconsistent data cannot exceed the 60-point share.
  if (total > 0) s += Math.round((Math.min(passed, total) / total) * 60);
  s += Math.max(0, 20 - (r.fixRounds || 0) * 10);
  return Math.min(100, s);
}
|
||||
// Fill in score, stars and the prompt-token estimate for records that lack
// them. Each record is shallow-copied first so the embedded RAW array is not
// mutated from inside the map callback.
const DATA = RAW.map(raw => {
  const r = { ...raw };
  if (r.score == null) r.score = calcScore(r);
  if (!r.stars) r.stars = starsFor(r.score);
  // Estimate prompt tokens as a quarter of the prompt's character count.
  if (!r.promptTokensEst) r.promptTokensEst = r.promptChars ? Math.round(r.promptChars / 4) : 0;
  return r;
});
|
||||
// Choose the CSS class for a run: 'pass' when tests ran, all passed and the
// run has no error; 'partial' when tests ran and at least one passed;
// otherwise 'fail'.
const cls = r => {
  const ranTests = r.testsTotal > 0;
  if (ranTests && !r.error && r.testsPassed === r.testsTotal) return 'pass';
  if (ranTests && r.testsPassed > 0) return 'partial';
  return 'fail';
};
|
||||
/**
 * Build the HTML for an inline progress bar followed by a "passed/total" label.
 *
 * @param {number} passed - Number of passed tests.
 * @param {number} total - Number of tests run.
 * @param {number} [w=80] - Width of the bar background in pixels.
 * @returns {string} HTML markup, or '-' when there is no positive total.
 */
const pctBar = (passed, total, w = 80) => {
  // Only an exact 0 used to be caught; a missing total produced "NaNpx".
  if (!(total > 0)) return '-';
  // Clamp to 0..1 so the fill can never be wider than the background.
  const ratio = Math.min(1, Math.max(0, (Number(passed) || 0) / total));
  const pct = ratio * 100;
  const c = pct === 100 ? 'bar-fill' : 'bar-partial';
  return `<span class="bar bar-bg" style="width:${w}px"><span class="bar ${c}" style="width:${Math.round(pct / 100 * w)}px"></span></span> ${passed}/${total}`;
};
|
||||
|
||||
// Meta
|
||||
const totalTime = DATA.reduce((s,r) => s + r.totalDurationMs, 0);
|
||||
document.getElementById('meta').textContent = `${new Date().toLocaleDateString('fi-FI')} — ${DATA.length} ajoa — ${(totalTime/1000/60).toFixed(1)} min`;
|
||||
|
||||
// Cards
|
||||
const models = [...new Set(DATA.map(r => r.model))];
|
||||
const scenarios = [...new Set(DATA.map(r => r.scenario))];
|
||||
const avgScore = DATA.length ? Math.round(DATA.reduce((s,r) => s + r.score, 0) / DATA.length) : 0;
|
||||
const totalPassed = DATA.reduce((s,r) => s + r.testsPassed, 0);
|
||||
const totalTests = DATA.reduce((s,r) => s + r.testsTotal, 0);
|
||||
const passRate = totalTests ? Math.round(totalPassed/totalTests*100) : 0;
|
||||
const bestModel = models.map(m => {
|
||||
const mrs = DATA.filter(r => r.model === m);
|
||||
return { model: m, avg: Math.round(mrs.reduce((s,r) => s + r.score, 0) / mrs.length) };
|
||||
}).sort((a,b) => b.avg - a.avg)[0];
|
||||
const fastestModel = models.map(m => {
|
||||
const mrs = DATA.filter(r => r.model === m);
|
||||
return { model: m, speed: Math.round(mrs.reduce((s,r) => s + r.avgTokPerSec, 0) / mrs.length) };
|
||||
}).sort((a,b) => b.speed - a.speed)[0];
|
||||
|
||||
document.getElementById('cards').innerHTML = `
|
||||
<div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistetta</div></div>
|
||||
<div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
|
||||
<div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${bestModel?.model || '-'}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
|
||||
<div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${fastestModel?.model || '-'}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
|
||||
<div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
|
||||
<div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime/1000/60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
|
||||
`;
|
||||
|
||||
// Summary table
|
||||
const sumHead = document.querySelector('#summary-table thead');
|
||||
const sumBody = document.querySelector('#summary-table tbody');
|
||||
sumHead.innerHTML = '<tr><th>Malli</th>' + scenarios.map(s => `<th>${s}</th>`).join('') + '<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>';
|
||||
|
||||
const modelRows = models.map(m => {
|
||||
const mrs = DATA.filter(r => r.model === m);
|
||||
const tp = mrs.reduce((s,r) => s + r.testsPassed, 0);
|
||||
const tt = mrs.reduce((s,r) => s + r.testsTotal, 0);
|
||||
const tok = mrs.reduce((s,r) => s + r.totalTokens, 0);
|
||||
const time = mrs.reduce((s,r) => s + r.totalDurationMs, 0);
|
||||
const speed = Math.round(mrs.reduce((s,r) => s + r.avgTokPerSec, 0) / mrs.length);
|
||||
const avg = Math.round(mrs.reduce((s,r) => s + r.score, 0) / mrs.length);
|
||||
const scenCols = scenarios.map(s => {
|
||||
const r = mrs.find(r => r.scenario === s);
|
||||
if (!r) return '<td>-</td>';
|
||||
return `<td class="${cls(r)}">${pctBar(r.testsPassed, r.testsTotal, 60)} <span style="color:var(--dim)">${(r.totalDurationMs/1000).toFixed(0)}s</span></td>`;
|
||||
}).join('');
|
||||
return { avg, html: `<tr><td class="model-name">${m}</td>${scenCols}<td>${pctBar(tp, tt)}</td><td>${(tok/1000).toFixed(1)}K</td><td>${(time/1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>` };
|
||||
}).sort((a,b) => b.avg - a.avg);
|
||||
sumBody.innerHTML = modelRows.map(r => r.html).join('');
|
||||
|
||||
// Results table
|
||||
const resHead = document.querySelector('#results-table thead');
|
||||
const resBody = document.querySelector('#results-table tbody');
|
||||
const resCols = ['Malli','Skenaario','Speksi','Testit','Korjaus','Ctx','Out tok','Aika','tok/s','Pisteet'];
|
||||
resHead.innerHTML = '<tr>' + resCols.map((c,i) => `<th data-col="${i}">${c}</th>`).join('') + '</tr>';
|
||||
|
||||
let sortCol = 9, sortAsc = false;
|
||||
/**
 * Re-render the body of #results-table from DATA, ordered by the current
 * sortCol / sortAsc state, and mark the active header cell with a
 * sorted-asc / sorted-desc class.
 */
function renderResults() {
  // Record text is written through innerHTML, so escape it first.
  const esc = v => String(v ?? '').replace(/[&<>"']/g, ch => ({
    '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;',
  }[ch]));
  // Non-finite values sort as 0 so the comparator never returns NaN.
  const num = v => (Number.isFinite(v) ? v : 0);

  // One key extractor per column, in the same order as the header cells.
  const sortKey = [
    r => String(r.model ?? ''),
    r => String(r.scenario ?? ''),
    r => num(r.specEntities),
    r => num(r.testsPassed / Math.max(num(r.testsTotal), 1)),
    r => num(r.fixRounds),
    r => num(r.promptTokensEst),
    r => num(r.totalTokens),
    r => num(r.totalDurationMs),
    r => num(r.avgTokPerSec),
    r => num(r.score),
  ][sortCol];

  const sorted = [...DATA].sort((a, b) => {
    const x = sortKey(a);
    const y = sortKey(b);
    const cmp = typeof x === 'string' ? x.localeCompare(y) : x - y;
    return sortAsc ? cmp : -cmp;
  });

  resBody.innerHTML = sorted.map(r => {
    const c = cls(r);
    // A missing speed used to throw on .toFixed and abort the whole render.
    const speed = Number.isFinite(r.avgTokPerSec) ? r.avgTokPerSec.toFixed(0) : '-';
    return `<tr>
<td class="model-name">${esc(r.model)}</td>
<td>${esc(r.scenario)}</td>
<td>${r.specOk ? `✓ ${esc(r.specEntities)}e` : '<span class="fail">✗</span>'}</td>
<td class="${c}">${pctBar(r.testsPassed, r.testsTotal)}</td>
<td>${r.fixRounds > 0 ? esc(r.fixRounds) + '×' : '-'}</td>
<td>${r.promptTokensEst > 0 ? '~'+(r.promptTokensEst/1000).toFixed(1)+'K' : '-'}</td>
<td>${r.totalTokens > 0 ? (r.totalTokens/1000).toFixed(1)+'K' : '-'}</td>
<td>${(num(r.totalDurationMs)/1000).toFixed(0)}s</td>
<td>${speed}</td>
<td><span class="stars">${esc(r.stars)}</span> ${esc(r.score)}p</td>
</tr>`;
  }).join('');

  document.querySelectorAll('#results-table th').forEach((th, i) => {
    th.className = i === sortCol ? (sortAsc ? 'sorted-asc' : 'sorted-desc') : '';
  });
}
|
||||
// Clicking a header cell sorts by that column. Clicking the active column
// again flips the direction; a new column starts in descending order.
document.querySelector('#results-table thead').addEventListener('click', e => {
  // Explicit radix and the non-coercing NaN check; clicks that land outside a
  // data-col header cell yield NaN and are ignored.
  const col = Number.parseInt(e.target.dataset.col, 10);
  if (Number.isNaN(col)) return;
  if (sortCol === col) sortAsc = !sortAsc;
  else { sortCol = col; sortAsc = false; }
  renderResults();
});
|
||||
renderResults();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
71
kipina-codebench/results/2026-04-14T19-58-38.json
Normal file
71
kipina-codebench/results/2026-04-14T19-58-38.json
Normal file
@@ -0,0 +1,71 @@
|
||||
[
|
||||
{
|
||||
"model": "qwen3:8b",
|
||||
"scenario": "blog",
|
||||
"reqOk": true,
|
||||
"specOk": true,
|
||||
"specEntities": 1,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 0,
|
||||
"testsTotal": 0,
|
||||
"testsPassed": 0,
|
||||
"testsFailed": 0,
|
||||
"totalDurationMs": 31870,
|
||||
"totalTokens": 4024,
|
||||
"avgTokPerSec": 143.80989267612117,
|
||||
"promptChars": 11168,
|
||||
"promptTokensEst": 2792,
|
||||
"score": 40,
|
||||
"stars": "★★☆☆☆",
|
||||
"error": null,
|
||||
"profile": "small",
|
||||
"promptName": "code-go",
|
||||
"round": 1
|
||||
},
|
||||
{
|
||||
"model": "qwen3:8b",
|
||||
"scenario": "blog",
|
||||
"reqOk": true,
|
||||
"specOk": true,
|
||||
"specEntities": 2,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 0,
|
||||
"testsTotal": 0,
|
||||
"testsPassed": 0,
|
||||
"testsFailed": 0,
|
||||
"totalDurationMs": 50050,
|
||||
"totalTokens": 6218,
|
||||
"avgTokPerSec": 142.0357712618218,
|
||||
"promptChars": 11624,
|
||||
"promptTokensEst": 2906,
|
||||
"score": 40,
|
||||
"stars": "★★☆☆☆",
|
||||
"error": null,
|
||||
"profile": "small",
|
||||
"promptName": "code-go",
|
||||
"round": 2
|
||||
},
|
||||
{
|
||||
"model": "qwen3:8b",
|
||||
"scenario": "blog",
|
||||
"reqOk": true,
|
||||
"specOk": true,
|
||||
"specEntities": 2,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 0,
|
||||
"testsTotal": 0,
|
||||
"testsPassed": 0,
|
||||
"testsFailed": 0,
|
||||
"totalDurationMs": 45209,
|
||||
"totalTokens": 5571,
|
||||
"avgTokPerSec": 139.512254185898,
|
||||
"promptChars": 11544,
|
||||
"promptTokensEst": 2886,
|
||||
"score": 40,
|
||||
"stars": "★★☆☆☆",
|
||||
"error": null,
|
||||
"profile": "small",
|
||||
"promptName": "code-go",
|
||||
"round": 3
|
||||
}
|
||||
]
|
||||
183
kipina-codebench/results/2026-04-14T20-19-09.html
Normal file
183
kipina-codebench/results/2026-04-14T20-19-09.html
Normal file
@@ -0,0 +1,183 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="fi">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Kipina Model Benchmark</title>
|
||||
<style>
|
||||
:root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
|
||||
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||
body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
|
||||
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
|
||||
.meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
|
||||
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
|
||||
.card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
|
||||
.card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||
.card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
|
||||
.card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
|
||||
table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
|
||||
th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
|
||||
th:hover { color: var(--text); }
|
||||
th.sorted-asc::after { content: ' ▲'; }
|
||||
th.sorted-desc::after { content: ' ▼'; }
|
||||
td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
|
||||
tr:hover td { background: #1c2128; }
|
||||
.pass { color: var(--green); }
|
||||
.partial { color: var(--yellow); }
|
||||
.fail { color: var(--red); }
|
||||
.stars { letter-spacing: 1px; }
|
||||
.bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
|
||||
.bar-bg { background: var(--border); }
|
||||
.bar-fill { background: var(--green); }
|
||||
.bar-partial { background: var(--yellow); }
|
||||
.model-name { font-weight: 600; }
|
||||
h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
|
||||
.summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<h1>Kipina Model Benchmark</h1>
|
||||
<div class="meta" id="meta"></div>
|
||||
|
||||
<div class="cards" id="cards"></div>
|
||||
|
||||
<h2>Mallikohtainen yhteenveto</h2>
|
||||
<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
|
||||
|
||||
<h2>Kaikki tulokset</h2>
|
||||
<table id="results-table"><thead></thead><tbody></tbody></table>
|
||||
|
||||
<script>
|
||||
const RAW = [];
|
||||
|
||||
// Convert a numeric score into a five-character star rating string.
// Thresholds are checked from highest to lowest; anything that is not
// greater than 0 (including NaN) yields the empty rating.
const starsFor = s => {
  if (s >= 90) return '★★★★★';
  if (s >= 70) return '★★★★☆';
  if (s >= 50) return '★★★☆☆';
  if (s >= 25) return '★★☆☆☆';
  if (s > 0) return '★☆☆☆☆';
  return '☆☆☆☆☆';
};
|
||||
/**
 * Compute a 0–100 score for one benchmark run record.
 *
 * Points: 10 for a valid spec, 10 when the run had no error or still ran
 * tests, up to 60 in proportion to the test pass ratio, and 20 minus 10 per
 * fix round (never below 0).
 *
 * @param {object} r - Run record; reads error, specOk, testsTotal,
 *   testsPassed and fixRounds.
 * @returns {number} Integer score in the range 0..100.
 */
function calcScore(r) {
  // Missing or non-positive counts are treated as 0. A strict `=== 0` check
  // let an errored run with no testsTotal field fall through and collect the
  // fix-round bonus, and a missing testsPassed turned the score into NaN.
  const total = Number(r.testsTotal) > 0 ? Number(r.testsTotal) : 0;
  const passed = Number(r.testsPassed) > 0 ? Number(r.testsPassed) : 0;

  if (r.error && total === 0) return 0;

  let s = 0;
  if (r.specOk) s += 10;
  if (!r.error || total > 0) s += 10;
  // Cap the ratio at 1 so inconsistent data cannot exceed the 60-point share.
  if (total > 0) s += Math.round((Math.min(passed, total) / total) * 60);
  s += Math.max(0, 20 - (r.fixRounds || 0) * 10);
  return Math.min(100, s);
}
|
||||
// Fill in score, stars and the prompt-token estimate for records that lack
// them. Each record is shallow-copied first so the embedded RAW array is not
// mutated from inside the map callback.
const DATA = RAW.map(raw => {
  const r = { ...raw };
  if (r.score == null) r.score = calcScore(r);
  if (!r.stars) r.stars = starsFor(r.score);
  // Estimate prompt tokens as a quarter of the prompt's character count.
  if (!r.promptTokensEst) r.promptTokensEst = r.promptChars ? Math.round(r.promptChars / 4) : 0;
  return r;
});
|
||||
// Choose the CSS class for a run: 'pass' when tests ran, all passed and the
// run has no error; 'partial' when tests ran and at least one passed;
// otherwise 'fail'.
const cls = r => {
  const ranTests = r.testsTotal > 0;
  if (ranTests && !r.error && r.testsPassed === r.testsTotal) return 'pass';
  if (ranTests && r.testsPassed > 0) return 'partial';
  return 'fail';
};
|
||||
/**
 * Build the HTML for an inline progress bar followed by a "passed/total" label.
 *
 * @param {number} passed - Number of passed tests.
 * @param {number} total - Number of tests run.
 * @param {number} [w=80] - Width of the bar background in pixels.
 * @returns {string} HTML markup, or '-' when there is no positive total.
 */
const pctBar = (passed, total, w = 80) => {
  // Only an exact 0 used to be caught; a missing total produced "NaNpx".
  if (!(total > 0)) return '-';
  // Clamp to 0..1 so the fill can never be wider than the background.
  const ratio = Math.min(1, Math.max(0, (Number(passed) || 0) / total));
  const pct = ratio * 100;
  const c = pct === 100 ? 'bar-fill' : 'bar-partial';
  return `<span class="bar bar-bg" style="width:${w}px"><span class="bar ${c}" style="width:${Math.round(pct / 100 * w)}px"></span></span> ${passed}/${total}`;
};
|
||||
|
||||
// Meta
|
||||
const totalTime = DATA.reduce((s,r) => s + r.totalDurationMs, 0);
|
||||
document.getElementById('meta').textContent = `${new Date().toLocaleDateString('fi-FI')} — ${DATA.length} ajoa — ${(totalTime/1000/60).toFixed(1)} min`;
|
||||
|
||||
// Cards
|
||||
const models = [...new Set(DATA.map(r => r.model))];
|
||||
const scenarios = [...new Set(DATA.map(r => r.scenario))];
|
||||
const avgScore = DATA.length ? Math.round(DATA.reduce((s,r) => s + r.score, 0) / DATA.length) : 0;
|
||||
const totalPassed = DATA.reduce((s,r) => s + r.testsPassed, 0);
|
||||
const totalTests = DATA.reduce((s,r) => s + r.testsTotal, 0);
|
||||
const passRate = totalTests ? Math.round(totalPassed/totalTests*100) : 0;
|
||||
const bestModel = models.map(m => {
|
||||
const mrs = DATA.filter(r => r.model === m);
|
||||
return { model: m, avg: Math.round(mrs.reduce((s,r) => s + r.score, 0) / mrs.length) };
|
||||
}).sort((a,b) => b.avg - a.avg)[0];
|
||||
const fastestModel = models.map(m => {
|
||||
const mrs = DATA.filter(r => r.model === m);
|
||||
return { model: m, speed: Math.round(mrs.reduce((s,r) => s + r.avgTokPerSec, 0) / mrs.length) };
|
||||
}).sort((a,b) => b.speed - a.speed)[0];
|
||||
|
||||
document.getElementById('cards').innerHTML = `
|
||||
<div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistetta</div></div>
|
||||
<div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
|
||||
<div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${bestModel?.model || '-'}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
|
||||
<div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${fastestModel?.model || '-'}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
|
||||
<div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
|
||||
<div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime/1000/60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
|
||||
`;
|
||||
|
||||
// Summary table
|
||||
const sumHead = document.querySelector('#summary-table thead');
|
||||
const sumBody = document.querySelector('#summary-table tbody');
|
||||
sumHead.innerHTML = '<tr><th>Malli</th>' + scenarios.map(s => `<th>${s}</th>`).join('') + '<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>';
|
||||
|
||||
const modelRows = models.map(m => {
|
||||
const mrs = DATA.filter(r => r.model === m);
|
||||
const tp = mrs.reduce((s,r) => s + r.testsPassed, 0);
|
||||
const tt = mrs.reduce((s,r) => s + r.testsTotal, 0);
|
||||
const tok = mrs.reduce((s,r) => s + r.totalTokens, 0);
|
||||
const time = mrs.reduce((s,r) => s + r.totalDurationMs, 0);
|
||||
const speed = Math.round(mrs.reduce((s,r) => s + r.avgTokPerSec, 0) / mrs.length);
|
||||
const avg = Math.round(mrs.reduce((s,r) => s + r.score, 0) / mrs.length);
|
||||
const scenCols = scenarios.map(s => {
|
||||
const r = mrs.find(r => r.scenario === s);
|
||||
if (!r) return '<td>-</td>';
|
||||
return `<td class="${cls(r)}">${pctBar(r.testsPassed, r.testsTotal, 60)} <span style="color:var(--dim)">${(r.totalDurationMs/1000).toFixed(0)}s</span></td>`;
|
||||
}).join('');
|
||||
return { avg, html: `<tr><td class="model-name">${m}</td>${scenCols}<td>${pctBar(tp, tt)}</td><td>${(tok/1000).toFixed(1)}K</td><td>${(time/1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>` };
|
||||
}).sort((a,b) => b.avg - a.avg);
|
||||
sumBody.innerHTML = modelRows.map(r => r.html).join('');
|
||||
|
||||
// Results table
|
||||
const resHead = document.querySelector('#results-table thead');
|
||||
const resBody = document.querySelector('#results-table tbody');
|
||||
const resCols = ['Malli','Skenaario','Speksi','Testit','Korjaus','Ctx','Out tok','Aika','tok/s','Pisteet'];
|
||||
resHead.innerHTML = '<tr>' + resCols.map((c,i) => `<th data-col="${i}">${c}</th>`).join('') + '</tr>';
|
||||
|
||||
let sortCol = 9, sortAsc = false;
|
||||
/**
 * Re-render the body of #results-table from DATA, ordered by the current
 * sortCol / sortAsc state, and mark the active header cell with a
 * sorted-asc / sorted-desc class.
 */
function renderResults() {
  // Record text is written through innerHTML, so escape it first.
  const esc = v => String(v ?? '').replace(/[&<>"']/g, ch => ({
    '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;',
  }[ch]));
  // Non-finite values sort as 0 so the comparator never returns NaN.
  const num = v => (Number.isFinite(v) ? v : 0);

  // One key extractor per column, in the same order as the header cells.
  const sortKey = [
    r => String(r.model ?? ''),
    r => String(r.scenario ?? ''),
    r => num(r.specEntities),
    r => num(r.testsPassed / Math.max(num(r.testsTotal), 1)),
    r => num(r.fixRounds),
    r => num(r.promptTokensEst),
    r => num(r.totalTokens),
    r => num(r.totalDurationMs),
    r => num(r.avgTokPerSec),
    r => num(r.score),
  ][sortCol];

  const sorted = [...DATA].sort((a, b) => {
    const x = sortKey(a);
    const y = sortKey(b);
    const cmp = typeof x === 'string' ? x.localeCompare(y) : x - y;
    return sortAsc ? cmp : -cmp;
  });

  resBody.innerHTML = sorted.map(r => {
    const c = cls(r);
    // A missing speed used to throw on .toFixed and abort the whole render.
    const speed = Number.isFinite(r.avgTokPerSec) ? r.avgTokPerSec.toFixed(0) : '-';
    return `<tr>
<td class="model-name">${esc(r.model)}</td>
<td>${esc(r.scenario)}</td>
<td>${r.specOk ? `✓ ${esc(r.specEntities)}e` : '<span class="fail">✗</span>'}</td>
<td class="${c}">${pctBar(r.testsPassed, r.testsTotal)}</td>
<td>${r.fixRounds > 0 ? esc(r.fixRounds) + '×' : '-'}</td>
<td>${r.promptTokensEst > 0 ? '~'+(r.promptTokensEst/1000).toFixed(1)+'K' : '-'}</td>
<td>${r.totalTokens > 0 ? (r.totalTokens/1000).toFixed(1)+'K' : '-'}</td>
<td>${(num(r.totalDurationMs)/1000).toFixed(0)}s</td>
<td>${speed}</td>
<td><span class="stars">${esc(r.stars)}</span> ${esc(r.score)}p</td>
</tr>`;
  }).join('');

  document.querySelectorAll('#results-table th').forEach((th, i) => {
    th.className = i === sortCol ? (sortAsc ? 'sorted-asc' : 'sorted-desc') : '';
  });
}
|
||||
// Clicking a header cell sorts by that column. Clicking the active column
// again flips the direction; a new column starts in descending order.
document.querySelector('#results-table thead').addEventListener('click', e => {
  // Explicit radix and the non-coercing NaN check; clicks that land outside a
  // data-col header cell yield NaN and are ignored.
  const col = Number.parseInt(e.target.dataset.col, 10);
  if (Number.isNaN(col)) return;
  if (sortCol === col) sortAsc = !sortAsc;
  else { sortCol = col; sortAsc = false; }
  renderResults();
});
|
||||
renderResults();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
1
kipina-codebench/results/2026-04-14T20-19-09.json
Normal file
1
kipina-codebench/results/2026-04-14T20-19-09.json
Normal file
@@ -0,0 +1 @@
|
||||
[]
|
||||
183
kipina-codebench/results/2026-04-14T20-24-36.html
Normal file
183
kipina-codebench/results/2026-04-14T20-24-36.html
Normal file
@@ -0,0 +1,183 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="fi">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Kipina Model Benchmark</title>
|
||||
<style>
|
||||
:root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
|
||||
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||
body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
|
||||
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
|
||||
.meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
|
||||
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
|
||||
.card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
|
||||
.card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||
.card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
|
||||
.card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
|
||||
table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
|
||||
th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
|
||||
th:hover { color: var(--text); }
|
||||
th.sorted-asc::after { content: ' ▲'; }
|
||||
th.sorted-desc::after { content: ' ▼'; }
|
||||
td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
|
||||
tr:hover td { background: #1c2128; }
|
||||
.pass { color: var(--green); }
|
||||
.partial { color: var(--yellow); }
|
||||
.fail { color: var(--red); }
|
||||
.stars { letter-spacing: 1px; }
|
||||
.bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
|
||||
.bar-bg { background: var(--border); }
|
||||
.bar-fill { background: var(--green); }
|
||||
.bar-partial { background: var(--yellow); }
|
||||
.model-name { font-weight: 600; }
|
||||
h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
|
||||
.summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<h1>Kipina Model Benchmark</h1>
|
||||
<div class="meta" id="meta"></div>
|
||||
|
||||
<div class="cards" id="cards"></div>
|
||||
|
||||
<h2>Mallikohtainen yhteenveto</h2>
|
||||
<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
|
||||
|
||||
<h2>Kaikki tulokset</h2>
|
||||
<table id="results-table"><thead></thead><tbody></tbody></table>
|
||||
|
||||
<script>
|
||||
const RAW = [];
|
||||
|
||||
// Convert a numeric score into a five-character star rating string.
// Thresholds are checked from highest to lowest; anything that is not
// greater than 0 (including NaN) yields the empty rating.
const starsFor = s => {
  if (s >= 90) return '★★★★★';
  if (s >= 70) return '★★★★☆';
  if (s >= 50) return '★★★☆☆';
  if (s >= 25) return '★★☆☆☆';
  if (s > 0) return '★☆☆☆☆';
  return '☆☆☆☆☆';
};
|
||||
/**
 * Compute a 0–100 score for one benchmark run record.
 *
 * Points: 10 for a valid spec, 10 when the run had no error or still ran
 * tests, up to 60 in proportion to the test pass ratio, and 20 minus 10 per
 * fix round (never below 0).
 *
 * @param {object} r - Run record; reads error, specOk, testsTotal,
 *   testsPassed and fixRounds.
 * @returns {number} Integer score in the range 0..100.
 */
function calcScore(r) {
  // Missing or non-positive counts are treated as 0. A strict `=== 0` check
  // let an errored run with no testsTotal field fall through and collect the
  // fix-round bonus, and a missing testsPassed turned the score into NaN.
  const total = Number(r.testsTotal) > 0 ? Number(r.testsTotal) : 0;
  const passed = Number(r.testsPassed) > 0 ? Number(r.testsPassed) : 0;

  if (r.error && total === 0) return 0;

  let s = 0;
  if (r.specOk) s += 10;
  if (!r.error || total > 0) s += 10;
  // Cap the ratio at 1 so inconsistent data cannot exceed the 60-point share.
  if (total > 0) s += Math.round((Math.min(passed, total) / total) * 60);
  s += Math.max(0, 20 - (r.fixRounds || 0) * 10);
  return Math.min(100, s);
}
|
||||
// Fill in score, stars and the prompt-token estimate for records that lack
// them. Each record is shallow-copied first so the embedded RAW array is not
// mutated from inside the map callback.
const DATA = RAW.map(raw => {
  const r = { ...raw };
  if (r.score == null) r.score = calcScore(r);
  if (!r.stars) r.stars = starsFor(r.score);
  // Estimate prompt tokens as a quarter of the prompt's character count.
  if (!r.promptTokensEst) r.promptTokensEst = r.promptChars ? Math.round(r.promptChars / 4) : 0;
  return r;
});
|
||||
// Choose the CSS class for a run: 'pass' when tests ran, all passed and the
// run has no error; 'partial' when tests ran and at least one passed;
// otherwise 'fail'.
const cls = r => {
  const ranTests = r.testsTotal > 0;
  if (ranTests && !r.error && r.testsPassed === r.testsTotal) return 'pass';
  if (ranTests && r.testsPassed > 0) return 'partial';
  return 'fail';
};
|
||||
/**
 * Build the HTML for an inline progress bar followed by a "passed/total" label.
 *
 * @param {number} passed - Number of passed tests.
 * @param {number} total - Number of tests run.
 * @param {number} [w=80] - Width of the bar background in pixels.
 * @returns {string} HTML markup, or '-' when there is no positive total.
 */
const pctBar = (passed, total, w = 80) => {
  // Only an exact 0 used to be caught; a missing total produced "NaNpx".
  if (!(total > 0)) return '-';
  // Clamp to 0..1 so the fill can never be wider than the background.
  const ratio = Math.min(1, Math.max(0, (Number(passed) || 0) / total));
  const pct = ratio * 100;
  const c = pct === 100 ? 'bar-fill' : 'bar-partial';
  return `<span class="bar bar-bg" style="width:${w}px"><span class="bar ${c}" style="width:${Math.round(pct / 100 * w)}px"></span></span> ${passed}/${total}`;
};
|
||||
|
||||
// Meta
|
||||
const totalTime = DATA.reduce((s,r) => s + r.totalDurationMs, 0);
|
||||
document.getElementById('meta').textContent = `${new Date().toLocaleDateString('fi-FI')} — ${DATA.length} ajoa — ${(totalTime/1000/60).toFixed(1)} min`;
|
||||
|
||||
// Cards
|
||||
const models = [...new Set(DATA.map(r => r.model))];
|
||||
const scenarios = [...new Set(DATA.map(r => r.scenario))];
|
||||
const avgScore = DATA.length ? Math.round(DATA.reduce((s,r) => s + r.score, 0) / DATA.length) : 0;
|
||||
const totalPassed = DATA.reduce((s,r) => s + r.testsPassed, 0);
|
||||
const totalTests = DATA.reduce((s,r) => s + r.testsTotal, 0);
|
||||
const passRate = totalTests ? Math.round(totalPassed/totalTests*100) : 0;
|
||||
const bestModel = models.map(m => {
|
||||
const mrs = DATA.filter(r => r.model === m);
|
||||
return { model: m, avg: Math.round(mrs.reduce((s,r) => s + r.score, 0) / mrs.length) };
|
||||
}).sort((a,b) => b.avg - a.avg)[0];
|
||||
const fastestModel = models.map(m => {
|
||||
const mrs = DATA.filter(r => r.model === m);
|
||||
return { model: m, speed: Math.round(mrs.reduce((s,r) => s + r.avgTokPerSec, 0) / mrs.length) };
|
||||
}).sort((a,b) => b.speed - a.speed)[0];
|
||||
|
||||
document.getElementById('cards').innerHTML = `
|
||||
<div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistetta</div></div>
|
||||
<div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
|
||||
<div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${bestModel?.model || '-'}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
|
||||
<div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${fastestModel?.model || '-'}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
|
||||
<div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
|
||||
<div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime/1000/60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
|
||||
`;
|
||||
|
||||
// Summary table: one row per model, highest average score first.
const sumHead = document.querySelector('#summary-table thead');
const sumBody = document.querySelector('#summary-table tbody');
sumHead.innerHTML = `<tr><th>Malli</th>${scenarios.map((s) => `<th>${s}</th>`).join('')}<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>`;

const modelRows = models.map((m) => {
  const mrs = DATA.filter((r) => r.model === m);
  // Sums one numeric field over a list of runs.
  const sumOf = (rows, key) => rows.reduce((acc, r) => acc + r[key], 0);
  const tp = sumOf(mrs, 'testsPassed');
  const tt = sumOf(mrs, 'testsTotal');
  const tok = sumOf(mrs, 'totalTokens');
  const time = sumOf(mrs, 'totalDurationMs');
  const speed = Math.round(sumOf(mrs, 'avgTokPerSec') / mrs.length);
  const avg = Math.round(sumOf(mrs, 'score') / mrs.length);
  const scenCols = scenarios.map((s) => {
    // A model can have several runs (rounds) of the same scenario. Fold all of
    // them into the cell so it agrees with the totals column, instead of
    // showing only the first run. With a single run the output is unchanged.
    const runs = mrs.filter((r) => r.scenario === s);
    if (runs.length === 0) return '<td>-</td>';
    const agg = {
      error: runs.some((r) => r.error),
      testsPassed: sumOf(runs, 'testsPassed'),
      testsTotal: sumOf(runs, 'testsTotal'),
    };
    const secs = (sumOf(runs, 'totalDurationMs') / 1000).toFixed(0);
    return `<td class="${cls(agg)}">${pctBar(agg.testsPassed, agg.testsTotal, 60)} <span style="color:var(--dim)">${secs}s</span></td>`;
  }).join('');
  return { avg, html: `<tr><td class="model-name">${m}</td>${scenCols}<td>${pctBar(tp, tt)}</td><td>${(tok / 1000).toFixed(1)}K</td><td>${(time / 1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>` };
}).sort((a, b) => b.avg - a.avg);
sumBody.innerHTML = modelRows.map((row) => row.html).join('');
|
||||
|
||||
// Results table: one header cell per column; data-col holds the column index.
const resHead = document.querySelector('#results-table thead');
const resBody = document.querySelector('#results-table tbody');
const resCols = [
  'Malli',
  'Skenaario',
  'Speksi',
  'Testit',
  'Korjaus',
  'Ctx',
  'Out tok',
  'Aika',
  'tok/s',
  'Pisteet',
];
resHead.innerHTML = `<tr>${resCols.map((label, idx) => `<th data-col="${idx}">${label}</th>`).join('')}</tr>`;

// Sort state: active column index (9 is the last column, 'Pisteet') and direction.
let sortCol = 9;
let sortAsc = false;
|
||||
/**
 * Re-renders the body of the results table, sorted by the active column.
 *
 * Reads the script-level sort state (`sortCol`, `sortAsc`), writes one row per
 * run into `resBody`, and sets the header classes so the sorted column shows
 * its direction arrow.
 */
function renderResults() {
  // One accessor per column, in the same order as the header cells.
  const keyFns = [
    (r) => r.model,
    (r) => r.scenario,
    (r) => r.specEntities,
    (r) => r.testsPassed / Math.max(r.testsTotal, 1),
    (r) => r.fixRounds,
    (r) => r.promptTokensEst,
    (r) => r.totalTokens,
    (r) => r.totalDurationMs,
    (r) => r.avgTokPerSec,
    (r) => r.score,
  ];
  const keyOf = keyFns[sortCol];
  // Missing or non-numeric values count as 0, so the comparator never returns
  // NaN and formatting never calls a method on undefined.
  const num = (v) => {
    const n = Number(v);
    return Number.isFinite(n) ? n : 0;
  };

  const sorted = [...DATA].sort((a, b) => {
    const x = keyOf(a);
    const y = keyOf(b);
    const textual = typeof x === 'string' || typeof y === 'string';
    const cmp = textual
      ? String(x ?? '').localeCompare(String(y ?? ''))
      : num(x) - num(y);
    return sortAsc ? cmp : -cmp;
  });

  resBody.innerHTML = sorted.map((r) => {
    const c = cls(r);
    return `<tr>
<td class="model-name">${r.model}</td>
<td>${r.scenario}</td>
<td>${r.specOk ? `✓ ${r.specEntities}e` : '<span class="fail">✗</span>'}</td>
<td class="${c}">${pctBar(r.testsPassed, r.testsTotal)}</td>
<td>${r.fixRounds > 0 ? r.fixRounds + '×' : '-'}</td>
<td>${r.promptTokensEst > 0 ? '~' + (r.promptTokensEst / 1000).toFixed(1) + 'K' : '-'}</td>
<td>${r.totalTokens > 0 ? (r.totalTokens / 1000).toFixed(1) + 'K' : '-'}</td>
<td>${(num(r.totalDurationMs) / 1000).toFixed(0)}s</td>
<td>${num(r.avgTokPerSec).toFixed(0)}</td>
<td><span class="stars">${r.stars}</span> ${r.score}p</td>
</tr>`;
  }).join('');

  // Only the active column carries a sort-direction class.
  const activeClass = sortAsc ? 'sorted-asc' : 'sorted-desc';
  document.querySelectorAll('#results-table th').forEach((th, i) => {
    th.className = i === sortCol ? activeClass : '';
  });
}
|
||||
// Clicking a header cell sorts by that column; clicking the active column
// again flips the direction. A newly selected column starts descending.
document.querySelector('#results-table thead').addEventListener('click', (e) => {
  // Resolve the header cell even when the click lands on an element nested in it.
  const th = e.target.closest('th');
  if (!th) return;
  const col = Number.parseInt(th.dataset.col, 10);
  if (Number.isNaN(col)) return;
  if (sortCol === col) {
    sortAsc = !sortAsc;
  } else {
    sortCol = col;
    sortAsc = false;
  }
  renderResults();
});

// Initial render using the current sort state.
renderResults();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
1
kipina-codebench/results/2026-04-14T20-24-36.json
Normal file
1
kipina-codebench/results/2026-04-14T20-24-36.json
Normal file
@@ -0,0 +1 @@
|
||||
[]
|
||||
183
kipina-codebench/results/2026-04-14T20-32-12.html
Normal file
183
kipina-codebench/results/2026-04-14T20-32-12.html
Normal file
@@ -0,0 +1,183 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="fi">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Kipina Model Benchmark</title>
|
||||
<style>
|
||||
:root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
|
||||
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||
body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
|
||||
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
|
||||
.meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
|
||||
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
|
||||
.card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
|
||||
.card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||
.card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
|
||||
.card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
|
||||
table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
|
||||
th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
|
||||
th:hover { color: var(--text); }
|
||||
th.sorted-asc::after { content: ' ▲'; }
|
||||
th.sorted-desc::after { content: ' ▼'; }
|
||||
td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
|
||||
tr:hover td { background: #1c2128; }
|
||||
.pass { color: var(--green); }
|
||||
.partial { color: var(--yellow); }
|
||||
.fail { color: var(--red); }
|
||||
.stars { letter-spacing: 1px; }
|
||||
.bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
|
||||
.bar-bg { background: var(--border); }
|
||||
.bar-fill { background: var(--green); }
|
||||
.bar-partial { background: var(--yellow); }
|
||||
.model-name { font-weight: 600; }
|
||||
h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
|
||||
.summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<h1>Kipina Model Benchmark</h1>
|
||||
<div class="meta" id="meta"></div>
|
||||
|
||||
<div class="cards" id="cards"></div>
|
||||
|
||||
<h2>Mallikohtainen yhteenveto</h2>
|
||||
<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
|
||||
|
||||
<h2>Kaikki tulokset</h2>
|
||||
<table id="results-table"><thead></thead><tbody></tbody></table>
|
||||
|
||||
<script>
|
||||
const RAW = [{"model":"qwen3:8b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":0,"totalTokens":0,"avgTokPerSec":0,"promptChars":30421,"promptTokensEst":7605,"score":0,"stars":"","error":"Puuttuvat: go.mod","round":1},{"model":"qwen3:8b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":0,"totalTokens":0,"avgTokPerSec":0,"promptChars":26971,"promptTokensEst":6743,"score":0,"stars":"","error":"Puuttuvat: go.mod","round":2},{"model":"qwen3:8b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":0,"totalTokens":0,"avgTokPerSec":0,"promptChars":27694,"promptTokensEst":6924,"score":0,"stars":"","error":"Puuttuvat: go.mod","round":3}];
|
||||
|
||||
// Maps a score to a five-character star rating: 90+ gives five filled stars,
// 70+ four, 50+ three, 25+ two, any other positive score one, otherwise none.
const starsFor = (s) => {
  const cutoffs = [90, 70, 50, 25];
  const tier = cutoffs.findIndex((min) => s >= min);
  let filled;
  if (tier !== -1) {
    filled = 5 - tier;
  } else if (s > 0) {
    filled = 1;
  } else {
    filled = 0;
  }
  return '★'.repeat(filled) + '☆'.repeat(5 - filled);
};
|
||||
/**
 * Computes a 0–100 score for one benchmark run whose record has no stored score.
 *
 * Breakdown: 10 points for a valid spec, 10 points for a run that produced a
 * result (no error, or at least one test executed), up to 60 points in
 * proportion to the share of passed tests, and a 20-point bonus that shrinks
 * by 10 for every fix round.
 *
 * @param {object} r - Run record; reads error, specOk, testsTotal, testsPassed, fixRounds.
 * @returns {number} Integer score between 0 and 100.
 */
function calcScore(r) {
  // Missing, negative or non-numeric counters count as zero, so an incomplete
  // record cannot slip past the "errored before any test ran" guard, and
  // inconsistent counts cannot inflate the result.
  const total = Math.max(0, Number(r.testsTotal) || 0);
  const passed = Math.min(total, Math.max(0, Number(r.testsPassed) || 0));
  const fixes = Math.max(0, Number(r.fixRounds) || 0);

  // A run that failed before any test executed earns nothing.
  if (r.error && total === 0) return 0;

  let s = 0;
  if (r.specOk) s += 10;
  // Past the guard above, the run either had no error or executed tests.
  s += 10;
  if (total > 0) s += Math.round((passed / total) * 60);
  s += Math.max(0, 20 - fixes * 10);
  return Math.min(100, s);
}
|
||||
// Fill in fields a record may lack: the score, the star rating and the
// estimated prompt token count (promptChars / 4). Records are updated in place.
const DATA = RAW.map((run) => {
  const scoreMissing = run.score === null || run.score === undefined;
  if (scoreMissing) {
    run.score = calcScore(run);
  }
  if (!run.stars) {
    run.stars = starsFor(run.score);
  }
  if (!run.promptTokensEst) {
    run.promptTokensEst = run.promptChars ? Math.round(run.promptChars / 4) : 0;
  }
  return run;
});
|
||||
// Picks the CSS class for a run: 'pass' when tests ran, all passed and there
// was no error; 'partial' when tests ran and at least one passed; else 'fail'.
const cls = (r) => {
  const ranTests = r.testsTotal > 0;
  if (!ranTests) return 'fail';
  if (!r.error && r.testsPassed === r.testsTotal) return 'pass';
  return r.testsPassed > 0 ? 'partial' : 'fail';
};
|
||||
/**
 * Builds the HTML for a small progress bar followed by a "passed/total" label.
 *
 * @param {number} passed - Number of passed tests.
 * @param {number} total - Number of tests; anything that is not a positive number yields '-'.
 * @param {number} [w=80] - Width of the bar track in pixels.
 * @returns {string} HTML fragment, or '-' when there is nothing to draw.
 */
const pctBar = (passed, total, w = 80) => {
  // Covers 0 as well as a missing or non-numeric total, which would otherwise
  // produce "NaNpx" widths.
  if (!(total > 0)) return '-';
  const done = Number(passed) || 0;
  // Clamp so inconsistent counts can never draw the fill outside the track.
  const pct = Math.min(100, Math.max(0, done / total * 100));
  const c = pct === 100 ? 'bar-fill' : 'bar-partial';
  return `<span class="bar bar-bg" style="width:${w}px"><span class="bar ${c}" style="width:${Math.round(pct / 100 * w)}px"></span></span> ${done}/${total}`;
};
|
||||
|
||||
// Meta line under the title: the current date (taken when the page renders),
// the number of runs and the summed run duration in minutes.
const totalTime = DATA.reduce((sum, run) => sum + run.totalDurationMs, 0);
document.getElementById('meta').textContent = [
  new Date().toLocaleDateString('fi-FI'),
  `${DATA.length} ajoa`,
  `${(totalTime / 1000 / 60).toFixed(1)} min`,
].join(' — ');
|
||||
|
||||
// Cards: headline figures across all runs, shown above the tables.
const models = [...new Set(DATA.map((r) => r.model))];
const scenarios = [...new Set(DATA.map((r) => r.scenario))];
const avgScore = DATA.length ? Math.round(DATA.reduce((sum, r) => sum + r.score, 0) / DATA.length) : 0;
const totalPassed = DATA.reduce((sum, r) => sum + r.testsPassed, 0);
const totalTests = DATA.reduce((sum, r) => sum + r.testsTotal, 0);
const passRate = totalTests ? Math.round(totalPassed / totalTests * 100) : 0;

// Model with the highest mean score; undefined when there are no runs.
const bestModel = models.map((m) => {
  const mrs = DATA.filter((r) => r.model === m);
  return { model: m, avg: Math.round(mrs.reduce((sum, r) => sum + r.score, 0) / mrs.length) };
}).sort((a, b) => b.avg - a.avg)[0];

// Model with the highest mean tokens-per-second; undefined when there are no runs.
const fastestModel = models.map((m) => {
  const mrs = DATA.filter((r) => r.model === m);
  return { model: m, speed: Math.round(mrs.reduce((sum, r) => sum + r.avgTokPerSec, 0) / mrs.length) };
}).sort((a, b) => b.speed - a.speed)[0];

// The first card's sub-label previously read "pistetta"; the correct spelling
// is "pistettä", matching the umlauts used in the other labels.
document.getElementById('cards').innerHTML = `
<div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistettä</div></div>
<div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
<div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${bestModel?.model || '-'}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
<div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${fastestModel?.model || '-'}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
<div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
<div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime / 1000 / 60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
`;
|
||||
|
||||
// Summary table: one row per model, highest average score first.
const sumHead = document.querySelector('#summary-table thead');
const sumBody = document.querySelector('#summary-table tbody');
sumHead.innerHTML = `<tr><th>Malli</th>${scenarios.map((s) => `<th>${s}</th>`).join('')}<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>`;

const modelRows = models.map((m) => {
  const mrs = DATA.filter((r) => r.model === m);
  // Sums one numeric field over a list of runs.
  const sumOf = (rows, key) => rows.reduce((acc, r) => acc + r[key], 0);
  const tp = sumOf(mrs, 'testsPassed');
  const tt = sumOf(mrs, 'testsTotal');
  const tok = sumOf(mrs, 'totalTokens');
  const time = sumOf(mrs, 'totalDurationMs');
  const speed = Math.round(sumOf(mrs, 'avgTokPerSec') / mrs.length);
  const avg = Math.round(sumOf(mrs, 'score') / mrs.length);
  const scenCols = scenarios.map((s) => {
    // A model can have several runs (rounds) of the same scenario. Fold all of
    // them into the cell so it agrees with the totals column, instead of
    // showing only the first run. With a single run the output is unchanged.
    const runs = mrs.filter((r) => r.scenario === s);
    if (runs.length === 0) return '<td>-</td>';
    const agg = {
      error: runs.some((r) => r.error),
      testsPassed: sumOf(runs, 'testsPassed'),
      testsTotal: sumOf(runs, 'testsTotal'),
    };
    const secs = (sumOf(runs, 'totalDurationMs') / 1000).toFixed(0);
    return `<td class="${cls(agg)}">${pctBar(agg.testsPassed, agg.testsTotal, 60)} <span style="color:var(--dim)">${secs}s</span></td>`;
  }).join('');
  return { avg, html: `<tr><td class="model-name">${m}</td>${scenCols}<td>${pctBar(tp, tt)}</td><td>${(tok / 1000).toFixed(1)}K</td><td>${(time / 1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>` };
}).sort((a, b) => b.avg - a.avg);
sumBody.innerHTML = modelRows.map((row) => row.html).join('');
|
||||
|
||||
// Results table: one header cell per column; data-col holds the column index.
const resHead = document.querySelector('#results-table thead');
const resBody = document.querySelector('#results-table tbody');
const resCols = [
  'Malli',
  'Skenaario',
  'Speksi',
  'Testit',
  'Korjaus',
  'Ctx',
  'Out tok',
  'Aika',
  'tok/s',
  'Pisteet',
];
resHead.innerHTML = `<tr>${resCols.map((label, idx) => `<th data-col="${idx}">${label}</th>`).join('')}</tr>`;

// Sort state: active column index (9 is the last column, 'Pisteet') and direction.
let sortCol = 9;
let sortAsc = false;
|
||||
/**
 * Re-renders the body of the results table, sorted by the active column.
 *
 * Reads the script-level sort state (`sortCol`, `sortAsc`), writes one row per
 * run into `resBody`, and sets the header classes so the sorted column shows
 * its direction arrow.
 */
function renderResults() {
  // One accessor per column, in the same order as the header cells.
  const keyFns = [
    (r) => r.model,
    (r) => r.scenario,
    (r) => r.specEntities,
    (r) => r.testsPassed / Math.max(r.testsTotal, 1),
    (r) => r.fixRounds,
    (r) => r.promptTokensEst,
    (r) => r.totalTokens,
    (r) => r.totalDurationMs,
    (r) => r.avgTokPerSec,
    (r) => r.score,
  ];
  const keyOf = keyFns[sortCol];
  // Missing or non-numeric values count as 0, so the comparator never returns
  // NaN and formatting never calls a method on undefined.
  const num = (v) => {
    const n = Number(v);
    return Number.isFinite(n) ? n : 0;
  };

  const sorted = [...DATA].sort((a, b) => {
    const x = keyOf(a);
    const y = keyOf(b);
    const textual = typeof x === 'string' || typeof y === 'string';
    const cmp = textual
      ? String(x ?? '').localeCompare(String(y ?? ''))
      : num(x) - num(y);
    return sortAsc ? cmp : -cmp;
  });

  resBody.innerHTML = sorted.map((r) => {
    const c = cls(r);
    return `<tr>
<td class="model-name">${r.model}</td>
<td>${r.scenario}</td>
<td>${r.specOk ? `✓ ${r.specEntities}e` : '<span class="fail">✗</span>'}</td>
<td class="${c}">${pctBar(r.testsPassed, r.testsTotal)}</td>
<td>${r.fixRounds > 0 ? r.fixRounds + '×' : '-'}</td>
<td>${r.promptTokensEst > 0 ? '~' + (r.promptTokensEst / 1000).toFixed(1) + 'K' : '-'}</td>
<td>${r.totalTokens > 0 ? (r.totalTokens / 1000).toFixed(1) + 'K' : '-'}</td>
<td>${(num(r.totalDurationMs) / 1000).toFixed(0)}s</td>
<td>${num(r.avgTokPerSec).toFixed(0)}</td>
<td><span class="stars">${r.stars}</span> ${r.score}p</td>
</tr>`;
  }).join('');

  // Only the active column carries a sort-direction class.
  const activeClass = sortAsc ? 'sorted-asc' : 'sorted-desc';
  document.querySelectorAll('#results-table th').forEach((th, i) => {
    th.className = i === sortCol ? activeClass : '';
  });
}
|
||||
// Clicking a header cell sorts by that column; clicking the active column
// again flips the direction. A newly selected column starts descending.
document.querySelector('#results-table thead').addEventListener('click', (e) => {
  // Resolve the header cell even when the click lands on an element nested in it.
  const th = e.target.closest('th');
  if (!th) return;
  const col = Number.parseInt(th.dataset.col, 10);
  if (Number.isNaN(col)) return;
  if (sortCol === col) {
    sortAsc = !sortAsc;
  } else {
    sortCol = col;
    sortAsc = false;
  }
  renderResults();
});

// Initial render using the current sort state.
renderResults();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
65
kipina-codebench/results/2026-04-14T20-32-12.json
Normal file
65
kipina-codebench/results/2026-04-14T20-32-12.json
Normal file
@@ -0,0 +1,65 @@
|
||||
[
|
||||
{
|
||||
"model": "qwen3:8b",
|
||||
"scenario": "blog",
|
||||
"reqOk": true,
|
||||
"specOk": true,
|
||||
"specEntities": 2,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 0,
|
||||
"testsTotal": 0,
|
||||
"testsPassed": 0,
|
||||
"testsFailed": 0,
|
||||
"totalDurationMs": 0,
|
||||
"totalTokens": 0,
|
||||
"avgTokPerSec": 0,
|
||||
"promptChars": 30421,
|
||||
"promptTokensEst": 7605,
|
||||
"score": 0,
|
||||
"stars": "",
|
||||
"error": "Puuttuvat: go.mod",
|
||||
"round": 1
|
||||
},
|
||||
{
|
||||
"model": "qwen3:8b",
|
||||
"scenario": "blog",
|
||||
"reqOk": true,
|
||||
"specOk": true,
|
||||
"specEntities": 2,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 0,
|
||||
"testsTotal": 0,
|
||||
"testsPassed": 0,
|
||||
"testsFailed": 0,
|
||||
"totalDurationMs": 0,
|
||||
"totalTokens": 0,
|
||||
"avgTokPerSec": 0,
|
||||
"promptChars": 26971,
|
||||
"promptTokensEst": 6743,
|
||||
"score": 0,
|
||||
"stars": "",
|
||||
"error": "Puuttuvat: go.mod",
|
||||
"round": 2
|
||||
},
|
||||
{
|
||||
"model": "qwen3:8b",
|
||||
"scenario": "blog",
|
||||
"reqOk": true,
|
||||
"specOk": true,
|
||||
"specEntities": 2,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 0,
|
||||
"testsTotal": 0,
|
||||
"testsPassed": 0,
|
||||
"testsFailed": 0,
|
||||
"totalDurationMs": 0,
|
||||
"totalTokens": 0,
|
||||
"avgTokPerSec": 0,
|
||||
"promptChars": 27694,
|
||||
"promptTokensEst": 6924,
|
||||
"score": 0,
|
||||
"stars": "",
|
||||
"error": "Puuttuvat: go.mod",
|
||||
"round": 3
|
||||
}
|
||||
]
|
||||
183
kipina-codebench/results/2026-04-14T20-37-18.html
Normal file
183
kipina-codebench/results/2026-04-14T20-37-18.html
Normal file
@@ -0,0 +1,183 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="fi">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Kipina Model Benchmark</title>
|
||||
<style>
|
||||
:root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
|
||||
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||
body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
|
||||
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
|
||||
.meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
|
||||
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
|
||||
.card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
|
||||
.card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||
.card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
|
||||
.card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
|
||||
table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
|
||||
th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
|
||||
th:hover { color: var(--text); }
|
||||
th.sorted-asc::after { content: ' ▲'; }
|
||||
th.sorted-desc::after { content: ' ▼'; }
|
||||
td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
|
||||
tr:hover td { background: #1c2128; }
|
||||
.pass { color: var(--green); }
|
||||
.partial { color: var(--yellow); }
|
||||
.fail { color: var(--red); }
|
||||
.stars { letter-spacing: 1px; }
|
||||
.bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
|
||||
.bar-bg { background: var(--border); }
|
||||
.bar-fill { background: var(--green); }
|
||||
.bar-partial { background: var(--yellow); }
|
||||
.model-name { font-weight: 600; }
|
||||
h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
|
||||
.summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<h1>Kipina Model Benchmark</h1>
|
||||
<div class="meta" id="meta"></div>
|
||||
|
||||
<div class="cards" id="cards"></div>
|
||||
|
||||
<h2>Mallikohtainen yhteenveto</h2>
|
||||
<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
|
||||
|
||||
<h2>Kaikki tulokset</h2>
|
||||
<table id="results-table"><thead></thead><tbody></tbody></table>
|
||||
|
||||
<script>
|
||||
const RAW = [{"model":"qwen3:8b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":1,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":95709,"totalTokens":11690,"avgTokPerSec":132.16841505518815,"promptChars":20579,"promptTokensEst":5145,"score":30,"stars":"★★☆☆☆","error":null,"profile":"small","promptName":"code-go","round":1},{"model":"qwen3:8b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":45562,"totalTokens":5534,"avgTokPerSec":132.0768897820692,"promptChars":19628,"promptTokensEst":4907,"score":40,"stars":"★★☆☆☆","error":null,"profile":"small","promptName":"code-go","round":2},{"model":"qwen3:8b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":47588,"totalTokens":5819,"avgTokPerSec":132.74582771351155,"promptChars":20752,"promptTokensEst":5188,"score":40,"stars":"★★☆☆☆","error":null,"profile":"small","promptName":"code-go","round":3}];
|
||||
|
||||
// Maps a score to a five-character star rating: 90+ gives five filled stars,
// 70+ four, 50+ three, 25+ two, any other positive score one, otherwise none.
const starsFor = (s) => {
  const cutoffs = [90, 70, 50, 25];
  const tier = cutoffs.findIndex((min) => s >= min);
  let filled;
  if (tier !== -1) {
    filled = 5 - tier;
  } else if (s > 0) {
    filled = 1;
  } else {
    filled = 0;
  }
  return '★'.repeat(filled) + '☆'.repeat(5 - filled);
};
|
||||
/**
 * Computes a 0–100 score for one benchmark run whose record has no stored score.
 *
 * Breakdown: 10 points for a valid spec, 10 points for a run that produced a
 * result (no error, or at least one test executed), up to 60 points in
 * proportion to the share of passed tests, and a 20-point bonus that shrinks
 * by 10 for every fix round.
 *
 * @param {object} r - Run record; reads error, specOk, testsTotal, testsPassed, fixRounds.
 * @returns {number} Integer score between 0 and 100.
 */
function calcScore(r) {
  // Missing, negative or non-numeric counters count as zero, so an incomplete
  // record cannot slip past the "errored before any test ran" guard, and
  // inconsistent counts cannot inflate the result.
  const total = Math.max(0, Number(r.testsTotal) || 0);
  const passed = Math.min(total, Math.max(0, Number(r.testsPassed) || 0));
  const fixes = Math.max(0, Number(r.fixRounds) || 0);

  // A run that failed before any test executed earns nothing.
  if (r.error && total === 0) return 0;

  let s = 0;
  if (r.specOk) s += 10;
  // Past the guard above, the run either had no error or executed tests.
  s += 10;
  if (total > 0) s += Math.round((passed / total) * 60);
  s += Math.max(0, 20 - fixes * 10);
  return Math.min(100, s);
}
|
||||
// Fill in fields a record may lack: the score, the star rating and the
// estimated prompt token count (promptChars / 4). Records are updated in place.
const DATA = RAW.map((run) => {
  const scoreMissing = run.score === null || run.score === undefined;
  if (scoreMissing) {
    run.score = calcScore(run);
  }
  if (!run.stars) {
    run.stars = starsFor(run.score);
  }
  if (!run.promptTokensEst) {
    run.promptTokensEst = run.promptChars ? Math.round(run.promptChars / 4) : 0;
  }
  return run;
});
|
||||
// Picks the CSS class for a run: 'pass' when tests ran, all passed and there
// was no error; 'partial' when tests ran and at least one passed; else 'fail'.
const cls = (r) => {
  const ranTests = r.testsTotal > 0;
  if (!ranTests) return 'fail';
  if (!r.error && r.testsPassed === r.testsTotal) return 'pass';
  return r.testsPassed > 0 ? 'partial' : 'fail';
};
|
||||
/**
 * Builds the HTML for a small progress bar followed by a "passed/total" label.
 *
 * @param {number} passed - Number of passed tests.
 * @param {number} total - Number of tests; anything that is not a positive number yields '-'.
 * @param {number} [w=80] - Width of the bar track in pixels.
 * @returns {string} HTML fragment, or '-' when there is nothing to draw.
 */
const pctBar = (passed, total, w = 80) => {
  // Covers 0 as well as a missing or non-numeric total, which would otherwise
  // produce "NaNpx" widths.
  if (!(total > 0)) return '-';
  const done = Number(passed) || 0;
  // Clamp so inconsistent counts can never draw the fill outside the track.
  const pct = Math.min(100, Math.max(0, done / total * 100));
  const c = pct === 100 ? 'bar-fill' : 'bar-partial';
  return `<span class="bar bar-bg" style="width:${w}px"><span class="bar ${c}" style="width:${Math.round(pct / 100 * w)}px"></span></span> ${done}/${total}`;
};
|
||||
|
||||
// Meta line under the title: the current date (taken when the page renders),
// the number of runs and the summed run duration in minutes.
const totalTime = DATA.reduce((sum, run) => sum + run.totalDurationMs, 0);
document.getElementById('meta').textContent = [
  new Date().toLocaleDateString('fi-FI'),
  `${DATA.length} ajoa`,
  `${(totalTime / 1000 / 60).toFixed(1)} min`,
].join(' — ');
|
||||
|
||||
// Cards: headline figures across all runs, shown above the tables.
const models = [...new Set(DATA.map((r) => r.model))];
const scenarios = [...new Set(DATA.map((r) => r.scenario))];
const avgScore = DATA.length ? Math.round(DATA.reduce((sum, r) => sum + r.score, 0) / DATA.length) : 0;
const totalPassed = DATA.reduce((sum, r) => sum + r.testsPassed, 0);
const totalTests = DATA.reduce((sum, r) => sum + r.testsTotal, 0);
const passRate = totalTests ? Math.round(totalPassed / totalTests * 100) : 0;

// Model with the highest mean score; undefined when there are no runs.
const bestModel = models.map((m) => {
  const mrs = DATA.filter((r) => r.model === m);
  return { model: m, avg: Math.round(mrs.reduce((sum, r) => sum + r.score, 0) / mrs.length) };
}).sort((a, b) => b.avg - a.avg)[0];

// Model with the highest mean tokens-per-second; undefined when there are no runs.
const fastestModel = models.map((m) => {
  const mrs = DATA.filter((r) => r.model === m);
  return { model: m, speed: Math.round(mrs.reduce((sum, r) => sum + r.avgTokPerSec, 0) / mrs.length) };
}).sort((a, b) => b.speed - a.speed)[0];

// The first card's sub-label previously read "pistetta"; the correct spelling
// is "pistettä", matching the umlauts used in the other labels.
document.getElementById('cards').innerHTML = `
<div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistettä</div></div>
<div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
<div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${bestModel?.model || '-'}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
<div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${fastestModel?.model || '-'}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
<div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
<div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime / 1000 / 60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
`;
|
||||
|
||||
// Summary table: one row per model, highest average score first.
const sumHead = document.querySelector('#summary-table thead');
const sumBody = document.querySelector('#summary-table tbody');
sumHead.innerHTML = `<tr><th>Malli</th>${scenarios.map((s) => `<th>${s}</th>`).join('')}<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>`;

const modelRows = models.map((m) => {
  const mrs = DATA.filter((r) => r.model === m);
  // Sums one numeric field over a list of runs.
  const sumOf = (rows, key) => rows.reduce((acc, r) => acc + r[key], 0);
  const tp = sumOf(mrs, 'testsPassed');
  const tt = sumOf(mrs, 'testsTotal');
  const tok = sumOf(mrs, 'totalTokens');
  const time = sumOf(mrs, 'totalDurationMs');
  const speed = Math.round(sumOf(mrs, 'avgTokPerSec') / mrs.length);
  const avg = Math.round(sumOf(mrs, 'score') / mrs.length);
  const scenCols = scenarios.map((s) => {
    // A model can have several runs (rounds) of the same scenario. Fold all of
    // them into the cell so it agrees with the totals column, instead of
    // showing only the first run. With a single run the output is unchanged.
    const runs = mrs.filter((r) => r.scenario === s);
    if (runs.length === 0) return '<td>-</td>';
    const agg = {
      error: runs.some((r) => r.error),
      testsPassed: sumOf(runs, 'testsPassed'),
      testsTotal: sumOf(runs, 'testsTotal'),
    };
    const secs = (sumOf(runs, 'totalDurationMs') / 1000).toFixed(0);
    return `<td class="${cls(agg)}">${pctBar(agg.testsPassed, agg.testsTotal, 60)} <span style="color:var(--dim)">${secs}s</span></td>`;
  }).join('');
  return { avg, html: `<tr><td class="model-name">${m}</td>${scenCols}<td>${pctBar(tp, tt)}</td><td>${(tok / 1000).toFixed(1)}K</td><td>${(time / 1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>` };
}).sort((a, b) => b.avg - a.avg);
sumBody.innerHTML = modelRows.map((row) => row.html).join('');
|
||||
|
||||
// Results table: one header cell per column; data-col holds the column index.
const resHead = document.querySelector('#results-table thead');
const resBody = document.querySelector('#results-table tbody');
const resCols = [
  'Malli',
  'Skenaario',
  'Speksi',
  'Testit',
  'Korjaus',
  'Ctx',
  'Out tok',
  'Aika',
  'tok/s',
  'Pisteet',
];
resHead.innerHTML = `<tr>${resCols.map((label, idx) => `<th data-col="${idx}">${label}</th>`).join('')}</tr>`;

// Sort state: active column index (9 is the last column, 'Pisteet') and direction.
let sortCol = 9;
let sortAsc = false;
|
||||
/**
 * Re-renders the body of the results table, sorted by the active column.
 *
 * Reads the script-level sort state (`sortCol`, `sortAsc`), writes one row per
 * run into `resBody`, and sets the header classes so the sorted column shows
 * its direction arrow.
 */
function renderResults() {
  // One accessor per column, in the same order as the header cells.
  const keyFns = [
    (r) => r.model,
    (r) => r.scenario,
    (r) => r.specEntities,
    (r) => r.testsPassed / Math.max(r.testsTotal, 1),
    (r) => r.fixRounds,
    (r) => r.promptTokensEst,
    (r) => r.totalTokens,
    (r) => r.totalDurationMs,
    (r) => r.avgTokPerSec,
    (r) => r.score,
  ];
  const keyOf = keyFns[sortCol];
  // Missing or non-numeric values count as 0, so the comparator never returns
  // NaN and formatting never calls a method on undefined.
  const num = (v) => {
    const n = Number(v);
    return Number.isFinite(n) ? n : 0;
  };

  const sorted = [...DATA].sort((a, b) => {
    const x = keyOf(a);
    const y = keyOf(b);
    const textual = typeof x === 'string' || typeof y === 'string';
    const cmp = textual
      ? String(x ?? '').localeCompare(String(y ?? ''))
      : num(x) - num(y);
    return sortAsc ? cmp : -cmp;
  });

  resBody.innerHTML = sorted.map((r) => {
    const c = cls(r);
    return `<tr>
<td class="model-name">${r.model}</td>
<td>${r.scenario}</td>
<td>${r.specOk ? `✓ ${r.specEntities}e` : '<span class="fail">✗</span>'}</td>
<td class="${c}">${pctBar(r.testsPassed, r.testsTotal)}</td>
<td>${r.fixRounds > 0 ? r.fixRounds + '×' : '-'}</td>
<td>${r.promptTokensEst > 0 ? '~' + (r.promptTokensEst / 1000).toFixed(1) + 'K' : '-'}</td>
<td>${r.totalTokens > 0 ? (r.totalTokens / 1000).toFixed(1) + 'K' : '-'}</td>
<td>${(num(r.totalDurationMs) / 1000).toFixed(0)}s</td>
<td>${num(r.avgTokPerSec).toFixed(0)}</td>
<td><span class="stars">${r.stars}</span> ${r.score}p</td>
</tr>`;
  }).join('');

  // Only the active column carries a sort-direction class.
  const activeClass = sortAsc ? 'sorted-asc' : 'sorted-desc';
  document.querySelectorAll('#results-table th').forEach((th, i) => {
    th.className = i === sortCol ? activeClass : '';
  });
}
|
||||
// Clicking a header cell sorts by that column; clicking the active column
// again flips the direction. A newly selected column starts descending.
document.querySelector('#results-table thead').addEventListener('click', (e) => {
  // Resolve the header cell even when the click lands on an element nested in it.
  const th = e.target.closest('th');
  if (!th) return;
  const col = Number.parseInt(th.dataset.col, 10);
  if (Number.isNaN(col)) return;
  if (sortCol === col) {
    sortAsc = !sortAsc;
  } else {
    sortCol = col;
    sortAsc = false;
  }
  renderResults();
});

// Initial render using the current sort state.
renderResults();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
71
kipina-codebench/results/2026-04-14T20-37-18.json
Normal file
71
kipina-codebench/results/2026-04-14T20-37-18.json
Normal file
@@ -0,0 +1,71 @@
|
||||
[
|
||||
{
|
||||
"model": "qwen3:8b",
|
||||
"scenario": "blog",
|
||||
"reqOk": true,
|
||||
"specOk": true,
|
||||
"specEntities": 2,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 1,
|
||||
"testsTotal": 0,
|
||||
"testsPassed": 0,
|
||||
"testsFailed": 0,
|
||||
"totalDurationMs": 95709,
|
||||
"totalTokens": 11690,
|
||||
"avgTokPerSec": 132.16841505518815,
|
||||
"promptChars": 20579,
|
||||
"promptTokensEst": 5145,
|
||||
"score": 30,
|
||||
"stars": "★★☆☆☆",
|
||||
"error": null,
|
||||
"profile": "small",
|
||||
"promptName": "code-go",
|
||||
"round": 1
|
||||
},
|
||||
{
|
||||
"model": "qwen3:8b",
|
||||
"scenario": "blog",
|
||||
"reqOk": true,
|
||||
"specOk": true,
|
||||
"specEntities": 2,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 0,
|
||||
"testsTotal": 0,
|
||||
"testsPassed": 0,
|
||||
"testsFailed": 0,
|
||||
"totalDurationMs": 45562,
|
||||
"totalTokens": 5534,
|
||||
"avgTokPerSec": 132.0768897820692,
|
||||
"promptChars": 19628,
|
||||
"promptTokensEst": 4907,
|
||||
"score": 40,
|
||||
"stars": "★★☆☆☆",
|
||||
"error": null,
|
||||
"profile": "small",
|
||||
"promptName": "code-go",
|
||||
"round": 2
|
||||
},
|
||||
{
|
||||
"model": "qwen3:8b",
|
||||
"scenario": "blog",
|
||||
"reqOk": true,
|
||||
"specOk": true,
|
||||
"specEntities": 2,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 0,
|
||||
"testsTotal": 0,
|
||||
"testsPassed": 0,
|
||||
"testsFailed": 0,
|
||||
"totalDurationMs": 47588,
|
||||
"totalTokens": 5819,
|
||||
"avgTokPerSec": 132.74582771351155,
|
||||
"promptChars": 20752,
|
||||
"promptTokensEst": 5188,
|
||||
"score": 40,
|
||||
"stars": "★★☆☆☆",
|
||||
"error": null,
|
||||
"profile": "small",
|
||||
"promptName": "code-go",
|
||||
"round": 3
|
||||
}
|
||||
]
|
||||
183
kipina-codebench/results/2026-04-14T20-41-39.html
Normal file
183
kipina-codebench/results/2026-04-14T20-41-39.html
Normal file
@@ -0,0 +1,183 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="fi">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Kipina Model Benchmark</title>
|
||||
<style>
|
||||
:root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
|
||||
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||
body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
|
||||
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
|
||||
.meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
|
||||
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
|
||||
.card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
|
||||
.card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||
.card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
|
||||
.card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
|
||||
table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
|
||||
th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
|
||||
th:hover { color: var(--text); }
|
||||
th.sorted-asc::after { content: ' ▲'; }
|
||||
th.sorted-desc::after { content: ' ▼'; }
|
||||
td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
|
||||
tr:hover td { background: #1c2128; }
|
||||
.pass { color: var(--green); }
|
||||
.partial { color: var(--yellow); }
|
||||
.fail { color: var(--red); }
|
||||
.stars { letter-spacing: 1px; }
|
||||
.bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
|
||||
.bar-bg { background: var(--border); }
|
||||
.bar-fill { background: var(--green); }
|
||||
.bar-partial { background: var(--yellow); }
|
||||
.model-name { font-weight: 600; }
|
||||
h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
|
||||
.summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<h1>Kipina Model Benchmark</h1>
|
||||
<div class="meta" id="meta"></div>
|
||||
|
||||
<div class="cards" id="cards"></div>
|
||||
|
||||
<h2>Mallikohtainen yhteenveto</h2>
|
||||
<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
|
||||
|
||||
<h2>Kaikki tulokset</h2>
|
||||
<table id="results-table"><thead></thead><tbody></tbody></table>
|
||||
|
||||
<script>
|
||||
const RAW = [];
|
||||
|
||||
// Converts a numeric score into a five-character star rating.
// Thresholds: >=90 five stars, >=70 four, >=50 three, >=25 two, >0 one, otherwise none.
const starsFor = s => {
  if (s >= 90) return '★★★★★';
  if (s >= 70) return '★★★★☆';
  if (s >= 50) return '★★★☆☆';
  if (s >= 25) return '★★☆☆☆';
  return s > 0 ? '★☆☆☆☆' : '☆☆☆☆☆';
};
|
||||
/**
 * Computes a 0–100 score for one benchmark run record.
 *
 * Components:
 *   - 10 points when the spec step succeeded (`specOk`),
 *   - 10 points when the run finished without error or at least reached tests,
 *   - up to 60 points proportional to the share of passed tests,
 *   - 20 points minus 10 per fix round (never below 0).
 *
 * Missing or non-numeric test counts are treated as 0 and the pass ratio is
 * clamped to [0, 1], so malformed records cannot yield NaN or inflate the
 * test component.
 *
 * @param {object} r - Run record (reads error, specOk, testsTotal, testsPassed, fixRounds).
 * @returns {number} Integer score between 0 and 100.
 */
function calcScore(r) {
  const rawTotal = Number(r.testsTotal);
  const rawPassed = Number(r.testsPassed);
  const total = Number.isFinite(rawTotal) && rawTotal > 0 ? rawTotal : 0;
  const passed = Number.isFinite(rawPassed) ? rawPassed : 0;

  // A run that errored before any test executed earns nothing.
  if (r.error && total === 0) return 0;

  let s = 0;
  if (r.specOk) s += 10;
  if (!r.error || total > 0) s += 10;
  if (total > 0) {
    const ratio = Math.min(1, Math.max(0, passed / total));
    s += Math.round(ratio * 60);
  }
  s += Math.max(0, 20 - (r.fixRounds || 0) * 10);
  return Math.min(100, s);
}
|
||||
// Compute the score (and the other derived fields) when they are missing.
// Each record is copied, so the objects in RAW are left untouched.
const DATA = RAW.map(r => {
  const score = r.score ?? calcScore(r);
  return {
    ...r,
    score,
    stars: r.stars || starsFor(score),
    // Rough estimate: four prompt characters per token; 0 when the prompt size is unknown.
    promptTokensEst: r.promptTokensEst || (r.promptChars ? Math.round(r.promptChars / 4) : 0),
  };
});
|
||||
// Picks the CSS class for a run: 'pass' when every test passed without an error,
// 'partial' when tests ran and at least one passed, otherwise 'fail'.
const cls = r => {
  const ranTests = r.testsTotal > 0;
  if (ranTests && !r.error && r.testsPassed === r.testsTotal) return 'pass';
  if (ranTests && r.testsPassed > 0) return 'partial';
  return 'fail';
};
|
||||
/**
 * Renders a small horizontal progress bar followed by a "passed/total" label.
 *
 * @param {number} passed - Number of passed tests.
 * @param {number} total - Number of tests; anything that is not > 0 renders as '-'.
 * @param {number} [w=80] - Width of the bar background in pixels.
 * @returns {string} HTML fragment, or '-' when there is nothing to show.
 */
const pctBar = (passed, total, w=80) => {
  // Covers 0 as well as missing or negative totals, which would otherwise give NaN/negative widths.
  if (!(total > 0)) return '-';
  const rawPct = passed/total*100;
  // Clamp so an over-reported pass count cannot draw outside the bar background.
  const pct = Number.isFinite(rawPct) ? Math.min(100, Math.max(0, rawPct)) : 0;
  const c = pct === 100 ? 'bar-fill' : 'bar-partial';
  return `<span class="bar bar-bg" style="width:${w}px"><span class="bar ${c}" style="width:${Math.round(pct/100*w)}px"></span></span> ${passed}/${total}`;
};
|
||||
|
||||
// Meta line: date, number of runs and total wall time in minutes.
const totalTime = DATA.reduce((s,r) => s + r.totalDurationMs, 0);
// NOTE: new Date() is evaluated when the page is opened, so the date shown is
// the viewing date, not a timestamp stored with the results.
document.getElementById('meta').textContent = `${new Date().toLocaleDateString('fi-FI')} — ${DATA.length} ajoa — ${(totalTime/1000/60).toFixed(1)} min`;

// Cards: headline figures computed over all runs.
const models = [...new Set(DATA.map(r => r.model))];
const scenarios = [...new Set(DATA.map(r => r.scenario))];
const avgScore = DATA.length ? Math.round(DATA.reduce((s,r) => s + r.score, 0) / DATA.length) : 0;
const totalPassed = DATA.reduce((s,r) => s + r.testsPassed, 0);
const totalTests = DATA.reduce((s,r) => s + r.testsTotal, 0);
const passRate = totalTests ? Math.round(totalPassed/totalTests*100) : 0;
// Model with the highest mean score; undefined when there is no data.
const bestModel = models.map(m => {
  const mrs = DATA.filter(r => r.model === m);
  return { model: m, avg: Math.round(mrs.reduce((s,r) => s + r.score, 0) / mrs.length) };
}).sort((a,b) => b.avg - a.avg)[0];
// Model with the highest mean tokens-per-second; undefined when there is no data.
const fastestModel = models.map(m => {
  const mrs = DATA.filter(r => r.model === m);
  return { model: m, speed: Math.round(mrs.reduce((s,r) => s + r.avgTokPerSec, 0) / mrs.length) };
}).sort((a,b) => b.speed - a.speed)[0];

document.getElementById('cards').innerHTML = `
<div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistettä</div></div>
<div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
<div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${bestModel?.model || '-'}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
<div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${fastestModel?.model || '-'}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
<div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
<div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime/1000/60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
`;
|
||||
|
||||
// Summary table: one row per model, one column per scenario, then per-model totals.
const sumHead = document.querySelector('#summary-table thead');
const sumBody = document.querySelector('#summary-table tbody');
sumHead.innerHTML = '<tr><th>Malli</th>' + scenarios.map(s => `<th>${s}</th>`).join('') + '<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>';

const modelRows = models.map(m => {
  const mrs = DATA.filter(r => r.model === m);
  const tp = mrs.reduce((s,r) => s + r.testsPassed, 0);
  const tt = mrs.reduce((s,r) => s + r.testsTotal, 0);
  const tok = mrs.reduce((s,r) => s + r.totalTokens, 0);
  const time = mrs.reduce((s,r) => s + r.totalDurationMs, 0);
  const speed = Math.round(mrs.reduce((s,r) => s + r.avgTokPerSec, 0) / mrs.length);
  const avg = Math.round(mrs.reduce((s,r) => s + r.score, 0) / mrs.length);
  const scenCols = scenarios.map(s => {
    // A model can have several runs (rounds) of the same scenario; combine all
    // of them instead of showing only the first one found.
    const runs = mrs.filter(r => r.scenario === s);
    if (runs.length === 0) return '<td>-</td>';
    const agg = {
      error: runs.find(r => r.error)?.error ?? null,
      testsPassed: runs.reduce((n,r) => n + r.testsPassed, 0),
      testsTotal: runs.reduce((n,r) => n + r.testsTotal, 0),
      totalDurationMs: runs.reduce((n,r) => n + r.totalDurationMs, 0),
    };
    return `<td class="${cls(agg)}">${pctBar(agg.testsPassed, agg.testsTotal, 60)} <span style="color:var(--dim)">${(agg.totalDurationMs/1000).toFixed(0)}s</span></td>`;
  }).join('');
  return { avg, html: `<tr><td class="model-name">${m}</td>${scenCols}<td>${pctBar(tp, tt)}</td><td>${(tok/1000).toFixed(1)}K</td><td>${(time/1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>` };
}).sort((a,b) => b.avg - a.avg); // best average score first
sumBody.innerHTML = modelRows.map(r => r.html).join('');
|
||||
|
||||
// Results table
// Each header cell carries its column index in data-col so a click can be mapped to a sort column.
const resHead = document.querySelector('#results-table thead');
const resBody = document.querySelector('#results-table tbody');
const resCols = ['Malli','Skenaario','Speksi','Testit','Korjaus','Ctx','Out tok','Aika','tok/s','Pisteet'];
resHead.innerHTML = `<tr>${resCols.map((label, idx) => `<th data-col="${idx}">${label}</th>`).join('')}</tr>`;

// Initial ordering: column 9 ('Pisteet'), descending.
let sortCol = 9;
let sortAsc = false;
|
||||
/**
 * Re-renders the body of the results table, ordered by the current
 * `sortCol` / `sortAsc` state, and updates the sort marker on the header cells.
 */
function renderResults() {
  // One sort-key accessor per column, in the same order as the header cells.
  // Built once per render instead of once per comparison.
  const sortKeys = [
    r => r.model,
    r => r.scenario,
    r => r.specEntities,
    r => r.testsPassed / Math.max(r.testsTotal, 1),
    r => r.fixRounds,
    r => r.promptTokensEst,
    r => r.totalTokens,
    r => r.totalDurationMs,
    r => r.avgTokPerSec,
    r => r.score,
  ];
  const keyOf = sortKeys[sortCol];
  const sorted = [...DATA].sort((a, b) => {
    const x = keyOf(a);
    const y = keyOf(b);
    // Strings compare by locale; null/undefined numeric fields count as 0.
    const cmp = typeof x === 'string' ? x.localeCompare(y) : (x ?? 0) - (y ?? 0);
    return sortAsc ? cmp : -cmp;
  });

  resBody.innerHTML = sorted.map(r => {
    const c = cls(r);
    return `<tr>
<td class="model-name">${r.model}</td>
<td>${r.scenario}</td>
<td>${r.specOk ? `✓ ${r.specEntities}e` : '<span class="fail">✗</span>'}</td>
<td class="${c}">${pctBar(r.testsPassed, r.testsTotal)}</td>
<td>${r.fixRounds > 0 ? r.fixRounds + '×' : '-'}</td>
<td>${r.promptTokensEst > 0 ? '~'+(r.promptTokensEst/1000).toFixed(1)+'K' : '-'}</td>
<td>${r.totalTokens > 0 ? (r.totalTokens/1000).toFixed(1)+'K' : '-'}</td>
<td>${(r.totalDurationMs/1000).toFixed(0)}s</td>
<td>${(r.avgTokPerSec ?? 0).toFixed(0)}</td>
<td><span class="stars">${r.stars}</span> ${r.score}p</td>
</tr>`;
  }).join('');

  // Only the active column keeps a sort-direction class.
  document.querySelectorAll('#results-table th').forEach((th, i) => {
    th.className = i === sortCol ? (sortAsc ? 'sorted-asc' : 'sorted-desc') : '';
  });
}
|
||||
// Click-to-sort: clicking the active column flips the direction, clicking
// another column selects it and starts in descending order.
document.querySelector('#results-table thead').addEventListener('click', e => {
  // Resolve the header cell even if the click landed on an element inside it.
  const th = e.target.closest('th');
  if (!th) return;
  const col = Number.parseInt(th.dataset.col, 10);
  if (Number.isNaN(col)) return;
  if (sortCol === col) {
    sortAsc = !sortAsc;
  } else {
    sortCol = col;
    sortAsc = false;
  }
  renderResults();
});
// Initial render with the default sort state.
renderResults();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
1
kipina-codebench/results/2026-04-14T20-41-39.json
Normal file
1
kipina-codebench/results/2026-04-14T20-41-39.json
Normal file
@@ -0,0 +1 @@
|
||||
[]
|
||||
183
kipina-codebench/results/2026-04-14T20-41-48.html
Normal file
183
kipina-codebench/results/2026-04-14T20-41-48.html
Normal file
@@ -0,0 +1,183 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="fi">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Kipina Model Benchmark</title>
|
||||
<style>
|
||||
:root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
|
||||
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||
body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
|
||||
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
|
||||
.meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
|
||||
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
|
||||
.card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
|
||||
.card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||
.card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
|
||||
.card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
|
||||
table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
|
||||
th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
|
||||
th:hover { color: var(--text); }
|
||||
th.sorted-asc::after { content: ' ▲'; }
|
||||
th.sorted-desc::after { content: ' ▼'; }
|
||||
td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
|
||||
tr:hover td { background: #1c2128; }
|
||||
.pass { color: var(--green); }
|
||||
.partial { color: var(--yellow); }
|
||||
.fail { color: var(--red); }
|
||||
.stars { letter-spacing: 1px; }
|
||||
.bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
|
||||
.bar-bg { background: var(--border); }
|
||||
.bar-fill { background: var(--green); }
|
||||
.bar-partial { background: var(--yellow); }
|
||||
.model-name { font-weight: 600; }
|
||||
h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
|
||||
.summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<h1>Kipina Model Benchmark</h1>
|
||||
<div class="meta" id="meta"></div>
|
||||
|
||||
<div class="cards" id="cards"></div>
|
||||
|
||||
<h2>Mallikohtainen yhteenveto</h2>
|
||||
<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
|
||||
|
||||
<h2>Kaikki tulokset</h2>
|
||||
<table id="results-table"><thead></thead><tbody></tbody></table>
|
||||
|
||||
<script>
|
||||
const RAW = [];
|
||||
|
||||
// Converts a numeric score into a five-character star rating.
// Thresholds: >=90 five stars, >=70 four, >=50 three, >=25 two, >0 one, otherwise none.
const starsFor = s => {
  if (s >= 90) return '★★★★★';
  if (s >= 70) return '★★★★☆';
  if (s >= 50) return '★★★☆☆';
  if (s >= 25) return '★★☆☆☆';
  return s > 0 ? '★☆☆☆☆' : '☆☆☆☆☆';
};
|
||||
/**
 * Computes a 0–100 score for one benchmark run record.
 *
 * Components:
 *   - 10 points when the spec step succeeded (`specOk`),
 *   - 10 points when the run finished without error or at least reached tests,
 *   - up to 60 points proportional to the share of passed tests,
 *   - 20 points minus 10 per fix round (never below 0).
 *
 * Missing or non-numeric test counts are treated as 0 and the pass ratio is
 * clamped to [0, 1], so malformed records cannot yield NaN or inflate the
 * test component.
 *
 * @param {object} r - Run record (reads error, specOk, testsTotal, testsPassed, fixRounds).
 * @returns {number} Integer score between 0 and 100.
 */
function calcScore(r) {
  const rawTotal = Number(r.testsTotal);
  const rawPassed = Number(r.testsPassed);
  const total = Number.isFinite(rawTotal) && rawTotal > 0 ? rawTotal : 0;
  const passed = Number.isFinite(rawPassed) ? rawPassed : 0;

  // A run that errored before any test executed earns nothing.
  if (r.error && total === 0) return 0;

  let s = 0;
  if (r.specOk) s += 10;
  if (!r.error || total > 0) s += 10;
  if (total > 0) {
    const ratio = Math.min(1, Math.max(0, passed / total));
    s += Math.round(ratio * 60);
  }
  s += Math.max(0, 20 - (r.fixRounds || 0) * 10);
  return Math.min(100, s);
}
|
||||
// Compute the score (and the other derived fields) when they are missing.
// Each record is copied, so the objects in RAW are left untouched.
const DATA = RAW.map(r => {
  const score = r.score ?? calcScore(r);
  return {
    ...r,
    score,
    stars: r.stars || starsFor(score),
    // Rough estimate: four prompt characters per token; 0 when the prompt size is unknown.
    promptTokensEst: r.promptTokensEst || (r.promptChars ? Math.round(r.promptChars / 4) : 0),
  };
});
|
||||
// Picks the CSS class for a run: 'pass' when every test passed without an error,
// 'partial' when tests ran and at least one passed, otherwise 'fail'.
const cls = r => {
  const ranTests = r.testsTotal > 0;
  if (ranTests && !r.error && r.testsPassed === r.testsTotal) return 'pass';
  if (ranTests && r.testsPassed > 0) return 'partial';
  return 'fail';
};
|
||||
/**
 * Renders a small horizontal progress bar followed by a "passed/total" label.
 *
 * @param {number} passed - Number of passed tests.
 * @param {number} total - Number of tests; anything that is not > 0 renders as '-'.
 * @param {number} [w=80] - Width of the bar background in pixels.
 * @returns {string} HTML fragment, or '-' when there is nothing to show.
 */
const pctBar = (passed, total, w=80) => {
  // Covers 0 as well as missing or negative totals, which would otherwise give NaN/negative widths.
  if (!(total > 0)) return '-';
  const rawPct = passed/total*100;
  // Clamp so an over-reported pass count cannot draw outside the bar background.
  const pct = Number.isFinite(rawPct) ? Math.min(100, Math.max(0, rawPct)) : 0;
  const c = pct === 100 ? 'bar-fill' : 'bar-partial';
  return `<span class="bar bar-bg" style="width:${w}px"><span class="bar ${c}" style="width:${Math.round(pct/100*w)}px"></span></span> ${passed}/${total}`;
};
|
||||
|
||||
// Meta line: date, number of runs and total wall time in minutes.
const totalTime = DATA.reduce((s,r) => s + r.totalDurationMs, 0);
// NOTE: new Date() is evaluated when the page is opened, so the date shown is
// the viewing date, not a timestamp stored with the results.
document.getElementById('meta').textContent = `${new Date().toLocaleDateString('fi-FI')} — ${DATA.length} ajoa — ${(totalTime/1000/60).toFixed(1)} min`;

// Cards: headline figures computed over all runs.
const models = [...new Set(DATA.map(r => r.model))];
const scenarios = [...new Set(DATA.map(r => r.scenario))];
const avgScore = DATA.length ? Math.round(DATA.reduce((s,r) => s + r.score, 0) / DATA.length) : 0;
const totalPassed = DATA.reduce((s,r) => s + r.testsPassed, 0);
const totalTests = DATA.reduce((s,r) => s + r.testsTotal, 0);
const passRate = totalTests ? Math.round(totalPassed/totalTests*100) : 0;
// Model with the highest mean score; undefined when there is no data.
const bestModel = models.map(m => {
  const mrs = DATA.filter(r => r.model === m);
  return { model: m, avg: Math.round(mrs.reduce((s,r) => s + r.score, 0) / mrs.length) };
}).sort((a,b) => b.avg - a.avg)[0];
// Model with the highest mean tokens-per-second; undefined when there is no data.
const fastestModel = models.map(m => {
  const mrs = DATA.filter(r => r.model === m);
  return { model: m, speed: Math.round(mrs.reduce((s,r) => s + r.avgTokPerSec, 0) / mrs.length) };
}).sort((a,b) => b.speed - a.speed)[0];

document.getElementById('cards').innerHTML = `
<div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistettä</div></div>
<div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
<div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${bestModel?.model || '-'}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
<div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${fastestModel?.model || '-'}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
<div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
<div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime/1000/60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
`;
|
||||
|
||||
// Summary table: one row per model, one column per scenario, then per-model totals.
const sumHead = document.querySelector('#summary-table thead');
const sumBody = document.querySelector('#summary-table tbody');
sumHead.innerHTML = '<tr><th>Malli</th>' + scenarios.map(s => `<th>${s}</th>`).join('') + '<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>';

const modelRows = models.map(m => {
  const mrs = DATA.filter(r => r.model === m);
  const tp = mrs.reduce((s,r) => s + r.testsPassed, 0);
  const tt = mrs.reduce((s,r) => s + r.testsTotal, 0);
  const tok = mrs.reduce((s,r) => s + r.totalTokens, 0);
  const time = mrs.reduce((s,r) => s + r.totalDurationMs, 0);
  const speed = Math.round(mrs.reduce((s,r) => s + r.avgTokPerSec, 0) / mrs.length);
  const avg = Math.round(mrs.reduce((s,r) => s + r.score, 0) / mrs.length);
  const scenCols = scenarios.map(s => {
    // A model can have several runs (rounds) of the same scenario; combine all
    // of them instead of showing only the first one found.
    const runs = mrs.filter(r => r.scenario === s);
    if (runs.length === 0) return '<td>-</td>';
    const agg = {
      error: runs.find(r => r.error)?.error ?? null,
      testsPassed: runs.reduce((n,r) => n + r.testsPassed, 0),
      testsTotal: runs.reduce((n,r) => n + r.testsTotal, 0),
      totalDurationMs: runs.reduce((n,r) => n + r.totalDurationMs, 0),
    };
    return `<td class="${cls(agg)}">${pctBar(agg.testsPassed, agg.testsTotal, 60)} <span style="color:var(--dim)">${(agg.totalDurationMs/1000).toFixed(0)}s</span></td>`;
  }).join('');
  return { avg, html: `<tr><td class="model-name">${m}</td>${scenCols}<td>${pctBar(tp, tt)}</td><td>${(tok/1000).toFixed(1)}K</td><td>${(time/1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>` };
}).sort((a,b) => b.avg - a.avg); // best average score first
sumBody.innerHTML = modelRows.map(r => r.html).join('');
|
||||
|
||||
// Results table
// Each header cell carries its column index in data-col so a click can be mapped to a sort column.
const resHead = document.querySelector('#results-table thead');
const resBody = document.querySelector('#results-table tbody');
const resCols = ['Malli','Skenaario','Speksi','Testit','Korjaus','Ctx','Out tok','Aika','tok/s','Pisteet'];
resHead.innerHTML = `<tr>${resCols.map((label, idx) => `<th data-col="${idx}">${label}</th>`).join('')}</tr>`;

// Initial ordering: column 9 ('Pisteet'), descending.
let sortCol = 9;
let sortAsc = false;
|
||||
/**
 * Re-renders the body of the results table, ordered by the current
 * `sortCol` / `sortAsc` state, and updates the sort marker on the header cells.
 */
function renderResults() {
  // One sort-key accessor per column, in the same order as the header cells.
  // Built once per render instead of once per comparison.
  const sortKeys = [
    r => r.model,
    r => r.scenario,
    r => r.specEntities,
    r => r.testsPassed / Math.max(r.testsTotal, 1),
    r => r.fixRounds,
    r => r.promptTokensEst,
    r => r.totalTokens,
    r => r.totalDurationMs,
    r => r.avgTokPerSec,
    r => r.score,
  ];
  const keyOf = sortKeys[sortCol];
  const sorted = [...DATA].sort((a, b) => {
    const x = keyOf(a);
    const y = keyOf(b);
    // Strings compare by locale; null/undefined numeric fields count as 0.
    const cmp = typeof x === 'string' ? x.localeCompare(y) : (x ?? 0) - (y ?? 0);
    return sortAsc ? cmp : -cmp;
  });

  resBody.innerHTML = sorted.map(r => {
    const c = cls(r);
    return `<tr>
<td class="model-name">${r.model}</td>
<td>${r.scenario}</td>
<td>${r.specOk ? `✓ ${r.specEntities}e` : '<span class="fail">✗</span>'}</td>
<td class="${c}">${pctBar(r.testsPassed, r.testsTotal)}</td>
<td>${r.fixRounds > 0 ? r.fixRounds + '×' : '-'}</td>
<td>${r.promptTokensEst > 0 ? '~'+(r.promptTokensEst/1000).toFixed(1)+'K' : '-'}</td>
<td>${r.totalTokens > 0 ? (r.totalTokens/1000).toFixed(1)+'K' : '-'}</td>
<td>${(r.totalDurationMs/1000).toFixed(0)}s</td>
<td>${(r.avgTokPerSec ?? 0).toFixed(0)}</td>
<td><span class="stars">${r.stars}</span> ${r.score}p</td>
</tr>`;
  }).join('');

  // Only the active column keeps a sort-direction class.
  document.querySelectorAll('#results-table th').forEach((th, i) => {
    th.className = i === sortCol ? (sortAsc ? 'sorted-asc' : 'sorted-desc') : '';
  });
}
|
||||
// Click-to-sort: clicking the active column flips the direction, clicking
// another column selects it and starts in descending order.
document.querySelector('#results-table thead').addEventListener('click', e => {
  // Resolve the header cell even if the click landed on an element inside it.
  const th = e.target.closest('th');
  if (!th) return;
  const col = Number.parseInt(th.dataset.col, 10);
  if (Number.isNaN(col)) return;
  if (sortCol === col) {
    sortAsc = !sortAsc;
  } else {
    sortCol = col;
    sortAsc = false;
  }
  renderResults();
});
// Initial render with the default sort state.
renderResults();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
1
kipina-codebench/results/2026-04-14T20-41-48.json
Normal file
1
kipina-codebench/results/2026-04-14T20-41-48.json
Normal file
@@ -0,0 +1 @@
|
||||
[]
|
||||
183
kipina-codebench/results/2026-04-14T20-41-56.html
Normal file
183
kipina-codebench/results/2026-04-14T20-41-56.html
Normal file
@@ -0,0 +1,183 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="fi">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Kipina Model Benchmark</title>
|
||||
<style>
|
||||
:root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
|
||||
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||
body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
|
||||
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
|
||||
.meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
|
||||
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
|
||||
.card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
|
||||
.card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||
.card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
|
||||
.card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
|
||||
table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
|
||||
th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
|
||||
th:hover { color: var(--text); }
|
||||
th.sorted-asc::after { content: ' ▲'; }
|
||||
th.sorted-desc::after { content: ' ▼'; }
|
||||
td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
|
||||
tr:hover td { background: #1c2128; }
|
||||
.pass { color: var(--green); }
|
||||
.partial { color: var(--yellow); }
|
||||
.fail { color: var(--red); }
|
||||
.stars { letter-spacing: 1px; }
|
||||
.bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
|
||||
.bar-bg { background: var(--border); }
|
||||
.bar-fill { background: var(--green); }
|
||||
.bar-partial { background: var(--yellow); }
|
||||
.model-name { font-weight: 600; }
|
||||
h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
|
||||
.summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<h1>Kipina Model Benchmark</h1>
|
||||
<div class="meta" id="meta"></div>
|
||||
|
||||
<div class="cards" id="cards"></div>
|
||||
|
||||
<h2>Mallikohtainen yhteenveto</h2>
|
||||
<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
|
||||
|
||||
<h2>Kaikki tulokset</h2>
|
||||
<table id="results-table"><thead></thead><tbody></tbody></table>
|
||||
|
||||
<script>
|
||||
const RAW = [];
|
||||
|
||||
// Converts a numeric score into a five-character star rating.
// Thresholds: >=90 five stars, >=70 four, >=50 three, >=25 two, >0 one, otherwise none.
const starsFor = s => {
  if (s >= 90) return '★★★★★';
  if (s >= 70) return '★★★★☆';
  if (s >= 50) return '★★★☆☆';
  if (s >= 25) return '★★☆☆☆';
  return s > 0 ? '★☆☆☆☆' : '☆☆☆☆☆';
};
|
||||
/**
 * Computes a 0–100 score for one benchmark run record.
 *
 * Components:
 *   - 10 points when the spec step succeeded (`specOk`),
 *   - 10 points when the run finished without error or at least reached tests,
 *   - up to 60 points proportional to the share of passed tests,
 *   - 20 points minus 10 per fix round (never below 0).
 *
 * Missing or non-numeric test counts are treated as 0 and the pass ratio is
 * clamped to [0, 1], so malformed records cannot yield NaN or inflate the
 * test component.
 *
 * @param {object} r - Run record (reads error, specOk, testsTotal, testsPassed, fixRounds).
 * @returns {number} Integer score between 0 and 100.
 */
function calcScore(r) {
  const rawTotal = Number(r.testsTotal);
  const rawPassed = Number(r.testsPassed);
  const total = Number.isFinite(rawTotal) && rawTotal > 0 ? rawTotal : 0;
  const passed = Number.isFinite(rawPassed) ? rawPassed : 0;

  // A run that errored before any test executed earns nothing.
  if (r.error && total === 0) return 0;

  let s = 0;
  if (r.specOk) s += 10;
  if (!r.error || total > 0) s += 10;
  if (total > 0) {
    const ratio = Math.min(1, Math.max(0, passed / total));
    s += Math.round(ratio * 60);
  }
  s += Math.max(0, 20 - (r.fixRounds || 0) * 10);
  return Math.min(100, s);
}
|
||||
// Compute the score (and the other derived fields) when they are missing.
// Each record is copied, so the objects in RAW are left untouched.
const DATA = RAW.map(r => {
  const score = r.score ?? calcScore(r);
  return {
    ...r,
    score,
    stars: r.stars || starsFor(score),
    // Rough estimate: four prompt characters per token; 0 when the prompt size is unknown.
    promptTokensEst: r.promptTokensEst || (r.promptChars ? Math.round(r.promptChars / 4) : 0),
  };
});
|
||||
// Picks the CSS class for a run: 'pass' when every test passed without an error,
// 'partial' when tests ran and at least one passed, otherwise 'fail'.
const cls = r => {
  const ranTests = r.testsTotal > 0;
  if (ranTests && !r.error && r.testsPassed === r.testsTotal) return 'pass';
  if (ranTests && r.testsPassed > 0) return 'partial';
  return 'fail';
};
|
||||
/**
 * Renders a small horizontal progress bar followed by a "passed/total" label.
 *
 * @param {number} passed - Number of passed tests.
 * @param {number} total - Number of tests; anything that is not > 0 renders as '-'.
 * @param {number} [w=80] - Width of the bar background in pixels.
 * @returns {string} HTML fragment, or '-' when there is nothing to show.
 */
const pctBar = (passed, total, w=80) => {
  // Covers 0 as well as missing or negative totals, which would otherwise give NaN/negative widths.
  if (!(total > 0)) return '-';
  const rawPct = passed/total*100;
  // Clamp so an over-reported pass count cannot draw outside the bar background.
  const pct = Number.isFinite(rawPct) ? Math.min(100, Math.max(0, rawPct)) : 0;
  const c = pct === 100 ? 'bar-fill' : 'bar-partial';
  return `<span class="bar bar-bg" style="width:${w}px"><span class="bar ${c}" style="width:${Math.round(pct/100*w)}px"></span></span> ${passed}/${total}`;
};
|
||||
|
||||
// Meta line: date, number of runs and total wall time in minutes.
const totalTime = DATA.reduce((s,r) => s + r.totalDurationMs, 0);
// NOTE: new Date() is evaluated when the page is opened, so the date shown is
// the viewing date, not a timestamp stored with the results.
document.getElementById('meta').textContent = `${new Date().toLocaleDateString('fi-FI')} — ${DATA.length} ajoa — ${(totalTime/1000/60).toFixed(1)} min`;

// Cards: headline figures computed over all runs.
const models = [...new Set(DATA.map(r => r.model))];
const scenarios = [...new Set(DATA.map(r => r.scenario))];
const avgScore = DATA.length ? Math.round(DATA.reduce((s,r) => s + r.score, 0) / DATA.length) : 0;
const totalPassed = DATA.reduce((s,r) => s + r.testsPassed, 0);
const totalTests = DATA.reduce((s,r) => s + r.testsTotal, 0);
const passRate = totalTests ? Math.round(totalPassed/totalTests*100) : 0;
// Model with the highest mean score; undefined when there is no data.
const bestModel = models.map(m => {
  const mrs = DATA.filter(r => r.model === m);
  return { model: m, avg: Math.round(mrs.reduce((s,r) => s + r.score, 0) / mrs.length) };
}).sort((a,b) => b.avg - a.avg)[0];
// Model with the highest mean tokens-per-second; undefined when there is no data.
const fastestModel = models.map(m => {
  const mrs = DATA.filter(r => r.model === m);
  return { model: m, speed: Math.round(mrs.reduce((s,r) => s + r.avgTokPerSec, 0) / mrs.length) };
}).sort((a,b) => b.speed - a.speed)[0];

document.getElementById('cards').innerHTML = `
<div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistettä</div></div>
<div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
<div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${bestModel?.model || '-'}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
<div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${fastestModel?.model || '-'}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
<div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
<div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime/1000/60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
`;
|
||||
|
||||
// Summary table: one row per model, one column per scenario, then per-model totals.
const sumHead = document.querySelector('#summary-table thead');
const sumBody = document.querySelector('#summary-table tbody');
sumHead.innerHTML = '<tr><th>Malli</th>' + scenarios.map(s => `<th>${s}</th>`).join('') + '<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>';

const modelRows = models.map(m => {
  const mrs = DATA.filter(r => r.model === m);
  const tp = mrs.reduce((s,r) => s + r.testsPassed, 0);
  const tt = mrs.reduce((s,r) => s + r.testsTotal, 0);
  const tok = mrs.reduce((s,r) => s + r.totalTokens, 0);
  const time = mrs.reduce((s,r) => s + r.totalDurationMs, 0);
  const speed = Math.round(mrs.reduce((s,r) => s + r.avgTokPerSec, 0) / mrs.length);
  const avg = Math.round(mrs.reduce((s,r) => s + r.score, 0) / mrs.length);
  const scenCols = scenarios.map(s => {
    // A model can have several runs (rounds) of the same scenario; combine all
    // of them instead of showing only the first one found.
    const runs = mrs.filter(r => r.scenario === s);
    if (runs.length === 0) return '<td>-</td>';
    const agg = {
      error: runs.find(r => r.error)?.error ?? null,
      testsPassed: runs.reduce((n,r) => n + r.testsPassed, 0),
      testsTotal: runs.reduce((n,r) => n + r.testsTotal, 0),
      totalDurationMs: runs.reduce((n,r) => n + r.totalDurationMs, 0),
    };
    return `<td class="${cls(agg)}">${pctBar(agg.testsPassed, agg.testsTotal, 60)} <span style="color:var(--dim)">${(agg.totalDurationMs/1000).toFixed(0)}s</span></td>`;
  }).join('');
  return { avg, html: `<tr><td class="model-name">${m}</td>${scenCols}<td>${pctBar(tp, tt)}</td><td>${(tok/1000).toFixed(1)}K</td><td>${(time/1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>` };
}).sort((a,b) => b.avg - a.avg); // best average score first
sumBody.innerHTML = modelRows.map(r => r.html).join('');
|
||||
|
||||
// Results table
// Each header cell carries its column index in data-col so a click can be mapped to a sort column.
const resHead = document.querySelector('#results-table thead');
const resBody = document.querySelector('#results-table tbody');
const resCols = ['Malli','Skenaario','Speksi','Testit','Korjaus','Ctx','Out tok','Aika','tok/s','Pisteet'];
resHead.innerHTML = `<tr>${resCols.map((label, idx) => `<th data-col="${idx}">${label}</th>`).join('')}</tr>`;

// Initial ordering: column 9 ('Pisteet'), descending.
let sortCol = 9;
let sortAsc = false;
|
||||
/**
 * Re-renders the body of the results table, ordered by the current
 * `sortCol` / `sortAsc` state, and updates the sort marker on the header cells.
 */
function renderResults() {
  // One sort-key accessor per column, in the same order as the header cells.
  // Built once per render instead of once per comparison.
  const sortKeys = [
    r => r.model,
    r => r.scenario,
    r => r.specEntities,
    r => r.testsPassed / Math.max(r.testsTotal, 1),
    r => r.fixRounds,
    r => r.promptTokensEst,
    r => r.totalTokens,
    r => r.totalDurationMs,
    r => r.avgTokPerSec,
    r => r.score,
  ];
  const keyOf = sortKeys[sortCol];
  const sorted = [...DATA].sort((a, b) => {
    const x = keyOf(a);
    const y = keyOf(b);
    // Strings compare by locale; null/undefined numeric fields count as 0.
    const cmp = typeof x === 'string' ? x.localeCompare(y) : (x ?? 0) - (y ?? 0);
    return sortAsc ? cmp : -cmp;
  });

  resBody.innerHTML = sorted.map(r => {
    const c = cls(r);
    return `<tr>
<td class="model-name">${r.model}</td>
<td>${r.scenario}</td>
<td>${r.specOk ? `✓ ${r.specEntities}e` : '<span class="fail">✗</span>'}</td>
<td class="${c}">${pctBar(r.testsPassed, r.testsTotal)}</td>
<td>${r.fixRounds > 0 ? r.fixRounds + '×' : '-'}</td>
<td>${r.promptTokensEst > 0 ? '~'+(r.promptTokensEst/1000).toFixed(1)+'K' : '-'}</td>
<td>${r.totalTokens > 0 ? (r.totalTokens/1000).toFixed(1)+'K' : '-'}</td>
<td>${(r.totalDurationMs/1000).toFixed(0)}s</td>
<td>${(r.avgTokPerSec ?? 0).toFixed(0)}</td>
<td><span class="stars">${r.stars}</span> ${r.score}p</td>
</tr>`;
  }).join('');

  // Only the active column keeps a sort-direction class.
  document.querySelectorAll('#results-table th').forEach((th, i) => {
    th.className = i === sortCol ? (sortAsc ? 'sorted-asc' : 'sorted-desc') : '';
  });
}
|
||||
// Clicking a header cell sorts by that column; clicking the active column
// again flips the direction. A newly selected column starts descending.
document.querySelector('#results-table thead').addEventListener('click', (e) => {
  // Resolve the click to its header cell so clicks on nested content still count.
  const th = e.target.closest('th');
  const col = Number.parseInt(th?.dataset.col, 10);
  if (Number.isNaN(col)) return;
  if (sortCol === col) {
    sortAsc = !sortAsc;
  } else {
    sortCol = col;
    sortAsc = false;
  }
  renderResults();
});

// Initial render with the default sort.
renderResults();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
1
kipina-codebench/results/2026-04-14T20-41-56.json
Normal file
1
kipina-codebench/results/2026-04-14T20-41-56.json
Normal file
@@ -0,0 +1 @@
|
||||
[]
|
||||
183
kipina-codebench/results/2026-04-14T20-42-21.html
Normal file
183
kipina-codebench/results/2026-04-14T20-42-21.html
Normal file
@@ -0,0 +1,183 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="fi">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Kipina Model Benchmark</title>
|
||||
<style>
|
||||
:root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
|
||||
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||
body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
|
||||
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
|
||||
.meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
|
||||
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
|
||||
.card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
|
||||
.card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||
.card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
|
||||
.card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
|
||||
table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
|
||||
th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
|
||||
th:hover { color: var(--text); }
|
||||
th.sorted-asc::after { content: ' ▲'; }
|
||||
th.sorted-desc::after { content: ' ▼'; }
|
||||
td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
|
||||
tr:hover td { background: #1c2128; }
|
||||
.pass { color: var(--green); }
|
||||
.partial { color: var(--yellow); }
|
||||
.fail { color: var(--red); }
|
||||
.stars { letter-spacing: 1px; }
|
||||
.bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
|
||||
.bar-bg { background: var(--border); }
|
||||
.bar-fill { background: var(--green); }
|
||||
.bar-partial { background: var(--yellow); }
|
||||
.model-name { font-weight: 600; }
|
||||
h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
|
||||
.summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<h1>Kipina Model Benchmark</h1>
|
||||
<div class="meta" id="meta"></div>
|
||||
|
||||
<div class="cards" id="cards"></div>
|
||||
|
||||
<h2>Mallikohtainen yhteenveto</h2>
|
||||
<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
|
||||
|
||||
<h2>Kaikki tulokset</h2>
|
||||
<table id="results-table"><thead></thead><tbody></tbody></table>
|
||||
|
||||
<script>
|
||||
const RAW = [];
|
||||
|
||||
/**
 * Converts a numeric score into a five-character star rating.
 * Thresholds: >= 90 five stars, >= 70 four, >= 50 three, >= 25 two,
 * > 0 one, anything else (including non-numbers) zero stars.
 */
const starsFor = (s) => {
  if (s >= 90) return '★★★★★';
  if (s >= 70) return '★★★★☆';
  if (s >= 50) return '★★★☆☆';
  if (s >= 25) return '★★☆☆☆';
  if (s > 0) return '★☆☆☆☆';
  return '☆☆☆☆☆';
};
|
||||
/**
 * Computes a 0–100 score for one benchmark record.
 *
 * Components: 10 points when the spec is OK, 10 points when the run had no
 * error or executed at least one test, up to 60 points proportional to the
 * share of passed tests, and 20 points minus 10 per fix round (never below 0).
 * A record with an error and zero tests scores 0 outright.
 *
 * @param {object} r - Record with error, specOk, testsTotal, testsPassed, fixRounds.
 * @returns {number} Score capped at 100.
 */
function calcScore(r) {
  if (r.error && r.testsTotal === 0) return 0;

  const ranTests = r.testsTotal > 0;
  const specPoints = r.specOk ? 10 : 0;
  const runPoints = (!r.error || ranTests) ? 10 : 0;
  const testPoints = ranTests ? Math.round((r.testsPassed / r.testsTotal) * 60) : 0;
  const fixPoints = Math.max(0, 20 - (r.fixRounds || 0) * 10);

  return Math.min(100, specPoints + runPoints + testPoints + fixPoints);
}
|
||||
// Fill in derived fields that are missing from a record: score, stars and the
// estimated prompt token count (promptChars / 4). Each record is copied, so
// the objects in RAW are not modified.
const DATA = RAW.map((r) => {
  const score = r.score ?? calcScore(r);
  return {
    ...r,
    score,
    stars: r.stars || starsFor(score),
    promptTokensEst: r.promptTokensEst || (r.promptChars ? Math.round(r.promptChars / 4) : 0),
  };
});
|
||||
/**
 * Picks the CSS class for a record: 'pass' when there is no error and every
 * test (at least one) passed, 'partial' when some test passed, else 'fail'.
 */
const cls = (r) => {
  const ranTests = r.testsTotal > 0;
  if (!r.error && r.testsPassed === r.testsTotal && ranTests) return 'pass';
  if (ranTests && r.testsPassed > 0) return 'partial';
  return 'fail';
};
|
||||
/**
 * Builds the HTML for a small progress bar followed by a "passed/total" label.
 *
 * @param {number} passed - Number of passed tests; missing or non-positive counts as 0.
 * @param {number} total - Number of tests; when it is not a positive number '-' is returned.
 * @param {number} [w=80] - Width of the bar track in pixels.
 * @returns {string} HTML snippet, or '-' when there is nothing to show.
 */
const pctBar = (passed, total, w = 80) => {
  // Covers 0 as well as undefined / NaN / negative totals, which would otherwise render "NaNpx".
  if (!(total > 0)) return '-';
  const done = passed > 0 ? passed : 0;
  const pct = (done / total) * 100;
  const c = pct === 100 ? 'bar-fill' : 'bar-partial';
  // Keep the fill inside the track even if passed exceeds total.
  const fillPx = Math.min(w, Math.max(0, Math.round((pct / 100) * w)));
  return `<span class="bar bar-bg" style="width:${w}px"><span class="bar ${c}" style="width:${fillPx}px"></span></span> ${done}/${total}`;
};
|
||||
|
||||
// Sums one numeric field over a list of records. A missing value counts as 0
// so a single incomplete record cannot turn a total into NaN.
const sumField = (rows, key) => rows.reduce((acc, row) => acc + (row[key] ?? 0), 0);

// Meta line: date, number of runs and total duration in minutes.
// NOTE: new Date() is evaluated when the page is opened, so the date shown is
// the viewing date.
const totalTime = sumField(DATA, 'totalDurationMs');
document.getElementById('meta').textContent = `${new Date().toLocaleDateString('fi-FI')} — ${DATA.length} ajoa — ${(totalTime/1000/60).toFixed(1)} min`;

// Cards: headline figures over all records.
const models = [...new Set(DATA.map((r) => r.model))];
const scenarios = [...new Set(DATA.map((r) => r.scenario))];
const avgScore = DATA.length ? Math.round(sumField(DATA, 'score') / DATA.length) : 0;
const totalPassed = sumField(DATA, 'testsPassed');
const totalTests = sumField(DATA, 'testsTotal');
const passRate = totalTests ? Math.round(totalPassed / totalTests * 100) : 0;

// Rounded mean of one field over all records of a model. Every model name
// comes from DATA, so the filtered list is never empty.
const meanForModel = (m, key) => {
  const mrs = DATA.filter((r) => r.model === m);
  return Math.round(sumField(mrs, key) / mrs.length);
};
// Highest average score / highest average tok/s; undefined when DATA is empty.
const bestModel = models
  .map((m) => ({ model: m, avg: meanForModel(m, 'score') }))
  .sort((a, b) => b.avg - a.avg)[0];
const fastestModel = models
  .map((m) => ({ model: m, speed: meanForModel(m, 'avgTokPerSec') }))
  .sort((a, b) => b.speed - a.speed)[0];

document.getElementById('cards').innerHTML = `
  <div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistettä</div></div>
  <div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
  <div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${bestModel?.model || '-'}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
  <div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${fastestModel?.model || '-'}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
  <div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
  <div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime/1000/60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
`;
|
||||
|
||||
// Summary table: one row per model, one column per scenario, then totals.
const sumHead = document.querySelector('#summary-table thead');
const sumBody = document.querySelector('#summary-table tbody');
sumHead.innerHTML = '<tr><th>Malli</th>' + scenarios.map((s) => `<th>${s}</th>`).join('') + '<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>';

// Builds { avg, html } per model; rows are ordered by average score, best first.
const modelRows = models.map((m) => {
  // Sums one numeric field; a missing value counts as 0 so a single
  // incomplete record cannot turn a total into NaN.
  const sumOf = (rows, key) => rows.reduce((acc, row) => acc + (row[key] ?? 0), 0);

  const mrs = DATA.filter((r) => r.model === m);
  const tp = sumOf(mrs, 'testsPassed');
  const tt = sumOf(mrs, 'testsTotal');
  const tok = sumOf(mrs, 'totalTokens');
  const time = sumOf(mrs, 'totalDurationMs');
  const speed = Math.round(sumOf(mrs, 'avgTokPerSec') / mrs.length);
  const avg = Math.round(sumOf(mrs, 'score') / mrs.length);

  const scenCols = scenarios.map((s) => {
    // DATA may hold several records for the same model and scenario (repeated
    // runs). Aggregate all of them instead of showing only the first match;
    // with a single record the cell is the same as before.
    const runs = mrs.filter((r) => r.scenario === s);
    if (runs.length === 0) return '<td>-</td>';
    const verdicts = runs.map((r) => cls(r));
    const cellCls = verdicts.every((v) => v === 'pass')
      ? 'pass'
      : verdicts.every((v) => v === 'fail') ? 'fail' : 'partial';
    const passed = sumOf(runs, 'testsPassed');
    const total = sumOf(runs, 'testsTotal');
    const duration = sumOf(runs, 'totalDurationMs');
    return `<td class="${cellCls}">${pctBar(passed, total, 60)} <span style="color:var(--dim)">${(duration/1000).toFixed(0)}s</span></td>`;
  }).join('');

  return { avg, html: `<tr><td class="model-name">${m}</td>${scenCols}<td>${pctBar(tp, tt)}</td><td>${(tok/1000).toFixed(1)}K</td><td>${(time/1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>` };
}).sort((a, b) => b.avg - a.avg);
sumBody.innerHTML = modelRows.map((r) => r.html).join('');
|
||||
|
||||
// Results table: look up the table sections and build the header row.
const resHead = document.querySelector('#results-table thead');
const resBody = document.querySelector('#results-table tbody');
const resCols = ['Malli','Skenaario','Speksi','Testit','Korjaus','Ctx','Out tok','Aika','tok/s','Pisteet'];
// Each header cell carries its column index in a data-col attribute.
resHead.innerHTML = `<tr>${resCols.map((label, idx) => `<th data-col="${idx}">${label}</th>`).join('')}</tr>`;
|
||||
|
||||
// Current sort state of the results table: column index (matching the header
// cells) and direction. Default: last column (score), highest first.
let sortCol = 9;
let sortAsc = false;

/**
 * Re-renders the body of #results-table from DATA, ordered by the current
 * sortCol / sortAsc, and sets the sorted-asc / sorted-desc class on the
 * active header cell.
 *
 * Missing or non-finite numeric fields are treated as 0, both for ordering
 * (a NaN comparator result makes Array.prototype.sort order unspecified) and
 * for display (calling toFixed on undefined would throw and leave the table
 * empty).
 */
function renderResults() {
  // Sort key per column, in the same order as the header cells.
  const keyFns = [
    (r) => r.model,
    (r) => r.scenario,
    (r) => r.specEntities,
    (r) => r.testsPassed / Math.max(r.testsTotal, 1),
    (r) => r.fixRounds,
    (r) => r.promptTokensEst,
    (r) => r.totalTokens,
    (r) => r.totalDurationMs,
    (r) => r.avgTokPerSec,
    (r) => r.score,
  ];
  const keyOf = keyFns[sortCol];
  // The first two columns (model, scenario) are text; everything else is numeric.
  const isTextColumn = sortCol < 2;
  const num = (v) => (Number.isFinite(v) ? v : 0);

  const sorted = [...DATA].sort((a, b) => {
    const cmp = isTextColumn
      ? String(keyOf(a) ?? '').localeCompare(String(keyOf(b) ?? ''))
      : num(keyOf(a)) - num(keyOf(b));
    return sortAsc ? cmp : -cmp;
  });

  resBody.innerHTML = sorted.map((r) => {
    const c = cls(r);
    return `<tr>
      <td class="model-name">${r.model}</td>
      <td>${r.scenario}</td>
      <td>${r.specOk ? `✓ ${r.specEntities}e` : '<span class="fail">✗</span>'}</td>
      <td class="${c}">${pctBar(r.testsPassed, r.testsTotal)}</td>
      <td>${r.fixRounds > 0 ? r.fixRounds + '×' : '-'}</td>
      <td>${r.promptTokensEst > 0 ? '~'+(r.promptTokensEst/1000).toFixed(1)+'K' : '-'}</td>
      <td>${r.totalTokens > 0 ? (r.totalTokens/1000).toFixed(1)+'K' : '-'}</td>
      <td>${(num(r.totalDurationMs)/1000).toFixed(0)}s</td>
      <td>${num(r.avgTokPerSec).toFixed(0)}</td>
      <td><span class="stars">${r.stars}</span> ${r.score}p</td>
    </tr>`;
  }).join('');

  // Show the sort arrow only on the active column.
  document.querySelectorAll('#results-table th').forEach((th, i) => {
    th.className = i === sortCol ? (sortAsc ? 'sorted-asc' : 'sorted-desc') : '';
  });
}
|
||||
// Clicking a header cell sorts by that column; clicking the active column
// again flips the direction. A newly selected column starts descending.
document.querySelector('#results-table thead').addEventListener('click', (e) => {
  // Resolve the click to its header cell so clicks on nested content still count.
  const th = e.target.closest('th');
  const col = Number.parseInt(th?.dataset.col, 10);
  if (Number.isNaN(col)) return;
  if (sortCol === col) {
    sortAsc = !sortAsc;
  } else {
    sortCol = col;
    sortAsc = false;
  }
  renderResults();
});

// Initial render with the default sort.
renderResults();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
1
kipina-codebench/results/2026-04-14T20-42-21.json
Normal file
1
kipina-codebench/results/2026-04-14T20-42-21.json
Normal file
@@ -0,0 +1 @@
|
||||
[]
|
||||
183
kipina-codebench/results/2026-04-14T21-15-46.html
Normal file
183
kipina-codebench/results/2026-04-14T21-15-46.html
Normal file
@@ -0,0 +1,183 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="fi">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Kipina Model Benchmark</title>
|
||||
<style>
|
||||
:root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
|
||||
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||
body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
|
||||
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
|
||||
.meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
|
||||
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
|
||||
.card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
|
||||
.card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||
.card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
|
||||
.card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
|
||||
table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
|
||||
th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
|
||||
th:hover { color: var(--text); }
|
||||
th.sorted-asc::after { content: ' ▲'; }
|
||||
th.sorted-desc::after { content: ' ▼'; }
|
||||
td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
|
||||
tr:hover td { background: #1c2128; }
|
||||
.pass { color: var(--green); }
|
||||
.partial { color: var(--yellow); }
|
||||
.fail { color: var(--red); }
|
||||
.stars { letter-spacing: 1px; }
|
||||
.bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
|
||||
.bar-bg { background: var(--border); }
|
||||
.bar-fill { background: var(--green); }
|
||||
.bar-partial { background: var(--yellow); }
|
||||
.model-name { font-weight: 600; }
|
||||
h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
|
||||
.summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<h1>Kipina Model Benchmark</h1>
|
||||
<div class="meta" id="meta"></div>
|
||||
|
||||
<div class="cards" id="cards"></div>
|
||||
|
||||
<h2>Mallikohtainen yhteenveto</h2>
|
||||
<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
|
||||
|
||||
<h2>Kaikki tulokset</h2>
|
||||
<table id="results-table"><thead></thead><tbody></tbody></table>
|
||||
|
||||
<script>
|
||||
const RAW = [];
|
||||
|
||||
/**
 * Converts a numeric score into a five-character star rating.
 * Thresholds: >= 90 five stars, >= 70 four, >= 50 three, >= 25 two,
 * > 0 one, anything else (including non-numbers) zero stars.
 */
const starsFor = (s) => {
  if (s >= 90) return '★★★★★';
  if (s >= 70) return '★★★★☆';
  if (s >= 50) return '★★★☆☆';
  if (s >= 25) return '★★☆☆☆';
  if (s > 0) return '★☆☆☆☆';
  return '☆☆☆☆☆';
};
|
||||
/**
 * Computes a 0–100 score for one benchmark record.
 *
 * Components: 10 points when the spec is OK, 10 points when the run had no
 * error or executed at least one test, up to 60 points proportional to the
 * share of passed tests, and 20 points minus 10 per fix round (never below 0).
 * A record with an error and zero tests scores 0 outright.
 *
 * @param {object} r - Record with error, specOk, testsTotal, testsPassed, fixRounds.
 * @returns {number} Score capped at 100.
 */
function calcScore(r) {
  if (r.error && r.testsTotal === 0) return 0;

  const ranTests = r.testsTotal > 0;
  const specPoints = r.specOk ? 10 : 0;
  const runPoints = (!r.error || ranTests) ? 10 : 0;
  const testPoints = ranTests ? Math.round((r.testsPassed / r.testsTotal) * 60) : 0;
  const fixPoints = Math.max(0, 20 - (r.fixRounds || 0) * 10);

  return Math.min(100, specPoints + runPoints + testPoints + fixPoints);
}
|
||||
// Fill in derived fields that are missing from a record: score, stars and the
// estimated prompt token count (promptChars / 4). Each record is copied, so
// the objects in RAW are not modified.
const DATA = RAW.map((r) => {
  const score = r.score ?? calcScore(r);
  return {
    ...r,
    score,
    stars: r.stars || starsFor(score),
    promptTokensEst: r.promptTokensEst || (r.promptChars ? Math.round(r.promptChars / 4) : 0),
  };
});
|
||||
/**
 * Picks the CSS class for a record: 'pass' when there is no error and every
 * test (at least one) passed, 'partial' when some test passed, else 'fail'.
 */
const cls = (r) => {
  const ranTests = r.testsTotal > 0;
  if (!r.error && r.testsPassed === r.testsTotal && ranTests) return 'pass';
  if (ranTests && r.testsPassed > 0) return 'partial';
  return 'fail';
};
|
||||
/**
 * Builds the HTML for a small progress bar followed by a "passed/total" label.
 *
 * @param {number} passed - Number of passed tests; missing or non-positive counts as 0.
 * @param {number} total - Number of tests; when it is not a positive number '-' is returned.
 * @param {number} [w=80] - Width of the bar track in pixels.
 * @returns {string} HTML snippet, or '-' when there is nothing to show.
 */
const pctBar = (passed, total, w = 80) => {
  // Covers 0 as well as undefined / NaN / negative totals, which would otherwise render "NaNpx".
  if (!(total > 0)) return '-';
  const done = passed > 0 ? passed : 0;
  const pct = (done / total) * 100;
  const c = pct === 100 ? 'bar-fill' : 'bar-partial';
  // Keep the fill inside the track even if passed exceeds total.
  const fillPx = Math.min(w, Math.max(0, Math.round((pct / 100) * w)));
  return `<span class="bar bar-bg" style="width:${w}px"><span class="bar ${c}" style="width:${fillPx}px"></span></span> ${done}/${total}`;
};
|
||||
|
||||
// Sums one numeric field over a list of records. A missing value counts as 0
// so a single incomplete record cannot turn a total into NaN.
const sumField = (rows, key) => rows.reduce((acc, row) => acc + (row[key] ?? 0), 0);

// Meta line: date, number of runs and total duration in minutes.
// NOTE: new Date() is evaluated when the page is opened, so the date shown is
// the viewing date.
const totalTime = sumField(DATA, 'totalDurationMs');
document.getElementById('meta').textContent = `${new Date().toLocaleDateString('fi-FI')} — ${DATA.length} ajoa — ${(totalTime/1000/60).toFixed(1)} min`;

// Cards: headline figures over all records.
const models = [...new Set(DATA.map((r) => r.model))];
const scenarios = [...new Set(DATA.map((r) => r.scenario))];
const avgScore = DATA.length ? Math.round(sumField(DATA, 'score') / DATA.length) : 0;
const totalPassed = sumField(DATA, 'testsPassed');
const totalTests = sumField(DATA, 'testsTotal');
const passRate = totalTests ? Math.round(totalPassed / totalTests * 100) : 0;

// Rounded mean of one field over all records of a model. Every model name
// comes from DATA, so the filtered list is never empty.
const meanForModel = (m, key) => {
  const mrs = DATA.filter((r) => r.model === m);
  return Math.round(sumField(mrs, key) / mrs.length);
};
// Highest average score / highest average tok/s; undefined when DATA is empty.
const bestModel = models
  .map((m) => ({ model: m, avg: meanForModel(m, 'score') }))
  .sort((a, b) => b.avg - a.avg)[0];
const fastestModel = models
  .map((m) => ({ model: m, speed: meanForModel(m, 'avgTokPerSec') }))
  .sort((a, b) => b.speed - a.speed)[0];

document.getElementById('cards').innerHTML = `
  <div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistettä</div></div>
  <div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
  <div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${bestModel?.model || '-'}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
  <div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${fastestModel?.model || '-'}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
  <div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
  <div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime/1000/60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
`;
|
||||
|
||||
// Summary table: one row per model, one column per scenario, then totals.
const sumHead = document.querySelector('#summary-table thead');
const sumBody = document.querySelector('#summary-table tbody');
sumHead.innerHTML = '<tr><th>Malli</th>' + scenarios.map((s) => `<th>${s}</th>`).join('') + '<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>';

// Builds { avg, html } per model; rows are ordered by average score, best first.
const modelRows = models.map((m) => {
  // Sums one numeric field; a missing value counts as 0 so a single
  // incomplete record cannot turn a total into NaN.
  const sumOf = (rows, key) => rows.reduce((acc, row) => acc + (row[key] ?? 0), 0);

  const mrs = DATA.filter((r) => r.model === m);
  const tp = sumOf(mrs, 'testsPassed');
  const tt = sumOf(mrs, 'testsTotal');
  const tok = sumOf(mrs, 'totalTokens');
  const time = sumOf(mrs, 'totalDurationMs');
  const speed = Math.round(sumOf(mrs, 'avgTokPerSec') / mrs.length);
  const avg = Math.round(sumOf(mrs, 'score') / mrs.length);

  const scenCols = scenarios.map((s) => {
    // DATA may hold several records for the same model and scenario (repeated
    // runs). Aggregate all of them instead of showing only the first match;
    // with a single record the cell is the same as before.
    const runs = mrs.filter((r) => r.scenario === s);
    if (runs.length === 0) return '<td>-</td>';
    const verdicts = runs.map((r) => cls(r));
    const cellCls = verdicts.every((v) => v === 'pass')
      ? 'pass'
      : verdicts.every((v) => v === 'fail') ? 'fail' : 'partial';
    const passed = sumOf(runs, 'testsPassed');
    const total = sumOf(runs, 'testsTotal');
    const duration = sumOf(runs, 'totalDurationMs');
    return `<td class="${cellCls}">${pctBar(passed, total, 60)} <span style="color:var(--dim)">${(duration/1000).toFixed(0)}s</span></td>`;
  }).join('');

  return { avg, html: `<tr><td class="model-name">${m}</td>${scenCols}<td>${pctBar(tp, tt)}</td><td>${(tok/1000).toFixed(1)}K</td><td>${(time/1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>` };
}).sort((a, b) => b.avg - a.avg);
sumBody.innerHTML = modelRows.map((r) => r.html).join('');
|
||||
|
||||
// Results table: look up the table sections and build the header row.
const resHead = document.querySelector('#results-table thead');
const resBody = document.querySelector('#results-table tbody');
const resCols = ['Malli','Skenaario','Speksi','Testit','Korjaus','Ctx','Out tok','Aika','tok/s','Pisteet'];
// Each header cell carries its column index in a data-col attribute.
resHead.innerHTML = `<tr>${resCols.map((label, idx) => `<th data-col="${idx}">${label}</th>`).join('')}</tr>`;
|
||||
|
||||
// Current sort state of the results table: column index (matching the header
// cells) and direction. Default: last column (score), highest first.
let sortCol = 9;
let sortAsc = false;

/**
 * Re-renders the body of #results-table from DATA, ordered by the current
 * sortCol / sortAsc, and sets the sorted-asc / sorted-desc class on the
 * active header cell.
 *
 * Missing or non-finite numeric fields are treated as 0, both for ordering
 * (a NaN comparator result makes Array.prototype.sort order unspecified) and
 * for display (calling toFixed on undefined would throw and leave the table
 * empty).
 */
function renderResults() {
  // Sort key per column, in the same order as the header cells.
  const keyFns = [
    (r) => r.model,
    (r) => r.scenario,
    (r) => r.specEntities,
    (r) => r.testsPassed / Math.max(r.testsTotal, 1),
    (r) => r.fixRounds,
    (r) => r.promptTokensEst,
    (r) => r.totalTokens,
    (r) => r.totalDurationMs,
    (r) => r.avgTokPerSec,
    (r) => r.score,
  ];
  const keyOf = keyFns[sortCol];
  // The first two columns (model, scenario) are text; everything else is numeric.
  const isTextColumn = sortCol < 2;
  const num = (v) => (Number.isFinite(v) ? v : 0);

  const sorted = [...DATA].sort((a, b) => {
    const cmp = isTextColumn
      ? String(keyOf(a) ?? '').localeCompare(String(keyOf(b) ?? ''))
      : num(keyOf(a)) - num(keyOf(b));
    return sortAsc ? cmp : -cmp;
  });

  resBody.innerHTML = sorted.map((r) => {
    const c = cls(r);
    return `<tr>
      <td class="model-name">${r.model}</td>
      <td>${r.scenario}</td>
      <td>${r.specOk ? `✓ ${r.specEntities}e` : '<span class="fail">✗</span>'}</td>
      <td class="${c}">${pctBar(r.testsPassed, r.testsTotal)}</td>
      <td>${r.fixRounds > 0 ? r.fixRounds + '×' : '-'}</td>
      <td>${r.promptTokensEst > 0 ? '~'+(r.promptTokensEst/1000).toFixed(1)+'K' : '-'}</td>
      <td>${r.totalTokens > 0 ? (r.totalTokens/1000).toFixed(1)+'K' : '-'}</td>
      <td>${(num(r.totalDurationMs)/1000).toFixed(0)}s</td>
      <td>${num(r.avgTokPerSec).toFixed(0)}</td>
      <td><span class="stars">${r.stars}</span> ${r.score}p</td>
    </tr>`;
  }).join('');

  // Show the sort arrow only on the active column.
  document.querySelectorAll('#results-table th').forEach((th, i) => {
    th.className = i === sortCol ? (sortAsc ? 'sorted-asc' : 'sorted-desc') : '';
  });
}
|
||||
// Clicking a header cell sorts by that column; clicking the active column
// again flips the direction. A newly selected column starts descending.
document.querySelector('#results-table thead').addEventListener('click', (e) => {
  // Resolve the click to its header cell so clicks on nested content still count.
  const th = e.target.closest('th');
  const col = Number.parseInt(th?.dataset.col, 10);
  if (Number.isNaN(col)) return;
  if (sortCol === col) {
    sortAsc = !sortAsc;
  } else {
    sortCol = col;
    sortAsc = false;
  }
  renderResults();
});

// Initial render with the default sort.
renderResults();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
1
kipina-codebench/results/2026-04-14T21-15-46.json
Normal file
1
kipina-codebench/results/2026-04-14T21-15-46.json
Normal file
@@ -0,0 +1 @@
|
||||
[]
|
||||
183
kipina-codebench/results/2026-04-14T21-40-58.html
Normal file
183
kipina-codebench/results/2026-04-14T21-40-58.html
Normal file
@@ -0,0 +1,183 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="fi">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Kipina Model Benchmark</title>
|
||||
<style>
|
||||
:root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
|
||||
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||
body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
|
||||
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
|
||||
.meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
|
||||
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
|
||||
.card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
|
||||
.card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||
.card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
|
||||
.card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
|
||||
table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
|
||||
th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
|
||||
th:hover { color: var(--text); }
|
||||
th.sorted-asc::after { content: ' ▲'; }
|
||||
th.sorted-desc::after { content: ' ▼'; }
|
||||
td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
|
||||
tr:hover td { background: #1c2128; }
|
||||
.pass { color: var(--green); }
|
||||
.partial { color: var(--yellow); }
|
||||
.fail { color: var(--red); }
|
||||
.stars { letter-spacing: 1px; }
|
||||
.bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
|
||||
.bar-bg { background: var(--border); }
|
||||
.bar-fill { background: var(--green); }
|
||||
.bar-partial { background: var(--yellow); }
|
||||
.model-name { font-weight: 600; }
|
||||
h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
|
||||
.summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<h1>Kipina Model Benchmark</h1>
|
||||
<div class="meta" id="meta"></div>
|
||||
|
||||
<div class="cards" id="cards"></div>
|
||||
|
||||
<h2>Mallikohtainen yhteenveto</h2>
|
||||
<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
|
||||
|
||||
<h2>Kaikki tulokset</h2>
|
||||
<table id="results-table"><thead></thead><tbody></tbody></table>
|
||||
|
||||
<script>
|
||||
// Benchmark records rendered by this report, one object per run.
// The literal was previously broken across two lines in the middle of the
// "specEntities" key, which is a syntax error; it is rejoined here with one
// record per line.
const RAW = [
  {"model":"qwen3-coder:30b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":18254,"totalTokens":2677,"avgTokPerSec":189.94573063405974,"promptChars":12393,"promptTokensEst":3098,"score":100,"stars":"★★★★★","error":null,"profile":"large","promptName":"code","round":1},
  {"model":"qwen3-coder:30b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":3,"testsTotal":8,"testsPassed":7,"testsFailed":1,"totalDurationMs":62436,"totalTokens":10254,"avgTokPerSec":180.7144707287582,"promptChars":12114,"promptTokensEst":3029,"score":73,"stars":"★★★★☆","error":null,"profile":"large","promptName":"code","round":1},
  {"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":12,"testsPassed":12,"testsFailed":0,"totalDurationMs":31205,"totalTokens":5060,"avgTokPerSec":184.0318504412911,"promptChars":12363,"promptTokensEst":3091,"score":100,"stars":"★★★★★","error":null,"profile":"large","promptName":"code","round":1},
  {"model":"qwen3-coder:30b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":2,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":33675,"totalTokens":5292,"avgTokPerSec":187.55877092211753,"promptChars":12030,"promptTokensEst":3008,"score":80,"stars":"★★★★☆","error":null,"profile":"large","promptName":"code","round":2},
  {"model":"qwen3-coder:30b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":17703,"totalTokens":2890,"avgTokPerSec":185.61718567437205,"promptChars":12222,"promptTokensEst":3056,"score":100,"stars":"★★★★★","error":null,"profile":"large","promptName":"code","round":2},
  {"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":1,"testsTotal":12,"testsPassed":12,"testsFailed":0,"totalDurationMs":48697,"totalTokens":7851,"avgTokPerSec":181.3588176072106,"promptChars":12618,"promptTokensEst":3155,"score":90,"stars":"★★★★★","error":null,"profile":"large","promptName":"code","round":2},
  {"model":"qwen3-coder:30b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":17426,"totalTokens":2532,"avgTokPerSec":189.99848651835183,"promptChars":12217,"promptTokensEst":3054,"score":100,"stars":"★★★★★","error":null,"profile":"large","promptName":"code","round":3},
  {"model":"qwen3-coder:30b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":3,"testsTotal":9,"testsPassed":8,"testsFailed":1,"totalDurationMs":51361,"totalTokens":8528,"avgTokPerSec":183.31274532724456,"promptChars":11465,"promptTokensEst":2866,"score":73,"stars":"★★★★☆","error":null,"profile":"large","promptName":"code","round":3},
  {"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":1,"testsTotal":12,"testsPassed":12,"testsFailed":0,"totalDurationMs":49147,"totalTokens":7931,"avgTokPerSec":178.9319278594721,"promptChars":12262,"promptTokensEst":3066,"score":90,"stars":"★★★★★","error":null,"profile":"large","promptName":"code","round":3},
];
|
||||
|
||||
/**
 * Converts a numeric score into a five-character star rating.
 * Thresholds: >= 90 five stars, >= 70 four, >= 50 three, >= 25 two,
 * > 0 one, anything else (including non-numbers) zero stars.
 */
const starsFor = (s) => {
  if (s >= 90) return '★★★★★';
  if (s >= 70) return '★★★★☆';
  if (s >= 50) return '★★★☆☆';
  if (s >= 25) return '★★☆☆☆';
  if (s > 0) return '★☆☆☆☆';
  return '☆☆☆☆☆';
};
|
||||
/**
 * Computes a 0–100 score for one benchmark record.
 *
 * Components: 10 points when the spec is OK, 10 points when the run had no
 * error or executed at least one test, up to 60 points proportional to the
 * share of passed tests, and 20 points minus 10 per fix round (never below 0).
 * A record with an error and zero tests scores 0 outright.
 *
 * @param {object} r - Record with error, specOk, testsTotal, testsPassed, fixRounds.
 * @returns {number} Score capped at 100.
 */
function calcScore(r) {
  if (r.error && r.testsTotal === 0) return 0;

  const ranTests = r.testsTotal > 0;
  const specPoints = r.specOk ? 10 : 0;
  const runPoints = (!r.error || ranTests) ? 10 : 0;
  const testPoints = ranTests ? Math.round((r.testsPassed / r.testsTotal) * 60) : 0;
  const fixPoints = Math.max(0, 20 - (r.fixRounds || 0) * 10);

  return Math.min(100, specPoints + runPoints + testPoints + fixPoints);
}
|
||||
// Fill in derived fields that are missing from a record: score, stars and the
// estimated prompt token count (promptChars / 4). Each record is copied, so
// the objects in RAW are not modified.
const DATA = RAW.map((r) => {
  const score = r.score ?? calcScore(r);
  return {
    ...r,
    score,
    stars: r.stars || starsFor(score),
    promptTokensEst: r.promptTokensEst || (r.promptChars ? Math.round(r.promptChars / 4) : 0),
  };
});
|
||||
/**
 * Picks the CSS class for a record: 'pass' when there is no error and every
 * test (at least one) passed, 'partial' when some test passed, else 'fail'.
 */
const cls = (r) => {
  const ranTests = r.testsTotal > 0;
  if (!r.error && r.testsPassed === r.testsTotal && ranTests) return 'pass';
  if (ranTests && r.testsPassed > 0) return 'partial';
  return 'fail';
};
|
||||
/**
 * Builds the HTML for a small progress bar followed by a "passed/total" label.
 *
 * @param {number} passed - Number of passed tests; missing or non-positive counts as 0.
 * @param {number} total - Number of tests; when it is not a positive number '-' is returned.
 * @param {number} [w=80] - Width of the bar track in pixels.
 * @returns {string} HTML snippet, or '-' when there is nothing to show.
 */
const pctBar = (passed, total, w = 80) => {
  // Covers 0 as well as undefined / NaN / negative totals, which would otherwise render "NaNpx".
  if (!(total > 0)) return '-';
  const done = passed > 0 ? passed : 0;
  const pct = (done / total) * 100;
  const c = pct === 100 ? 'bar-fill' : 'bar-partial';
  // Keep the fill inside the track even if passed exceeds total.
  const fillPx = Math.min(w, Math.max(0, Math.round((pct / 100) * w)));
  return `<span class="bar bar-bg" style="width:${w}px"><span class="bar ${c}" style="width:${fillPx}px"></span></span> ${done}/${total}`;
};
|
||||
|
||||
// Sums one numeric field over a list of records. A missing value counts as 0
// so a single incomplete record cannot turn a total into NaN.
const sumField = (rows, key) => rows.reduce((acc, row) => acc + (row[key] ?? 0), 0);

// Meta line: date, number of runs and total duration in minutes.
// NOTE: new Date() is evaluated when the page is opened, so the date shown is
// the viewing date.
const totalTime = sumField(DATA, 'totalDurationMs');
document.getElementById('meta').textContent = `${new Date().toLocaleDateString('fi-FI')} — ${DATA.length} ajoa — ${(totalTime/1000/60).toFixed(1)} min`;

// Cards: headline figures over all records.
const models = [...new Set(DATA.map((r) => r.model))];
const scenarios = [...new Set(DATA.map((r) => r.scenario))];
const avgScore = DATA.length ? Math.round(sumField(DATA, 'score') / DATA.length) : 0;
const totalPassed = sumField(DATA, 'testsPassed');
const totalTests = sumField(DATA, 'testsTotal');
const passRate = totalTests ? Math.round(totalPassed / totalTests * 100) : 0;

// Rounded mean of one field over all records of a model. Every model name
// comes from DATA, so the filtered list is never empty.
const meanForModel = (m, key) => {
  const mrs = DATA.filter((r) => r.model === m);
  return Math.round(sumField(mrs, key) / mrs.length);
};
// Highest average score / highest average tok/s; undefined when DATA is empty.
const bestModel = models
  .map((m) => ({ model: m, avg: meanForModel(m, 'score') }))
  .sort((a, b) => b.avg - a.avg)[0];
const fastestModel = models
  .map((m) => ({ model: m, speed: meanForModel(m, 'avgTokPerSec') }))
  .sort((a, b) => b.speed - a.speed)[0];

document.getElementById('cards').innerHTML = `
  <div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistettä</div></div>
  <div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
  <div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${bestModel?.model || '-'}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
  <div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${fastestModel?.model || '-'}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
  <div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
  <div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime/1000/60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
`;
|
||||
|
||||
// Summary table: one row per model with a cell per scenario followed by
// totals over all of the model's runs. Rows are ordered by mean score,
// highest first.
const sumHead = document.querySelector('#summary-table thead');
const sumBody = document.querySelector('#summary-table tbody');
sumHead.innerHTML = `<tr><th>Malli</th>${scenarios.map(s => `<th>${s}</th>`).join('')}<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>`;

const modelRows = models.map(m => {
  const sumOver = (rows, key) => rows.reduce((acc, r) => acc + r[key], 0);
  const mrs = DATA.filter(r => r.model === m);
  const tp = sumOver(mrs, 'testsPassed');
  const tt = sumOver(mrs, 'testsTotal');
  const tok = sumOver(mrs, 'totalTokens');
  const time = sumOver(mrs, 'totalDurationMs');
  const speed = Math.round(sumOver(mrs, 'avgTokPerSec') / mrs.length);
  const avg = Math.round(sumOver(mrs, 'score') / mrs.length);

  const scenCols = scenarios.map(s => {
    // A model can have several runs (rounds) of the same scenario. Combine
    // all of them instead of showing only the first, so the cell agrees with
    // the totals column. With a single run the cell is unchanged.
    const runs = mrs.filter(r => r.scenario === s);
    if (runs.length === 0) return '<td>-</td>';
    const combined = {
      // Any failed run marks the combined cell as not fully passed.
      error: runs.find(r => r.error)?.error ?? null,
      testsPassed: sumOver(runs, 'testsPassed'),
      testsTotal: sumOver(runs, 'testsTotal'),
      totalDurationMs: sumOver(runs, 'totalDurationMs'),
    };
    return `<td class="${cls(combined)}">${pctBar(combined.testsPassed, combined.testsTotal, 60)} <span style="color:var(--dim)">${(combined.totalDurationMs / 1000).toFixed(0)}s</span></td>`;
  }).join('');

  return {
    avg,
    html: `<tr><td class="model-name">${m}</td>${scenCols}<td>${pctBar(tp, tt)}</td><td>${(tok / 1000).toFixed(1)}K</td><td>${(time / 1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>`,
  };
}).sort((a, b) => b.avg - a.avg);
sumBody.innerHTML = modelRows.map(r => r.html).join('');
|
||||
|
||||
// Results table: one row per run, sortable by column.
const resHead = document.querySelector('#results-table thead');
const resBody = document.querySelector('#results-table tbody');
const resCols = ['Malli','Skenaario','Speksi','Testit','Korjaus','Ctx','Out tok','Aika','tok/s','Pisteet'];
resHead.innerHTML = '<tr>' + resCols.map((c,i) => `<th data-col="${i}">${c}</th>`).join('') + '</tr>';

// Sort key per column, index-aligned with resCols. Defined once instead of
// being rebuilt for every comparison.
const resSortKeys = [
  r => r.model,
  r => r.scenario,
  r => r.specEntities,
  r => r.testsPassed / Math.max(r.testsTotal, 1),
  r => r.fixRounds,
  r => r.promptTokensEst,
  r => r.totalTokens,
  r => r.totalDurationMs,
  r => r.avgTokPerSec,
  r => r.score,
];
// Missing or NaN numbers sort as 0 so the comparator stays consistent.
const finiteOr0 = v => (Number.isFinite(v) ? v : 0);

// Current sort state: column index into resCols and direction.
let sortCol = 9;
let sortAsc = false;

/**
 * Re-render the results table body sorted by the current sortCol/sortAsc and
 * update the sort indicator class on the header cells.
 */
function renderResults() {
  const keyOf = resSortKeys[sortCol];
  const sorted = [...DATA].sort((a, b) => {
    const va = keyOf(a);
    const vb = keyOf(b);
    const cmp = typeof va === 'string' || typeof vb === 'string'
      ? String(va ?? '').localeCompare(String(vb ?? ''))
      : finiteOr0(va) - finiteOr0(vb);
    return sortAsc ? cmp : -cmp;
  });

  resBody.innerHTML = sorted.map(r => {
    const c = cls(r);
    // toFixed on a missing avgTokPerSec would throw and abort the whole
    // render, so show a dash instead.
    const tokPerSec = Number.isFinite(r.avgTokPerSec) ? r.avgTokPerSec.toFixed(0) : '-';
    const cells = [
      `<td class="model-name">${r.model}</td>`,
      `<td>${r.scenario}</td>`,
      `<td>${r.specOk ? `✓ ${r.specEntities}e` : '<span class="fail">✗</span>'}</td>`,
      `<td class="${c}">${pctBar(r.testsPassed, r.testsTotal)}</td>`,
      `<td>${r.fixRounds > 0 ? r.fixRounds + '×' : '-'}</td>`,
      `<td>${r.promptTokensEst > 0 ? '~' + (r.promptTokensEst / 1000).toFixed(1) + 'K' : '-'}</td>`,
      `<td>${r.totalTokens > 0 ? (r.totalTokens / 1000).toFixed(1) + 'K' : '-'}</td>`,
      `<td>${(r.totalDurationMs / 1000).toFixed(0)}s</td>`,
      `<td>${tokPerSec}</td>`,
      `<td><span class="stars">${r.stars}</span> ${r.score}p</td>`,
    ];
    return `<tr>\n${cells.join('\n')}\n</tr>`;
  }).join('');

  // Mark the active column so the CSS can draw the direction arrow.
  document.querySelectorAll('#results-table th').forEach((th, i) => {
    th.className = i === sortCol ? (sortAsc ? 'sorted-asc' : 'sorted-desc') : '';
  });
}
|
||||
// Clicking a header cell sorts by that column; clicking the active column
// again flips the direction. A newly chosen column starts descending.
document.querySelector('#results-table thead').addEventListener('click', e => {
  // Resolve the header cell even when the click lands on a nested element.
  const th = e.target.closest('th');
  if (!th) return;
  const col = Number.parseInt(th.dataset.col, 10);
  if (Number.isNaN(col)) return;
  if (sortCol === col) {
    sortAsc = !sortAsc;
  } else {
    sortCol = col;
    sortAsc = false;
  }
  renderResults();
});

// Initial render.
renderResults();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
209
kipina-codebench/results/2026-04-14T21-40-58.json
Normal file
209
kipina-codebench/results/2026-04-14T21-40-58.json
Normal file
@@ -0,0 +1,209 @@
|
||||
[
|
||||
{
|
||||
"model": "qwen3-coder:30b",
|
||||
"scenario": "todo",
|
||||
"reqOk": true,
|
||||
"specOk": true,
|
||||
"specEntities": 1,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 0,
|
||||
"testsTotal": 6,
|
||||
"testsPassed": 6,
|
||||
"testsFailed": 0,
|
||||
"totalDurationMs": 18254,
|
||||
"totalTokens": 2677,
|
||||
"avgTokPerSec": 189.94573063405974,
|
||||
"promptChars": 12393,
|
||||
"promptTokensEst": 3098,
|
||||
"score": 100,
|
||||
"stars": "★★★★★",
|
||||
"error": null,
|
||||
"profile": "large",
|
||||
"promptName": "code",
|
||||
"round": 1
|
||||
},
|
||||
{
|
||||
"model": "qwen3-coder:30b",
|
||||
"scenario": "users",
|
||||
"reqOk": true,
|
||||
"specOk": true,
|
||||
"specEntities": 1,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 3,
|
||||
"testsTotal": 8,
|
||||
"testsPassed": 7,
|
||||
"testsFailed": 1,
|
||||
"totalDurationMs": 62436,
|
||||
"totalTokens": 10254,
|
||||
"avgTokPerSec": 180.7144707287582,
|
||||
"promptChars": 12114,
|
||||
"promptTokensEst": 3029,
|
||||
"score": 73,
|
||||
"stars": "★★★★☆",
|
||||
"error": null,
|
||||
"profile": "large",
|
||||
"promptName": "code",
|
||||
"round": 1
|
||||
},
|
||||
{
|
||||
"model": "qwen3-coder:30b",
|
||||
"scenario": "blog",
|
||||
"reqOk": true,
|
||||
"specOk": true,
|
||||
"specEntities": 2,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 0,
|
||||
"testsTotal": 12,
|
||||
"testsPassed": 12,
|
||||
"testsFailed": 0,
|
||||
"totalDurationMs": 31205,
|
||||
"totalTokens": 5060,
|
||||
"avgTokPerSec": 184.0318504412911,
|
||||
"promptChars": 12363,
|
||||
"promptTokensEst": 3091,
|
||||
"score": 100,
|
||||
"stars": "★★★★★",
|
||||
"error": null,
|
||||
"profile": "large",
|
||||
"promptName": "code",
|
||||
"round": 1
|
||||
},
|
||||
{
|
||||
"model": "qwen3-coder:30b",
|
||||
"scenario": "todo",
|
||||
"reqOk": true,
|
||||
"specOk": true,
|
||||
"specEntities": 1,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 2,
|
||||
"testsTotal": 6,
|
||||
"testsPassed": 6,
|
||||
"testsFailed": 0,
|
||||
"totalDurationMs": 33675,
|
||||
"totalTokens": 5292,
|
||||
"avgTokPerSec": 187.55877092211753,
|
||||
"promptChars": 12030,
|
||||
"promptTokensEst": 3008,
|
||||
"score": 80,
|
||||
"stars": "★★★★☆",
|
||||
"error": null,
|
||||
"profile": "large",
|
||||
"promptName": "code",
|
||||
"round": 2
|
||||
},
|
||||
{
|
||||
"model": "qwen3-coder:30b",
|
||||
"scenario": "users",
|
||||
"reqOk": true,
|
||||
"specOk": true,
|
||||
"specEntities": 1,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 0,
|
||||
"testsTotal": 6,
|
||||
"testsPassed": 6,
|
||||
"testsFailed": 0,
|
||||
"totalDurationMs": 17703,
|
||||
"totalTokens": 2890,
|
||||
"avgTokPerSec": 185.61718567437205,
|
||||
"promptChars": 12222,
|
||||
"promptTokensEst": 3056,
|
||||
"score": 100,
|
||||
"stars": "★★★★★",
|
||||
"error": null,
|
||||
"profile": "large",
|
||||
"promptName": "code",
|
||||
"round": 2
|
||||
},
|
||||
{
|
||||
"model": "qwen3-coder:30b",
|
||||
"scenario": "blog",
|
||||
"reqOk": true,
|
||||
"specOk": true,
|
||||
"specEntities": 2,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 1,
|
||||
"testsTotal": 12,
|
||||
"testsPassed": 12,
|
||||
"testsFailed": 0,
|
||||
"totalDurationMs": 48697,
|
||||
"totalTokens": 7851,
|
||||
"avgTokPerSec": 181.3588176072106,
|
||||
"promptChars": 12618,
|
||||
"promptTokensEst": 3155,
|
||||
"score": 90,
|
||||
"stars": "★★★★★",
|
||||
"error": null,
|
||||
"profile": "large",
|
||||
"promptName": "code",
|
||||
"round": 2
|
||||
},
|
||||
{
|
||||
"model": "qwen3-coder:30b",
|
||||
"scenario": "todo",
|
||||
"reqOk": true,
|
||||
"specOk": true,
|
||||
"specEntities": 1,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 0,
|
||||
"testsTotal": 6,
|
||||
"testsPassed": 6,
|
||||
"testsFailed": 0,
|
||||
"totalDurationMs": 17426,
|
||||
"totalTokens": 2532,
|
||||
"avgTokPerSec": 189.99848651835183,
|
||||
"promptChars": 12217,
|
||||
"promptTokensEst": 3054,
|
||||
"score": 100,
|
||||
"stars": "★★★★★",
|
||||
"error": null,
|
||||
"profile": "large",
|
||||
"promptName": "code",
|
||||
"round": 3
|
||||
},
|
||||
{
|
||||
"model": "qwen3-coder:30b",
|
||||
"scenario": "users",
|
||||
"reqOk": true,
|
||||
"specOk": true,
|
||||
"specEntities": 1,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 3,
|
||||
"testsTotal": 9,
|
||||
"testsPassed": 8,
|
||||
"testsFailed": 1,
|
||||
"totalDurationMs": 51361,
|
||||
"totalTokens": 8528,
|
||||
"avgTokPerSec": 183.31274532724456,
|
||||
"promptChars": 11465,
|
||||
"promptTokensEst": 2866,
|
||||
"score": 73,
|
||||
"stars": "★★★★☆",
|
||||
"error": null,
|
||||
"profile": "large",
|
||||
"promptName": "code",
|
||||
"round": 3
|
||||
},
|
||||
{
|
||||
"model": "qwen3-coder:30b",
|
||||
"scenario": "blog",
|
||||
"reqOk": true,
|
||||
"specOk": true,
|
||||
"specEntities": 2,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 1,
|
||||
"testsTotal": 12,
|
||||
"testsPassed": 12,
|
||||
"testsFailed": 0,
|
||||
"totalDurationMs": 49147,
|
||||
"totalTokens": 7931,
|
||||
"avgTokPerSec": 178.9319278594721,
|
||||
"promptChars": 12262,
|
||||
"promptTokensEst": 3066,
|
||||
"score": 90,
|
||||
"stars": "★★★★★",
|
||||
"error": null,
|
||||
"profile": "large",
|
||||
"promptName": "code",
|
||||
"round": 3
|
||||
}
|
||||
]
|
||||
183
kipina-codebench/results/2026-04-14T21-50-37.html
Normal file
183
kipina-codebench/results/2026-04-14T21-50-37.html
Normal file
@@ -0,0 +1,183 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="fi">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Kipina Model Benchmark</title>
|
||||
<style>
|
||||
:root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
|
||||
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||
body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
|
||||
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
|
||||
.meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
|
||||
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
|
||||
.card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
|
||||
.card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||
.card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
|
||||
.card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
|
||||
table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
|
||||
th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
|
||||
th:hover { color: var(--text); }
|
||||
th.sorted-asc::after { content: ' ▲'; }
|
||||
th.sorted-desc::after { content: ' ▼'; }
|
||||
td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
|
||||
tr:hover td { background: #1c2128; }
|
||||
.pass { color: var(--green); }
|
||||
.partial { color: var(--yellow); }
|
||||
.fail { color: var(--red); }
|
||||
.stars { letter-spacing: 1px; }
|
||||
.bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
|
||||
.bar-bg { background: var(--border); }
|
||||
.bar-fill { background: var(--green); }
|
||||
.bar-partial { background: var(--yellow); }
|
||||
.model-name { font-weight: 600; }
|
||||
h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
|
||||
.summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<h1>Kipina Model Benchmark</h1>
|
||||
<div class="meta" id="meta"></div>
|
||||
|
||||
<div class="cards" id="cards"></div>
|
||||
|
||||
<h2>Mallikohtainen yhteenveto</h2>
|
||||
<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
|
||||
|
||||
<h2>Kaikki tulokset</h2>
|
||||
<table id="results-table"><thead></thead><tbody></tbody></table>
|
||||
|
||||
<script>
|
||||
const RAW = [];
|
||||
|
||||
/**
 * Convert a numeric score into a five-character star rating.
 * Thresholds: >= 90 five stars, >= 70 four, >= 50 three, >= 25 two,
 * any other positive score one star, everything else no stars.
 */
const starsFor = s => {
  if (s >= 90) return '★★★★★';
  if (s >= 70) return '★★★★☆';
  if (s >= 50) return '★★★☆☆';
  if (s >= 25) return '★★☆☆☆';
  if (s > 0) return '★☆☆☆☆';
  return '☆☆☆☆☆';
};
|
||||
/**
 * Compute the score of one benchmark run record.
 *
 * Points: 10 for a valid spec (specOk), 10 when the run has no error or
 * produced at least one test, up to 60 proportional to the share of passed
 * tests, and 20 minus 10 per fix round (never below 0). A run that has an
 * error and a test count of exactly 0 scores 0 outright.
 *
 * @param {object} r - run record (reads error, testsTotal, testsPassed, specOk, fixRounds)
 * @returns {number} score, capped at 100
 */
function calcScore(r) {
  if (r.error && r.testsTotal === 0) return 0;

  const hasTests = r.testsTotal > 0;
  const specPoints = r.specOk ? 10 : 0;
  const ranPoints = (!r.error || hasTests) ? 10 : 0;
  const testPoints = hasTests ? Math.round((r.testsPassed / r.testsTotal) * 60) : 0;
  const fixPoints = Math.max(0, 20 - (r.fixRounds || 0) * 10);

  return Math.min(100, specPoints + ranPoints + testPoints + fixPoints);
}
|
||||
// Compute the score if it is missing, and likewise fill in the star rating
// and the prompt token estimate when the record does not carry them.
// Each record is shallow-copied first so the objects in RAW are not mutated.
const DATA = RAW.map(raw => {
  const r = { ...raw };
  if (r.score == null) r.score = calcScore(r);
  if (!r.stars) r.stars = starsFor(r.score);
  // Fallback estimate: prompt length in characters divided by four.
  if (!r.promptTokensEst) r.promptTokensEst = r.promptChars ? Math.round(r.promptChars / 4) : 0;
  return r;
});
|
||||
/**
 * CSS class name for the test outcome of a run:
 * 'pass' when there is no error and all tests (at least one) passed,
 * 'partial' when tests exist and at least one passed, otherwise 'fail'.
 */
const cls = r => {
  const allPassed = !r.error && r.testsPassed === r.testsTotal && r.testsTotal > 0;
  if (allPassed) return 'pass';
  const somePassed = r.testsTotal > 0 && r.testsPassed > 0;
  return somePassed ? 'partial' : 'fail';
};
|
||||
/**
 * Render a small HTML progress bar followed by a "passed/total" label.
 *
 * @param {number} passed - number of passed tests
 * @param {number} total - number of tests
 * @param {number} [w=80] - width of the bar track in pixels
 * @returns {string} HTML snippet, or '-' when there is no positive test count
 */
const pctBar = (passed, total, w = 80) => {
  // Covers 0 as well as missing, NaN or negative totals, which would
  // otherwise put "NaNpx" widths into the markup.
  if (!(total > 0)) return '-';
  const done = Number.isFinite(passed) ? passed : 0;
  const pct = done / total * 100;
  const c = pct === 100 ? 'bar-fill' : 'bar-partial';
  // Clamp so inconsistent counts can never draw outside the track.
  const fill = Math.round(Math.min(100, Math.max(0, pct)) / 100 * w);
  return `<span class="bar bar-bg" style="width:${w}px"><span class="bar ${c}" style="width:${fill}px"></span></span> ${done}/${total}`;
};
|
||||
|
||||
// Small aggregation helpers shared by the meta line and the cards.
const sumField = (rows, field) => rows.reduce((acc, row) => acc + row[field], 0);
const meanField = (rows, field) => Math.round(sumField(rows, field) / rows.length);
const runsOf = model => DATA.filter(r => r.model === model);

// Meta line: date at render time, number of runs, total duration in minutes.
const totalTime = sumField(DATA, 'totalDurationMs');
const today = new Date().toLocaleDateString('fi-FI');
const totalMinutes = (totalTime / 1000 / 60).toFixed(1);
document.getElementById('meta').textContent = `${today} — ${DATA.length} ajoa — ${totalMinutes} min`;

// Cards: headline figures over all runs.
const models = [...new Set(DATA.map(r => r.model))];
const scenarios = [...new Set(DATA.map(r => r.scenario))];
const avgScore = DATA.length ? meanField(DATA, 'score') : 0;
const totalPassed = sumField(DATA, 'testsPassed');
const totalTests = sumField(DATA, 'testsTotal');
const passRate = totalTests ? Math.round(totalPassed / totalTests * 100) : 0;

// Model with the highest mean score; undefined when there are no runs.
const bestModel = models
  .map(m => ({ model: m, avg: meanField(runsOf(m), 'score') }))
  .sort((a, b) => b.avg - a.avg)[0];

// Model with the highest mean tokens per second; undefined when there are no runs.
const fastestModel = models
  .map(m => ({ model: m, speed: meanField(runsOf(m), 'avgTokPerSec') }))
  .sort((a, b) => b.speed - a.speed)[0];

// One card: label, large value and sub line; valueAttrs is appended to the value div's tag.
const cardHtml = (label, value, sub, valueAttrs = '') =>
  `<div class="card"><div class="label">${label}</div><div class="value"${valueAttrs}>${value}</div><div class="sub">${sub}</div></div>`;
const smallValue = ' style="font-size:1.2rem"';

const cardList = [
  cardHtml('Keskiarvo', starsFor(avgScore), `${avgScore} pistetta`),
  cardHtml('Testien läpäisy', `${passRate}%`, `${totalPassed}/${totalTests} testiä`),
  cardHtml('Paras malli', bestModel?.model || '-', `${bestModel?.avg || 0}p`, smallValue),
  cardHtml('Nopein', fastestModel?.model || '-', `${fastestModel?.speed || 0} tok/s`, smallValue),
  cardHtml('Malleja', models.length, `${scenarios.length} skenaariota`),
  cardHtml('Kokonaisaika', totalMinutes, 'minuuttia'),
];
document.getElementById('cards').innerHTML = `\n${cardList.join('\n')}\n`;
|
||||
|
||||
// Summary table: one row per model with a cell per scenario followed by
// totals over all of the model's runs. Rows are ordered by mean score,
// highest first.
const sumHead = document.querySelector('#summary-table thead');
const sumBody = document.querySelector('#summary-table tbody');
sumHead.innerHTML = `<tr><th>Malli</th>${scenarios.map(s => `<th>${s}</th>`).join('')}<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>`;

const modelRows = models.map(m => {
  const sumOver = (rows, key) => rows.reduce((acc, r) => acc + r[key], 0);
  const mrs = DATA.filter(r => r.model === m);
  const tp = sumOver(mrs, 'testsPassed');
  const tt = sumOver(mrs, 'testsTotal');
  const tok = sumOver(mrs, 'totalTokens');
  const time = sumOver(mrs, 'totalDurationMs');
  const speed = Math.round(sumOver(mrs, 'avgTokPerSec') / mrs.length);
  const avg = Math.round(sumOver(mrs, 'score') / mrs.length);

  const scenCols = scenarios.map(s => {
    // A model can have several runs (rounds) of the same scenario. Combine
    // all of them instead of showing only the first, so the cell agrees with
    // the totals column. With a single run the cell is unchanged.
    const runs = mrs.filter(r => r.scenario === s);
    if (runs.length === 0) return '<td>-</td>';
    const combined = {
      // Any failed run marks the combined cell as not fully passed.
      error: runs.find(r => r.error)?.error ?? null,
      testsPassed: sumOver(runs, 'testsPassed'),
      testsTotal: sumOver(runs, 'testsTotal'),
      totalDurationMs: sumOver(runs, 'totalDurationMs'),
    };
    return `<td class="${cls(combined)}">${pctBar(combined.testsPassed, combined.testsTotal, 60)} <span style="color:var(--dim)">${(combined.totalDurationMs / 1000).toFixed(0)}s</span></td>`;
  }).join('');

  return {
    avg,
    html: `<tr><td class="model-name">${m}</td>${scenCols}<td>${pctBar(tp, tt)}</td><td>${(tok / 1000).toFixed(1)}K</td><td>${(time / 1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>`,
  };
}).sort((a, b) => b.avg - a.avg);
sumBody.innerHTML = modelRows.map(r => r.html).join('');
|
||||
|
||||
// Results table: one row per run, sortable by column.
const resHead = document.querySelector('#results-table thead');
const resBody = document.querySelector('#results-table tbody');
const resCols = ['Malli','Skenaario','Speksi','Testit','Korjaus','Ctx','Out tok','Aika','tok/s','Pisteet'];
resHead.innerHTML = '<tr>' + resCols.map((c,i) => `<th data-col="${i}">${c}</th>`).join('') + '</tr>';

// Sort key per column, index-aligned with resCols. Defined once instead of
// being rebuilt for every comparison.
const resSortKeys = [
  r => r.model,
  r => r.scenario,
  r => r.specEntities,
  r => r.testsPassed / Math.max(r.testsTotal, 1),
  r => r.fixRounds,
  r => r.promptTokensEst,
  r => r.totalTokens,
  r => r.totalDurationMs,
  r => r.avgTokPerSec,
  r => r.score,
];
// Missing or NaN numbers sort as 0 so the comparator stays consistent.
const finiteOr0 = v => (Number.isFinite(v) ? v : 0);

// Current sort state: column index into resCols and direction.
let sortCol = 9;
let sortAsc = false;

/**
 * Re-render the results table body sorted by the current sortCol/sortAsc and
 * update the sort indicator class on the header cells.
 */
function renderResults() {
  const keyOf = resSortKeys[sortCol];
  const sorted = [...DATA].sort((a, b) => {
    const va = keyOf(a);
    const vb = keyOf(b);
    const cmp = typeof va === 'string' || typeof vb === 'string'
      ? String(va ?? '').localeCompare(String(vb ?? ''))
      : finiteOr0(va) - finiteOr0(vb);
    return sortAsc ? cmp : -cmp;
  });

  resBody.innerHTML = sorted.map(r => {
    const c = cls(r);
    // toFixed on a missing avgTokPerSec would throw and abort the whole
    // render, so show a dash instead.
    const tokPerSec = Number.isFinite(r.avgTokPerSec) ? r.avgTokPerSec.toFixed(0) : '-';
    const cells = [
      `<td class="model-name">${r.model}</td>`,
      `<td>${r.scenario}</td>`,
      `<td>${r.specOk ? `✓ ${r.specEntities}e` : '<span class="fail">✗</span>'}</td>`,
      `<td class="${c}">${pctBar(r.testsPassed, r.testsTotal)}</td>`,
      `<td>${r.fixRounds > 0 ? r.fixRounds + '×' : '-'}</td>`,
      `<td>${r.promptTokensEst > 0 ? '~' + (r.promptTokensEst / 1000).toFixed(1) + 'K' : '-'}</td>`,
      `<td>${r.totalTokens > 0 ? (r.totalTokens / 1000).toFixed(1) + 'K' : '-'}</td>`,
      `<td>${(r.totalDurationMs / 1000).toFixed(0)}s</td>`,
      `<td>${tokPerSec}</td>`,
      `<td><span class="stars">${r.stars}</span> ${r.score}p</td>`,
    ];
    return `<tr>\n${cells.join('\n')}\n</tr>`;
  }).join('');

  // Mark the active column so the CSS can draw the direction arrow.
  document.querySelectorAll('#results-table th').forEach((th, i) => {
    th.className = i === sortCol ? (sortAsc ? 'sorted-asc' : 'sorted-desc') : '';
  });
}
|
||||
// Clicking a header cell sorts by that column; clicking the active column
// again flips the direction. A newly chosen column starts descending.
document.querySelector('#results-table thead').addEventListener('click', e => {
  // Resolve the header cell even when the click lands on a nested element.
  const th = e.target.closest('th');
  if (!th) return;
  const col = Number.parseInt(th.dataset.col, 10);
  if (Number.isNaN(col)) return;
  if (sortCol === col) {
    sortAsc = !sortAsc;
  } else {
    sortCol = col;
    sortAsc = false;
  }
  renderResults();
});

// Initial render.
renderResults();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
1
kipina-codebench/results/2026-04-14T21-50-37.json
Normal file
1
kipina-codebench/results/2026-04-14T21-50-37.json
Normal file
@@ -0,0 +1 @@
|
||||
[]
|
||||
183
kipina-codebench/results/2026-04-14T21-50-42.html
Normal file
183
kipina-codebench/results/2026-04-14T21-50-42.html
Normal file
@@ -0,0 +1,183 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="fi">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Kipina Model Benchmark</title>
|
||||
<style>
|
||||
:root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
|
||||
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||
body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
|
||||
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
|
||||
.meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
|
||||
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
|
||||
.card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
|
||||
.card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||
.card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
|
||||
.card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
|
||||
table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
|
||||
th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
|
||||
th:hover { color: var(--text); }
|
||||
th.sorted-asc::after { content: ' ▲'; }
|
||||
th.sorted-desc::after { content: ' ▼'; }
|
||||
td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
|
||||
tr:hover td { background: #1c2128; }
|
||||
.pass { color: var(--green); }
|
||||
.partial { color: var(--yellow); }
|
||||
.fail { color: var(--red); }
|
||||
.stars { letter-spacing: 1px; }
|
||||
.bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
|
||||
.bar-bg { background: var(--border); }
|
||||
.bar-fill { background: var(--green); }
|
||||
.bar-partial { background: var(--yellow); }
|
||||
.model-name { font-weight: 600; }
|
||||
h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
|
||||
.summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<h1>Kipina Model Benchmark</h1>
|
||||
<div class="meta" id="meta"></div>
|
||||
|
||||
<div class="cards" id="cards"></div>
|
||||
|
||||
<h2>Mallikohtainen yhteenveto</h2>
|
||||
<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
|
||||
|
||||
<h2>Kaikki tulokset</h2>
|
||||
<table id="results-table"><thead></thead><tbody></tbody></table>
|
||||
|
||||
<script>
|
||||
const RAW = [];
|
||||
|
||||
/**
 * Convert a numeric score into a five-character star rating.
 * Thresholds: >= 90 five stars, >= 70 four, >= 50 three, >= 25 two,
 * any other positive score one star, everything else no stars.
 */
const starsFor = s => {
  if (s >= 90) return '★★★★★';
  if (s >= 70) return '★★★★☆';
  if (s >= 50) return '★★★☆☆';
  if (s >= 25) return '★★☆☆☆';
  if (s > 0) return '★☆☆☆☆';
  return '☆☆☆☆☆';
};
|
||||
/**
 * Compute the score of one benchmark run record.
 *
 * Points: 10 for a valid spec (specOk), 10 when the run has no error or
 * produced at least one test, up to 60 proportional to the share of passed
 * tests, and 20 minus 10 per fix round (never below 0). A run that has an
 * error and a test count of exactly 0 scores 0 outright.
 *
 * @param {object} r - run record (reads error, testsTotal, testsPassed, specOk, fixRounds)
 * @returns {number} score, capped at 100
 */
function calcScore(r) {
  if (r.error && r.testsTotal === 0) return 0;

  const hasTests = r.testsTotal > 0;
  const specPoints = r.specOk ? 10 : 0;
  const ranPoints = (!r.error || hasTests) ? 10 : 0;
  const testPoints = hasTests ? Math.round((r.testsPassed / r.testsTotal) * 60) : 0;
  const fixPoints = Math.max(0, 20 - (r.fixRounds || 0) * 10);

  return Math.min(100, specPoints + ranPoints + testPoints + fixPoints);
}
|
||||
// Compute the score if it is missing, and likewise fill in the star rating
// and the prompt token estimate when the record does not carry them.
// Each record is shallow-copied first so the objects in RAW are not mutated.
const DATA = RAW.map(raw => {
  const r = { ...raw };
  if (r.score == null) r.score = calcScore(r);
  if (!r.stars) r.stars = starsFor(r.score);
  // Fallback estimate: prompt length in characters divided by four.
  if (!r.promptTokensEst) r.promptTokensEst = r.promptChars ? Math.round(r.promptChars / 4) : 0;
  return r;
});
|
||||
/**
 * CSS class name for the test outcome of a run:
 * 'pass' when there is no error and all tests (at least one) passed,
 * 'partial' when tests exist and at least one passed, otherwise 'fail'.
 */
const cls = r => {
  const allPassed = !r.error && r.testsPassed === r.testsTotal && r.testsTotal > 0;
  if (allPassed) return 'pass';
  const somePassed = r.testsTotal > 0 && r.testsPassed > 0;
  return somePassed ? 'partial' : 'fail';
};
|
||||
/**
 * Render a small HTML progress bar followed by a "passed/total" label.
 *
 * @param {number} passed - number of passed tests
 * @param {number} total - number of tests
 * @param {number} [w=80] - width of the bar track in pixels
 * @returns {string} HTML snippet, or '-' when there is no positive test count
 */
const pctBar = (passed, total, w = 80) => {
  // Covers 0 as well as missing, NaN or negative totals, which would
  // otherwise put "NaNpx" widths into the markup.
  if (!(total > 0)) return '-';
  const done = Number.isFinite(passed) ? passed : 0;
  const pct = done / total * 100;
  const c = pct === 100 ? 'bar-fill' : 'bar-partial';
  // Clamp so inconsistent counts can never draw outside the track.
  const fill = Math.round(Math.min(100, Math.max(0, pct)) / 100 * w);
  return `<span class="bar bar-bg" style="width:${w}px"><span class="bar ${c}" style="width:${fill}px"></span></span> ${done}/${total}`;
};
|
||||
|
||||
// Small aggregation helpers shared by the meta line and the cards.
const sumField = (rows, field) => rows.reduce((acc, row) => acc + row[field], 0);
const meanField = (rows, field) => Math.round(sumField(rows, field) / rows.length);
const runsOf = model => DATA.filter(r => r.model === model);

// Meta line: date at render time, number of runs, total duration in minutes.
const totalTime = sumField(DATA, 'totalDurationMs');
const today = new Date().toLocaleDateString('fi-FI');
const totalMinutes = (totalTime / 1000 / 60).toFixed(1);
document.getElementById('meta').textContent = `${today} — ${DATA.length} ajoa — ${totalMinutes} min`;

// Cards: headline figures over all runs.
const models = [...new Set(DATA.map(r => r.model))];
const scenarios = [...new Set(DATA.map(r => r.scenario))];
const avgScore = DATA.length ? meanField(DATA, 'score') : 0;
const totalPassed = sumField(DATA, 'testsPassed');
const totalTests = sumField(DATA, 'testsTotal');
const passRate = totalTests ? Math.round(totalPassed / totalTests * 100) : 0;

// Model with the highest mean score; undefined when there are no runs.
const bestModel = models
  .map(m => ({ model: m, avg: meanField(runsOf(m), 'score') }))
  .sort((a, b) => b.avg - a.avg)[0];

// Model with the highest mean tokens per second; undefined when there are no runs.
const fastestModel = models
  .map(m => ({ model: m, speed: meanField(runsOf(m), 'avgTokPerSec') }))
  .sort((a, b) => b.speed - a.speed)[0];

// One card: label, large value and sub line; valueAttrs is appended to the value div's tag.
const cardHtml = (label, value, sub, valueAttrs = '') =>
  `<div class="card"><div class="label">${label}</div><div class="value"${valueAttrs}>${value}</div><div class="sub">${sub}</div></div>`;
const smallValue = ' style="font-size:1.2rem"';

const cardList = [
  cardHtml('Keskiarvo', starsFor(avgScore), `${avgScore} pistetta`),
  cardHtml('Testien läpäisy', `${passRate}%`, `${totalPassed}/${totalTests} testiä`),
  cardHtml('Paras malli', bestModel?.model || '-', `${bestModel?.avg || 0}p`, smallValue),
  cardHtml('Nopein', fastestModel?.model || '-', `${fastestModel?.speed || 0} tok/s`, smallValue),
  cardHtml('Malleja', models.length, `${scenarios.length} skenaariota`),
  cardHtml('Kokonaisaika', totalMinutes, 'minuuttia'),
];
document.getElementById('cards').innerHTML = `\n${cardList.join('\n')}\n`;
|
||||
|
||||
// Summary table: one row per model with a cell per scenario followed by
// totals over all of the model's runs. Rows are ordered by mean score,
// highest first.
const sumHead = document.querySelector('#summary-table thead');
const sumBody = document.querySelector('#summary-table tbody');
sumHead.innerHTML = `<tr><th>Malli</th>${scenarios.map(s => `<th>${s}</th>`).join('')}<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>`;

const modelRows = models.map(m => {
  const sumOver = (rows, key) => rows.reduce((acc, r) => acc + r[key], 0);
  const mrs = DATA.filter(r => r.model === m);
  const tp = sumOver(mrs, 'testsPassed');
  const tt = sumOver(mrs, 'testsTotal');
  const tok = sumOver(mrs, 'totalTokens');
  const time = sumOver(mrs, 'totalDurationMs');
  const speed = Math.round(sumOver(mrs, 'avgTokPerSec') / mrs.length);
  const avg = Math.round(sumOver(mrs, 'score') / mrs.length);

  const scenCols = scenarios.map(s => {
    // A model can have several runs (rounds) of the same scenario. Combine
    // all of them instead of showing only the first, so the cell agrees with
    // the totals column. With a single run the cell is unchanged.
    const runs = mrs.filter(r => r.scenario === s);
    if (runs.length === 0) return '<td>-</td>';
    const combined = {
      // Any failed run marks the combined cell as not fully passed.
      error: runs.find(r => r.error)?.error ?? null,
      testsPassed: sumOver(runs, 'testsPassed'),
      testsTotal: sumOver(runs, 'testsTotal'),
      totalDurationMs: sumOver(runs, 'totalDurationMs'),
    };
    return `<td class="${cls(combined)}">${pctBar(combined.testsPassed, combined.testsTotal, 60)} <span style="color:var(--dim)">${(combined.totalDurationMs / 1000).toFixed(0)}s</span></td>`;
  }).join('');

  return {
    avg,
    html: `<tr><td class="model-name">${m}</td>${scenCols}<td>${pctBar(tp, tt)}</td><td>${(tok / 1000).toFixed(1)}K</td><td>${(time / 1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>`,
  };
}).sort((a, b) => b.avg - a.avg);
sumBody.innerHTML = modelRows.map(r => r.html).join('');
|
||||
|
||||
// Results table: one row per run, sortable by column.
const resHead = document.querySelector('#results-table thead');
const resBody = document.querySelector('#results-table tbody');
const resCols = ['Malli','Skenaario','Speksi','Testit','Korjaus','Ctx','Out tok','Aika','tok/s','Pisteet'];
resHead.innerHTML = '<tr>' + resCols.map((c,i) => `<th data-col="${i}">${c}</th>`).join('') + '</tr>';

// Sort key per column, index-aligned with resCols. Defined once instead of
// being rebuilt for every comparison.
const resSortKeys = [
  r => r.model,
  r => r.scenario,
  r => r.specEntities,
  r => r.testsPassed / Math.max(r.testsTotal, 1),
  r => r.fixRounds,
  r => r.promptTokensEst,
  r => r.totalTokens,
  r => r.totalDurationMs,
  r => r.avgTokPerSec,
  r => r.score,
];
// Missing or NaN numbers sort as 0 so the comparator stays consistent.
const finiteOr0 = v => (Number.isFinite(v) ? v : 0);

// Current sort state: column index into resCols and direction.
let sortCol = 9;
let sortAsc = false;

/**
 * Re-render the results table body sorted by the current sortCol/sortAsc and
 * update the sort indicator class on the header cells.
 */
function renderResults() {
  const keyOf = resSortKeys[sortCol];
  const sorted = [...DATA].sort((a, b) => {
    const va = keyOf(a);
    const vb = keyOf(b);
    const cmp = typeof va === 'string' || typeof vb === 'string'
      ? String(va ?? '').localeCompare(String(vb ?? ''))
      : finiteOr0(va) - finiteOr0(vb);
    return sortAsc ? cmp : -cmp;
  });

  resBody.innerHTML = sorted.map(r => {
    const c = cls(r);
    // toFixed on a missing avgTokPerSec would throw and abort the whole
    // render, so show a dash instead.
    const tokPerSec = Number.isFinite(r.avgTokPerSec) ? r.avgTokPerSec.toFixed(0) : '-';
    const cells = [
      `<td class="model-name">${r.model}</td>`,
      `<td>${r.scenario}</td>`,
      `<td>${r.specOk ? `✓ ${r.specEntities}e` : '<span class="fail">✗</span>'}</td>`,
      `<td class="${c}">${pctBar(r.testsPassed, r.testsTotal)}</td>`,
      `<td>${r.fixRounds > 0 ? r.fixRounds + '×' : '-'}</td>`,
      `<td>${r.promptTokensEst > 0 ? '~' + (r.promptTokensEst / 1000).toFixed(1) + 'K' : '-'}</td>`,
      `<td>${r.totalTokens > 0 ? (r.totalTokens / 1000).toFixed(1) + 'K' : '-'}</td>`,
      `<td>${(r.totalDurationMs / 1000).toFixed(0)}s</td>`,
      `<td>${tokPerSec}</td>`,
      `<td><span class="stars">${r.stars}</span> ${r.score}p</td>`,
    ];
    return `<tr>\n${cells.join('\n')}\n</tr>`;
  }).join('');

  // Mark the active column so the CSS can draw the direction arrow.
  document.querySelectorAll('#results-table th').forEach((th, i) => {
    th.className = i === sortCol ? (sortAsc ? 'sorted-asc' : 'sorted-desc') : '';
  });
}
|
||||
// Clicking a header cell sorts by that column; clicking the active column
// again flips the direction. A newly chosen column starts descending.
document.querySelector('#results-table thead').addEventListener('click', e => {
  // Resolve the header cell even when the click lands on a nested element.
  const th = e.target.closest('th');
  if (!th) return;
  const col = Number.parseInt(th.dataset.col, 10);
  if (Number.isNaN(col)) return;
  if (sortCol === col) {
    sortAsc = !sortAsc;
  } else {
    sortCol = col;
    sortAsc = false;
  }
  renderResults();
});

// Initial render.
renderResults();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
1
kipina-codebench/results/2026-04-14T21-50-42.json
Normal file
1
kipina-codebench/results/2026-04-14T21-50-42.json
Normal file
@@ -0,0 +1 @@
|
||||
[]
|
||||
Reference in New Issue
Block a user