Poistettu virheelliset 8b Go-tulokset (väärä promptti: code-small → Python)
This commit is contained in:
183
kipina-codebench/results/2026-04-14T13-11.html
Normal file
183
kipina-codebench/results/2026-04-14T13-11.html
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="fi">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>Kipina Model Benchmark</title>
|
||||||
|
<style>
|
||||||
|
:root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
|
||||||
|
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||||
|
body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
|
||||||
|
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
|
||||||
|
.meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
|
||||||
|
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
|
||||||
|
.card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
|
||||||
|
.card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||||
|
.card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
|
||||||
|
.card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
|
||||||
|
table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
|
||||||
|
th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
|
||||||
|
th:hover { color: var(--text); }
|
||||||
|
th.sorted-asc::after { content: ' ▲'; }
|
||||||
|
th.sorted-desc::after { content: ' ▼'; }
|
||||||
|
td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
|
||||||
|
tr:hover td { background: #1c2128; }
|
||||||
|
.pass { color: var(--green); }
|
||||||
|
.partial { color: var(--yellow); }
|
||||||
|
.fail { color: var(--red); }
|
||||||
|
.stars { letter-spacing: 1px; }
|
||||||
|
.bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
|
||||||
|
.bar-bg { background: var(--border); }
|
||||||
|
.bar-fill { background: var(--green); }
|
||||||
|
.bar-partial { background: var(--yellow); }
|
||||||
|
.model-name { font-weight: 600; }
|
||||||
|
h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
|
||||||
|
.summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
|
||||||
|
<h1>Kipina Model Benchmark</h1>
|
||||||
|
<div class="meta" id="meta"></div>
|
||||||
|
|
||||||
|
<div class="cards" id="cards"></div>
|
||||||
|
|
||||||
|
<h2>Mallikohtainen yhteenveto</h2>
|
||||||
|
<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
|
||||||
|
|
||||||
|
<h2>Kaikki tulokset</h2>
|
||||||
|
<table id="results-table"><thead></thead><tbody></tbody></table>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
const RAW = [{"model":"qwen3:8b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":0,"totalTokens":0,"avgTokPerSec":0,"promptChars":10508,"promptTokensEst":2627,"score":0,"stars":"","error":"Puuttuvat: Cargo.toml, src/models.rs, src/handlers.rs, src/lib.rs, src/main.rs, tests/api_test.rs","round":1},{"model":"qwen3:8b","scenario":"blog","reqOk":true,"specOk":false,"specEntities":0,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":0,"totalTokens":0,"avgTokPerSec":0,"promptChars":0,"promptTokensEst":0,"score":0,"stars":"","error":"JSON-speksi epäonnistui","round":2},{"model":"qwen3:8b","scenario":"blog","reqOk":true,"specOk":false,"specEntities":0,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":0,"totalTokens":0,"avgTokPerSec":0,"promptChars":0,"promptTokensEst":0,"score":0,"stars":"","error":"JSON-speksi epäonnistui","round":3},{"model":"qwen3:8b","scenario":"blog","reqOk":true,"specOk":false,"specEntities":0,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":0,"totalTokens":0,"avgTokPerSec":0,"promptChars":0,"promptTokensEst":0,"score":0,"stars":"","error":"JSON-speksi epäonnistui","round":4},{"model":"qwen3:8b","scenario":"blog","reqOk":true,"specOk":false,"specEntities":0,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":0,"totalTokens":0,"avgTokPerSec":0,"promptChars":0,"promptTokensEst":0,"score":0,"stars":"","error":"JSON-speksi epäonnistui","round":5}];
|
||||||
|
|
||||||
|
/**
 * Converts a numeric score into a five-character star rating.
 * Thresholds: >= 90 five stars, >= 70 four, >= 50 three, >= 25 two,
 * anything above 0 one star, otherwise none.
 * @param {number} s - Score to rate.
 * @returns {string} Five characters of filled/empty stars.
 */
const starsFor = (s) => {
  if (s >= 90) return '★★★★★';
  if (s >= 70) return '★★★★☆';
  if (s >= 50) return '★★★☆☆';
  if (s >= 25) return '★★☆☆☆';
  if (s > 0) return '★☆☆☆☆';
  return '☆☆☆☆☆';
};
|
||||||
|
/**
 * Computes the score of one benchmark run.
 *
 * Breakdown: 10 points for a valid spec, 10 points when the run has no error
 * or still produced tests, up to 60 points in proportion to passed tests, and
 * 20 points that shrink by 10 for every fix round. A run that errored without
 * producing any test scores 0.
 *
 * Missing or non-numeric counters are treated as 0, so the result is never NaN
 * and an errored record without a `testsTotal` field still scores 0.
 *
 * @param {object} r - Run record (reads specOk, error, testsTotal, testsPassed, fixRounds).
 * @returns {number} Integer score, capped at 100.
 */
function calcScore(r) {
  const total = Number(r.testsTotal) || 0;
  const passed = Number(r.testsPassed) || 0;
  const fixRounds = Number(r.fixRounds) || 0;

  if (r.error && total === 0) return 0;

  let s = 0;
  if (r.specOk) s += 10;
  if (!r.error || total > 0) s += 10;
  if (total > 0) s += Math.round((passed / total) * 60);
  s += Math.max(0, 20 - fixRounds * 10);
  return Math.min(100, s);
}
|
||||||
|
// Compute derived fields (score, stars, prompt token estimate) when a record lacks them.
// Each record is shallow-copied so the embedded RAW data is left untouched.
const DATA = RAW.map((raw) => {
  const r = { ...raw };
  // Counters missing from a record count as 0 so later sums and toFixed() calls stay numeric.
  // NOTE(review): assumes some result files may omit these fields — confirm against the generator.
  for (const key of ['testsTotal', 'testsPassed', 'totalDurationMs', 'totalTokens', 'avgTokPerSec']) {
    r[key] = Number(r[key]) || 0;
  }
  if (r.score == null) r.score = calcScore(r);
  if (!r.stars) r.stars = starsFor(r.score);
  // Rough estimate: four characters per token.
  if (!r.promptTokensEst) r.promptTokensEst = r.promptChars ? Math.round(r.promptChars / 4) : 0;
  return r;
});
|
||||||
|
/**
 * Picks the CSS class for a run: 'pass' when there is no error and every test
 * passed, 'partial' when at least one test passed, otherwise 'fail'.
 * @param {object} r - Run record (reads error, testsPassed, testsTotal).
 * @returns {string} 'pass', 'partial' or 'fail'.
 */
const cls = (r) => {
  const hasTests = r.testsTotal > 0;
  const allGreen = !r.error && r.testsPassed === r.testsTotal && hasTests;
  if (allGreen) return 'pass';
  return hasTests && r.testsPassed > 0 ? 'partial' : 'fail';
};
|
||||||
|
/**
 * Renders a pass-ratio bar followed by the text "passed/total".
 *
 * Returns '-' whenever `total` is not a positive number (0, missing, negative),
 * so the markup never contains NaN widths. The fill is clamped to the track
 * width even if `passed` exceeds `total`.
 *
 * @param {number} passed - Number of passed tests.
 * @param {number} total - Number of tests.
 * @param {number} [w=80] - Track width in pixels.
 * @returns {string} HTML fragment, or '-' when there is nothing to show.
 */
const pctBar = (passed, total, w = 80) => {
  if (!(total > 0)) return '-';
  const done = Number(passed) || 0;
  const pct = Math.min(100, Math.max(0, (done / total) * 100));
  // Green only for a completely full bar; anything less is drawn as partial.
  const c = pct === 100 ? 'bar-fill' : 'bar-partial';
  const fillPx = Math.round((pct / 100) * w);
  return `<span class="bar bar-bg" style="width:${w}px"><span class="bar ${c}" style="width:${fillPx}px"></span></span> ${done}/${total}`;
};
|
||||||
|
|
||||||
|
// Meta line: date, number of runs and summed run duration in minutes.
// Note: the date comes from `new Date()`, i.e. when the page is opened.
const totalTime = DATA
  .map((run) => run.totalDurationMs)
  .reduce((sum, ms) => sum + ms, 0);
const metaParts = [
  new Date().toLocaleDateString('fi-FI'),
  `${DATA.length} ajoa`,
  `${(totalTime / 1000 / 60).toFixed(1)} min`,
];
document.getElementById('meta').textContent = metaParts.join(' — ');
|
||||||
|
|
||||||
|
// Cards: headline figures computed over every run in DATA.
const models = [...new Set(DATA.map((r) => r.model))];
const scenarios = [...new Set(DATA.map((r) => r.scenario))];
const avgScore = DATA.length ? Math.round(DATA.reduce((s, r) => s + r.score, 0) / DATA.length) : 0;
const totalPassed = DATA.reduce((s, r) => s + r.testsPassed, 0);
const totalTests = DATA.reduce((s, r) => s + r.testsTotal, 0);
const passRate = totalTests ? Math.round((totalPassed / totalTests) * 100) : 0;

// Model with the highest mean score (undefined when there are no runs).
const bestModel = models.map((m) => {
  const mrs = DATA.filter((r) => r.model === m);
  return { model: m, avg: Math.round(mrs.reduce((s, r) => s + r.score, 0) / mrs.length) };
}).sort((a, b) => b.avg - a.avg)[0];

// Model with the highest mean tok/s; every run of the model counts towards the mean.
const fastestModel = models.map((m) => {
  const mrs = DATA.filter((r) => r.model === m);
  return { model: m, speed: Math.round(mrs.reduce((s, r) => s + r.avgTokPerSec, 0) / mrs.length) };
}).sort((a, b) => b.speed - a.speed)[0];

// Label fix: "pistettä" (was misspelled "pistetta").
document.getElementById('cards').innerHTML = `
<div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistettä</div></div>
<div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
<div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${bestModel?.model || '-'}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
<div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${fastestModel?.model || '-'}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
<div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
<div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime / 1000 / 60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
`;
|
||||||
|
|
||||||
|
// Summary table: one row per model, highest average score first.
const sumHead = document.querySelector('#summary-table thead');
const sumBody = document.querySelector('#summary-table tbody');
sumHead.innerHTML = '<tr><th>Malli</th>' + scenarios.map(s => `<th>${s}</th>`).join('') + '<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>';

const modelRows = models.map((m) => {
  const mrs = DATA.filter((r) => r.model === m);
  // Sum of one numeric field over a list of runs.
  const total = (runs, field) => runs.reduce((s, r) => s + r[field], 0);
  const tp = total(mrs, 'testsPassed');
  const tt = total(mrs, 'testsTotal');
  const tok = total(mrs, 'totalTokens');
  const time = total(mrs, 'totalDurationMs');
  const speed = Math.round(total(mrs, 'avgTokPerSec') / mrs.length);
  const avg = Math.round(total(mrs, 'score') / mrs.length);

  const scenCols = scenarios.map((s) => {
    // A model can have several runs (rounds) of the same scenario. Fold all of
    // them into the cell instead of showing only the first one; with a single
    // run the cell is identical to that run's own figures.
    const runs = mrs.filter((r) => r.scenario === s);
    if (runs.length === 0) return '<td>-</td>';
    const cell = {
      error: runs.some((r) => r.error),
      testsPassed: total(runs, 'testsPassed'),
      testsTotal: total(runs, 'testsTotal'),
      totalDurationMs: total(runs, 'totalDurationMs'),
    };
    return `<td class="${cls(cell)}">${pctBar(cell.testsPassed, cell.testsTotal, 60)} <span style="color:var(--dim)">${(cell.totalDurationMs / 1000).toFixed(0)}s</span></td>`;
  }).join('');

  return { avg, html: `<tr><td class="model-name">${m}</td>${scenCols}<td>${pctBar(tp, tt)}</td><td>${(tok / 1000).toFixed(1)}K</td><td>${(time / 1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>` };
}).sort((a, b) => b.avg - a.avg);
sumBody.innerHTML = modelRows.map((r) => r.html).join('');
|
||||||
|
|
||||||
|
// Results table: every header cell carries its column index in data-col.
const resHead = document.querySelector('#results-table thead');
const resBody = document.querySelector('#results-table tbody');
const resCols = ['Malli','Skenaario','Speksi','Testit','Korjaus','Ctx','Out tok','Aika','tok/s','Pisteet'];
const headCells = resCols.map((label, idx) => `<th data-col="${idx}">${label}</th>`);
resHead.innerHTML = `<tr>${headCells.join('')}</tr>`;

// Initial sort state: column 9 ('Pisteet'), descending.
let sortCol = 9;
let sortAsc = false;
|
||||||
|
/**
 * Re-renders the body of the results table ordered by the module-level
 * `sortCol` / `sortAsc` state, then marks the active header cell with
 * `sorted-asc` or `sorted-desc`.
 *
 * Missing numeric fields are treated as 0 so a sparse record can neither throw
 * (`toFixed` on undefined) nor poison the comparator with NaN. Model and
 * scenario names are HTML-escaped because the rows are written via innerHTML.
 */
function renderResults() {
  const num = (v) => Number(v) || 0;
  const text = (v) => String(v ?? '');
  const esc = (v) => text(v).replace(/[&<>"']/g, (ch) => ({
    '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;',
  }[ch]));

  // Sort key per column, in the same order as the header cells.
  const keyFns = [
    (r) => text(r.model),
    (r) => text(r.scenario),
    (r) => num(r.specEntities),
    (r) => num(r.testsPassed) / Math.max(num(r.testsTotal), 1),
    (r) => num(r.fixRounds),
    (r) => num(r.promptTokensEst),
    (r) => num(r.totalTokens),
    (r) => num(r.totalDurationMs),
    (r) => num(r.avgTokPerSec),
    (r) => num(r.score),
  ];
  const keyOf = keyFns[sortCol];

  // Sort a copy so DATA keeps its original order.
  const sorted = [...DATA].sort((a, b) => {
    const x = keyOf(a);
    const y = keyOf(b);
    const cmp = typeof x === 'string' ? x.localeCompare(y) : x - y;
    return sortAsc ? cmp : -cmp;
  });

  resBody.innerHTML = sorted.map((r) => {
    const c = cls(r);
    const cells = [
      `<td class="model-name">${esc(r.model)}</td>`,
      `<td>${esc(r.scenario)}</td>`,
      `<td>${r.specOk ? `✓ ${r.specEntities}e` : '<span class="fail">✗</span>'}</td>`,
      `<td class="${c}">${pctBar(r.testsPassed, r.testsTotal)}</td>`,
      `<td>${r.fixRounds > 0 ? r.fixRounds + '×' : '-'}</td>`,
      `<td>${r.promptTokensEst > 0 ? '~' + (r.promptTokensEst / 1000).toFixed(1) + 'K' : '-'}</td>`,
      `<td>${r.totalTokens > 0 ? (r.totalTokens / 1000).toFixed(1) + 'K' : '-'}</td>`,
      `<td>${(num(r.totalDurationMs) / 1000).toFixed(0)}s</td>`,
      `<td>${num(r.avgTokPerSec).toFixed(0)}</td>`,
      `<td><span class="stars">${r.stars}</span> ${r.score}p</td>`,
    ];
    return `<tr>${cells.join('')}</tr>`;
  }).join('');

  document.querySelectorAll('#results-table th').forEach((th, i) => {
    th.className = i === sortCol ? (sortAsc ? 'sorted-asc' : 'sorted-desc') : '';
  });
}
|
||||||
|
// Clicking a header sorts by that column (descending first); clicking the
// active column again flips the direction.
document.querySelector('#results-table thead').addEventListener('click', (e) => {
  // closest() keeps this working if a header cell ever contains child elements.
  const th = e.target.closest('th');
  const col = Number.parseInt(th?.dataset.col, 10);
  if (Number.isNaN(col)) return;
  if (sortCol === col) {
    sortAsc = !sortAsc;
  } else {
    sortCol = col;
    sortAsc = false;
  }
  renderResults();
});

// Initial render with the default sort state.
renderResults();
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
107
kipina-codebench/results/2026-04-14T13-11.json
Normal file
107
kipina-codebench/results/2026-04-14T13-11.json
Normal file
@@ -0,0 +1,107 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"model": "qwen3:8b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 1,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 0,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 0,
|
||||||
|
"totalTokens": 0,
|
||||||
|
"avgTokPerSec": 0,
|
||||||
|
"promptChars": 10508,
|
||||||
|
"promptTokensEst": 2627,
|
||||||
|
"score": 0,
|
||||||
|
"stars": "",
|
||||||
|
"error": "Puuttuvat: Cargo.toml, src/models.rs, src/handlers.rs, src/lib.rs, src/main.rs, tests/api_test.rs",
|
||||||
|
"round": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3:8b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": false,
|
||||||
|
"specEntities": 0,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 0,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 0,
|
||||||
|
"totalTokens": 0,
|
||||||
|
"avgTokPerSec": 0,
|
||||||
|
"promptChars": 0,
|
||||||
|
"promptTokensEst": 0,
|
||||||
|
"score": 0,
|
||||||
|
"stars": "",
|
||||||
|
"error": "JSON-speksi epäonnistui",
|
||||||
|
"round": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3:8b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": false,
|
||||||
|
"specEntities": 0,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 0,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 0,
|
||||||
|
"totalTokens": 0,
|
||||||
|
"avgTokPerSec": 0,
|
||||||
|
"promptChars": 0,
|
||||||
|
"promptTokensEst": 0,
|
||||||
|
"score": 0,
|
||||||
|
"stars": "",
|
||||||
|
"error": "JSON-speksi epäonnistui",
|
||||||
|
"round": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3:8b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": false,
|
||||||
|
"specEntities": 0,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 0,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 0,
|
||||||
|
"totalTokens": 0,
|
||||||
|
"avgTokPerSec": 0,
|
||||||
|
"promptChars": 0,
|
||||||
|
"promptTokensEst": 0,
|
||||||
|
"score": 0,
|
||||||
|
"stars": "",
|
||||||
|
"error": "JSON-speksi epäonnistui",
|
||||||
|
"round": 4
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3:8b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": false,
|
||||||
|
"specEntities": 0,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 0,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 0,
|
||||||
|
"totalTokens": 0,
|
||||||
|
"avgTokPerSec": 0,
|
||||||
|
"promptChars": 0,
|
||||||
|
"promptTokensEst": 0,
|
||||||
|
"score": 0,
|
||||||
|
"stars": "",
|
||||||
|
"error": "JSON-speksi epäonnistui",
|
||||||
|
"round": 5
|
||||||
|
}
|
||||||
|
]
|
||||||
183
kipina-codebench/results/2026-04-14T13-12.html
Normal file
183
kipina-codebench/results/2026-04-14T13-12.html
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="fi">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>Kipina Model Benchmark</title>
|
||||||
|
<style>
|
||||||
|
:root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
|
||||||
|
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||||
|
body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
|
||||||
|
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
|
||||||
|
.meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
|
||||||
|
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
|
||||||
|
.card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
|
||||||
|
.card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||||
|
.card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
|
||||||
|
.card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
|
||||||
|
table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
|
||||||
|
th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
|
||||||
|
th:hover { color: var(--text); }
|
||||||
|
th.sorted-asc::after { content: ' ▲'; }
|
||||||
|
th.sorted-desc::after { content: ' ▼'; }
|
||||||
|
td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
|
||||||
|
tr:hover td { background: #1c2128; }
|
||||||
|
.pass { color: var(--green); }
|
||||||
|
.partial { color: var(--yellow); }
|
||||||
|
.fail { color: var(--red); }
|
||||||
|
.stars { letter-spacing: 1px; }
|
||||||
|
.bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
|
||||||
|
.bar-bg { background: var(--border); }
|
||||||
|
.bar-fill { background: var(--green); }
|
||||||
|
.bar-partial { background: var(--yellow); }
|
||||||
|
.model-name { font-weight: 600; }
|
||||||
|
h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
|
||||||
|
.summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
|
||||||
|
<h1>Kipina Model Benchmark</h1>
|
||||||
|
<div class="meta" id="meta"></div>
|
||||||
|
|
||||||
|
<div class="cards" id="cards"></div>
|
||||||
|
|
||||||
|
<h2>Mallikohtainen yhteenveto</h2>
|
||||||
|
<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
|
||||||
|
|
||||||
|
<h2>Kaikki tulokset</h2>
|
||||||
|
<table id="results-table"><thead></thead><tbody></tbody></table>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
const RAW = [{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":3,"testsTotal":3,"testsPassed":0,"testsFailed":3,"totalDurationMs":217110,"totalTokens":21602,"avgTokPerSec":114.70956637458333,"promptChars":12612,"promptTokensEst":3153,"score":20,"stars":"★☆☆☆☆","error":null,"profile":"large","promptName":"code-rs","round":1},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":3,"testsTotal":3,"testsPassed":0,"testsFailed":3,"totalDurationMs":204772,"totalTokens":20717,"avgTokPerSec":114.45999021594592,"promptChars":12743,"promptTokensEst":3186,"score":20,"stars":"★☆☆☆☆","error":null,"profile":"large","promptName":"code-rs","round":2},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":3,"testsTotal":4,"testsPassed":0,"testsFailed":4,"totalDurationMs":180501,"totalTokens":18467,"avgTokPerSec":115.23583963958032,"promptChars":12392,"promptTokensEst":3098,"score":20,"stars":"★☆☆☆☆","error":null,"profile":"large","promptName":"code-rs","round":3},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":3,"testsTotal":25,"testsPassed":0,"testsFailed":25,"totalDurationMs":282681,"totalTokens":27665,"avgTokPerSec":111.29688837623901,"promptChars":12675,"promptTokensEst":3169,"score":20,"stars":"★☆☆☆☆","error":null,"profile":"large","promptName":"code-rs","round":4},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":3,"testsTotal":5,"testsPassed":0,"testsFailed":5,"totalDurationMs":171686,"totalTokens":17525,"avgTokPerSec":114.88288274375243,"promptChars":12618,"promptTokensEst":3155,"score":20,"stars":"★☆☆☆☆","error":null,"profile":"large","promptName":"code-rs","round":5}];
|
||||||
|
|
||||||
|
/**
 * Converts a numeric score into a five-character star rating.
 * Thresholds: >= 90 five stars, >= 70 four, >= 50 three, >= 25 two,
 * anything above 0 one star, otherwise none.
 * @param {number} s - Score to rate.
 * @returns {string} Five characters of filled/empty stars.
 */
const starsFor = (s) => {
  if (s >= 90) return '★★★★★';
  if (s >= 70) return '★★★★☆';
  if (s >= 50) return '★★★☆☆';
  if (s >= 25) return '★★☆☆☆';
  if (s > 0) return '★☆☆☆☆';
  return '☆☆☆☆☆';
};
|
||||||
|
/**
 * Computes the score of one benchmark run.
 *
 * Breakdown: 10 points for a valid spec, 10 points when the run has no error
 * or still produced tests, up to 60 points in proportion to passed tests, and
 * 20 points that shrink by 10 for every fix round. A run that errored without
 * producing any test scores 0.
 *
 * Missing or non-numeric counters are treated as 0, so the result is never NaN
 * and an errored record without a `testsTotal` field still scores 0.
 *
 * @param {object} r - Run record (reads specOk, error, testsTotal, testsPassed, fixRounds).
 * @returns {number} Integer score, capped at 100.
 */
function calcScore(r) {
  const total = Number(r.testsTotal) || 0;
  const passed = Number(r.testsPassed) || 0;
  const fixRounds = Number(r.fixRounds) || 0;

  if (r.error && total === 0) return 0;

  let s = 0;
  if (r.specOk) s += 10;
  if (!r.error || total > 0) s += 10;
  if (total > 0) s += Math.round((passed / total) * 60);
  s += Math.max(0, 20 - fixRounds * 10);
  return Math.min(100, s);
}
|
||||||
|
// Compute derived fields (score, stars, prompt token estimate) when a record lacks them.
// Each record is shallow-copied so the embedded RAW data is left untouched.
const DATA = RAW.map((raw) => {
  const r = { ...raw };
  // Counters missing from a record count as 0 so later sums and toFixed() calls stay numeric.
  // NOTE(review): assumes some result files may omit these fields — confirm against the generator.
  for (const key of ['testsTotal', 'testsPassed', 'totalDurationMs', 'totalTokens', 'avgTokPerSec']) {
    r[key] = Number(r[key]) || 0;
  }
  if (r.score == null) r.score = calcScore(r);
  if (!r.stars) r.stars = starsFor(r.score);
  // Rough estimate: four characters per token.
  if (!r.promptTokensEst) r.promptTokensEst = r.promptChars ? Math.round(r.promptChars / 4) : 0;
  return r;
});
|
||||||
|
/**
 * Picks the CSS class for a run: 'pass' when there is no error and every test
 * passed, 'partial' when at least one test passed, otherwise 'fail'.
 * @param {object} r - Run record (reads error, testsPassed, testsTotal).
 * @returns {string} 'pass', 'partial' or 'fail'.
 */
const cls = (r) => {
  const hasTests = r.testsTotal > 0;
  const allGreen = !r.error && r.testsPassed === r.testsTotal && hasTests;
  if (allGreen) return 'pass';
  return hasTests && r.testsPassed > 0 ? 'partial' : 'fail';
};
|
||||||
|
/**
 * Renders a pass-ratio bar followed by the text "passed/total".
 *
 * Returns '-' whenever `total` is not a positive number (0, missing, negative),
 * so the markup never contains NaN widths. The fill is clamped to the track
 * width even if `passed` exceeds `total`.
 *
 * @param {number} passed - Number of passed tests.
 * @param {number} total - Number of tests.
 * @param {number} [w=80] - Track width in pixels.
 * @returns {string} HTML fragment, or '-' when there is nothing to show.
 */
const pctBar = (passed, total, w = 80) => {
  if (!(total > 0)) return '-';
  const done = Number(passed) || 0;
  const pct = Math.min(100, Math.max(0, (done / total) * 100));
  // Green only for a completely full bar; anything less is drawn as partial.
  const c = pct === 100 ? 'bar-fill' : 'bar-partial';
  const fillPx = Math.round((pct / 100) * w);
  return `<span class="bar bar-bg" style="width:${w}px"><span class="bar ${c}" style="width:${fillPx}px"></span></span> ${done}/${total}`;
};
|
||||||
|
|
||||||
|
// Meta line: date, number of runs and summed run duration in minutes.
// Note: the date comes from `new Date()`, i.e. when the page is opened.
const totalTime = DATA
  .map((run) => run.totalDurationMs)
  .reduce((sum, ms) => sum + ms, 0);
const metaParts = [
  new Date().toLocaleDateString('fi-FI'),
  `${DATA.length} ajoa`,
  `${(totalTime / 1000 / 60).toFixed(1)} min`,
];
document.getElementById('meta').textContent = metaParts.join(' — ');
|
||||||
|
|
||||||
|
// Cards: headline figures computed over every run in DATA.
const models = [...new Set(DATA.map((r) => r.model))];
const scenarios = [...new Set(DATA.map((r) => r.scenario))];
const avgScore = DATA.length ? Math.round(DATA.reduce((s, r) => s + r.score, 0) / DATA.length) : 0;
const totalPassed = DATA.reduce((s, r) => s + r.testsPassed, 0);
const totalTests = DATA.reduce((s, r) => s + r.testsTotal, 0);
const passRate = totalTests ? Math.round((totalPassed / totalTests) * 100) : 0;

// Model with the highest mean score (undefined when there are no runs).
const bestModel = models.map((m) => {
  const mrs = DATA.filter((r) => r.model === m);
  return { model: m, avg: Math.round(mrs.reduce((s, r) => s + r.score, 0) / mrs.length) };
}).sort((a, b) => b.avg - a.avg)[0];

// Model with the highest mean tok/s; every run of the model counts towards the mean.
const fastestModel = models.map((m) => {
  const mrs = DATA.filter((r) => r.model === m);
  return { model: m, speed: Math.round(mrs.reduce((s, r) => s + r.avgTokPerSec, 0) / mrs.length) };
}).sort((a, b) => b.speed - a.speed)[0];

// Label fix: "pistettä" (was misspelled "pistetta").
document.getElementById('cards').innerHTML = `
<div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistettä</div></div>
<div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
<div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${bestModel?.model || '-'}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
<div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${fastestModel?.model || '-'}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
<div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
<div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime / 1000 / 60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
`;
|
||||||
|
|
||||||
|
// Summary table: one row per model, highest average score first.
const sumHead = document.querySelector('#summary-table thead');
const sumBody = document.querySelector('#summary-table tbody');
sumHead.innerHTML = '<tr><th>Malli</th>' + scenarios.map(s => `<th>${s}</th>`).join('') + '<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>';

const modelRows = models.map((m) => {
  const mrs = DATA.filter((r) => r.model === m);
  // Sum of one numeric field over a list of runs.
  const total = (runs, field) => runs.reduce((s, r) => s + r[field], 0);
  const tp = total(mrs, 'testsPassed');
  const tt = total(mrs, 'testsTotal');
  const tok = total(mrs, 'totalTokens');
  const time = total(mrs, 'totalDurationMs');
  const speed = Math.round(total(mrs, 'avgTokPerSec') / mrs.length);
  const avg = Math.round(total(mrs, 'score') / mrs.length);

  const scenCols = scenarios.map((s) => {
    // A model can have several runs (rounds) of the same scenario. Fold all of
    // them into the cell instead of showing only the first one; with a single
    // run the cell is identical to that run's own figures.
    const runs = mrs.filter((r) => r.scenario === s);
    if (runs.length === 0) return '<td>-</td>';
    const cell = {
      error: runs.some((r) => r.error),
      testsPassed: total(runs, 'testsPassed'),
      testsTotal: total(runs, 'testsTotal'),
      totalDurationMs: total(runs, 'totalDurationMs'),
    };
    return `<td class="${cls(cell)}">${pctBar(cell.testsPassed, cell.testsTotal, 60)} <span style="color:var(--dim)">${(cell.totalDurationMs / 1000).toFixed(0)}s</span></td>`;
  }).join('');

  return { avg, html: `<tr><td class="model-name">${m}</td>${scenCols}<td>${pctBar(tp, tt)}</td><td>${(tok / 1000).toFixed(1)}K</td><td>${(time / 1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>` };
}).sort((a, b) => b.avg - a.avg);
sumBody.innerHTML = modelRows.map((r) => r.html).join('');
|
||||||
|
|
||||||
|
// Results table: every header cell carries its column index in data-col.
const resHead = document.querySelector('#results-table thead');
const resBody = document.querySelector('#results-table tbody');
const resCols = ['Malli','Skenaario','Speksi','Testit','Korjaus','Ctx','Out tok','Aika','tok/s','Pisteet'];
const headCells = resCols.map((label, idx) => `<th data-col="${idx}">${label}</th>`);
resHead.innerHTML = `<tr>${headCells.join('')}</tr>`;

// Initial sort state: column 9 ('Pisteet'), descending.
let sortCol = 9;
let sortAsc = false;
|
||||||
|
/**
 * Re-renders the body of the results table ordered by the module-level
 * `sortCol` / `sortAsc` state, then marks the active header cell with
 * `sorted-asc` or `sorted-desc`.
 *
 * Missing numeric fields are treated as 0 so a sparse record can neither throw
 * (`toFixed` on undefined) nor poison the comparator with NaN. Model and
 * scenario names are HTML-escaped because the rows are written via innerHTML.
 */
function renderResults() {
  const num = (v) => Number(v) || 0;
  const text = (v) => String(v ?? '');
  const esc = (v) => text(v).replace(/[&<>"']/g, (ch) => ({
    '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;',
  }[ch]));

  // Sort key per column, in the same order as the header cells.
  const keyFns = [
    (r) => text(r.model),
    (r) => text(r.scenario),
    (r) => num(r.specEntities),
    (r) => num(r.testsPassed) / Math.max(num(r.testsTotal), 1),
    (r) => num(r.fixRounds),
    (r) => num(r.promptTokensEst),
    (r) => num(r.totalTokens),
    (r) => num(r.totalDurationMs),
    (r) => num(r.avgTokPerSec),
    (r) => num(r.score),
  ];
  const keyOf = keyFns[sortCol];

  // Sort a copy so DATA keeps its original order.
  const sorted = [...DATA].sort((a, b) => {
    const x = keyOf(a);
    const y = keyOf(b);
    const cmp = typeof x === 'string' ? x.localeCompare(y) : x - y;
    return sortAsc ? cmp : -cmp;
  });

  resBody.innerHTML = sorted.map((r) => {
    const c = cls(r);
    const cells = [
      `<td class="model-name">${esc(r.model)}</td>`,
      `<td>${esc(r.scenario)}</td>`,
      `<td>${r.specOk ? `✓ ${r.specEntities}e` : '<span class="fail">✗</span>'}</td>`,
      `<td class="${c}">${pctBar(r.testsPassed, r.testsTotal)}</td>`,
      `<td>${r.fixRounds > 0 ? r.fixRounds + '×' : '-'}</td>`,
      `<td>${r.promptTokensEst > 0 ? '~' + (r.promptTokensEst / 1000).toFixed(1) + 'K' : '-'}</td>`,
      `<td>${r.totalTokens > 0 ? (r.totalTokens / 1000).toFixed(1) + 'K' : '-'}</td>`,
      `<td>${(num(r.totalDurationMs) / 1000).toFixed(0)}s</td>`,
      `<td>${num(r.avgTokPerSec).toFixed(0)}</td>`,
      `<td><span class="stars">${r.stars}</span> ${r.score}p</td>`,
    ];
    return `<tr>${cells.join('')}</tr>`;
  }).join('');

  document.querySelectorAll('#results-table th').forEach((th, i) => {
    th.className = i === sortCol ? (sortAsc ? 'sorted-asc' : 'sorted-desc') : '';
  });
}
|
||||||
|
// Clicking a header sorts by that column (descending first); clicking the
// active column again flips the direction.
document.querySelector('#results-table thead').addEventListener('click', (e) => {
  // closest() keeps this working if a header cell ever contains child elements.
  const th = e.target.closest('th');
  const col = Number.parseInt(th?.dataset.col, 10);
  if (Number.isNaN(col)) return;
  if (sortCol === col) {
    sortAsc = !sortAsc;
  } else {
    sortCol = col;
    sortAsc = false;
  }
  renderResults();
});

// Initial render with the default sort state.
renderResults();
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
117
kipina-codebench/results/2026-04-14T13-12.json
Normal file
117
kipina-codebench/results/2026-04-14T13-12.json
Normal file
@@ -0,0 +1,117 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 3,
|
||||||
|
"testsTotal": 3,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 3,
|
||||||
|
"totalDurationMs": 217110,
|
||||||
|
"totalTokens": 21602,
|
||||||
|
"avgTokPerSec": 114.70956637458333,
|
||||||
|
"promptChars": 12612,
|
||||||
|
"promptTokensEst": 3153,
|
||||||
|
"score": 20,
|
||||||
|
"stars": "★☆☆☆☆",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-rs",
|
||||||
|
"round": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 3,
|
||||||
|
"testsTotal": 3,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 3,
|
||||||
|
"totalDurationMs": 204772,
|
||||||
|
"totalTokens": 20717,
|
||||||
|
"avgTokPerSec": 114.45999021594592,
|
||||||
|
"promptChars": 12743,
|
||||||
|
"promptTokensEst": 3186,
|
||||||
|
"score": 20,
|
||||||
|
"stars": "★☆☆☆☆",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-rs",
|
||||||
|
"round": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 3,
|
||||||
|
"testsTotal": 4,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 4,
|
||||||
|
"totalDurationMs": 180501,
|
||||||
|
"totalTokens": 18467,
|
||||||
|
"avgTokPerSec": 115.23583963958032,
|
||||||
|
"promptChars": 12392,
|
||||||
|
"promptTokensEst": 3098,
|
||||||
|
"score": 20,
|
||||||
|
"stars": "★☆☆☆☆",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-rs",
|
||||||
|
"round": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 3,
|
||||||
|
"testsTotal": 25,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 25,
|
||||||
|
"totalDurationMs": 282681,
|
||||||
|
"totalTokens": 27665,
|
||||||
|
"avgTokPerSec": 111.29688837623901,
|
||||||
|
"promptChars": 12675,
|
||||||
|
"promptTokensEst": 3169,
|
||||||
|
"score": 20,
|
||||||
|
"stars": "★☆☆☆☆",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-rs",
|
||||||
|
"round": 4
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 3,
|
||||||
|
"testsTotal": 5,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 5,
|
||||||
|
"totalDurationMs": 171686,
|
||||||
|
"totalTokens": 17525,
|
||||||
|
"avgTokPerSec": 114.88288274375243,
|
||||||
|
"promptChars": 12618,
|
||||||
|
"promptTokensEst": 3155,
|
||||||
|
"score": 20,
|
||||||
|
"stars": "★☆☆☆☆",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-rs",
|
||||||
|
"round": 5
|
||||||
|
}
|
||||||
|
]
|
||||||
183
kipina-codebench/results/2026-04-14T13-42.html
Normal file
183
kipina-codebench/results/2026-04-14T13-42.html
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="fi">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>Kipina Model Benchmark</title>
|
||||||
|
<style>
|
||||||
|
:root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
|
||||||
|
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||||
|
body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
|
||||||
|
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
|
||||||
|
.meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
|
||||||
|
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
|
||||||
|
.card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
|
||||||
|
.card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||||
|
.card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
|
||||||
|
.card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
|
||||||
|
table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
|
||||||
|
th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
|
||||||
|
th:hover { color: var(--text); }
|
||||||
|
th.sorted-asc::after { content: ' ▲'; }
|
||||||
|
th.sorted-desc::after { content: ' ▼'; }
|
||||||
|
td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
|
||||||
|
tr:hover td { background: #1c2128; }
|
||||||
|
.pass { color: var(--green); }
|
||||||
|
.partial { color: var(--yellow); }
|
||||||
|
.fail { color: var(--red); }
|
||||||
|
.stars { letter-spacing: 1px; }
|
||||||
|
.bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
|
||||||
|
.bar-bg { background: var(--border); }
|
||||||
|
.bar-fill { background: var(--green); }
|
||||||
|
.bar-partial { background: var(--yellow); }
|
||||||
|
.model-name { font-weight: 600; }
|
||||||
|
h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
|
||||||
|
.summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
|
||||||
|
<h1>Kipina Model Benchmark</h1>
|
||||||
|
<div class="meta" id="meta"></div>
|
||||||
|
|
||||||
|
<div class="cards" id="cards"></div>
|
||||||
|
|
||||||
|
<h2>Mallikohtainen yhteenveto</h2>
|
||||||
|
<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
|
||||||
|
|
||||||
|
<h2>Kaikki tulokset</h2>
|
||||||
|
<table id="results-table"><thead></thead><tbody></tbody></table>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
const RAW = [{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":3,"testsTotal":18,"testsPassed":0,"testsFailed":18,"totalDurationMs":208078,"totalTokens":20783,"avgTokPerSec":114.94478559756693,"promptChars":13278,"promptTokensEst":3320,"score":20,"stars":"★☆☆☆☆","error":null,"profile":"large","promptName":"code-rs","round":1},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":0,"totalTokens":0,"avgTokPerSec":0,"promptChars":13362,"promptTokensEst":3341,"score":0,"stars":"","error":"Puuttuvat: src/lib.rs, src/main.rs, tests/api_test.rs","round":2},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":3,"testsTotal":9,"testsPassed":0,"testsFailed":9,"totalDurationMs":221174,"totalTokens":22354,"avgTokPerSec":114.09551344946065,"promptChars":13234,"promptTokensEst":3309,"score":20,"stars":"★☆☆☆☆","error":null,"profile":"large","promptName":"code-rs","round":3},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":0,"totalTokens":0,"avgTokPerSec":0,"promptChars":13317,"promptTokensEst":3329,"score":0,"stars":"","error":"Puuttuvat: src/lib.rs, src/main.rs, tests/api_test.rs","round":4},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":8795,"totalTokens":954,"avgTokPerSec":124.86009274372915,"promptChars":13335,"promptTokensEst":3334,"score":0,"stars":"☆☆☆☆☆","error":"fetch failed","profile":"large","promptName":"code-rs","round":5}];
|
||||||
|
|
||||||
|
// Maps a numeric score to a five-character star rating; anything not above 0 gets no filled stars.
const starsFor = s => {
  if (s >= 90) return '★★★★★';
  if (s >= 70) return '★★★★☆';
  if (s >= 50) return '★★★☆☆';
  if (s >= 25) return '★★☆☆☆';
  if (s > 0) return '★☆☆☆☆';
  return '☆☆☆☆☆';
};
|
||||||
|
/**
 * Computes a score (capped at 100) for one benchmark run.
 *
 * Points: 10 for a valid spec, 10 for a run that either had no error or still
 * executed tests, up to 60 for the share of passing tests, and 20/10/0 for
 * 0/1/2+ fix rounds. A run that reported an error and executed no tests scores 0.
 *
 * Missing or non-numeric counts are treated as 0, so an incomplete record can
 * neither slip past the "error and no tests" rule nor produce NaN.
 *
 * @param {object} r - Run record; reads specOk, error, testsTotal, testsPassed, fixRounds.
 * @returns {number} Score, at most 100.
 */
function calcScore(r) {
  const total = Number(r.testsTotal) || 0;
  const passed = Number(r.testsPassed) || 0;
  const fixRounds = Number(r.fixRounds) || 0;

  if (r.error && total === 0) return 0;

  let score = 0;
  if (r.specOk) score += 10;
  if (!r.error || total > 0) score += 10;
  if (total > 0) score += Math.round((passed / total) * 60);
  // Each fix round costs 10 of the 20 bonus points; the bonus never goes negative.
  score += Math.max(0, 20 - fixRounds * 10);
  return Math.min(100, score);
}
|
||||||
|
// Fill in derived fields (score, stars, prompt-token estimate) when a record lacks them.
// Each record is copied, so the RAW input objects are left untouched.
const DATA = RAW.map((r) => {
  const score = r.score ?? calcScore(r);
  return {
    ...r,
    score,
    // An empty stars string counts as missing and is derived from the score.
    stars: r.stars || starsFor(score),
    // Rough estimate of 4 characters per token when no estimate was recorded.
    promptTokensEst: r.promptTokensEst || (r.promptChars ? Math.round(r.promptChars / 4) : 0),
  };
});
|
||||||
|
// CSS class for a run: 'pass' when it has no error and every test passed,
// 'partial' when at least one test passed, otherwise 'fail'.
const cls = r => {
  const ranTests = r.testsTotal > 0;
  if (!r.error && ranTests && r.testsPassed === r.testsTotal) return 'pass';
  if (ranTests && r.testsPassed > 0) return 'partial';
  return 'fail';
};
|
||||||
|
/**
 * Renders a small progress bar followed by a "passed/total" label as an HTML string.
 *
 * Returns '-' when there is no positive test count (including a missing total).
 * The fill width is clamped to the track width, so inconsistent counts
 * (passed > total) cannot overflow the bar.
 *
 * @param {number} passed - Number of passing tests.
 * @param {number} total - Number of executed tests.
 * @param {number} [w=80] - Track width in pixels.
 * @returns {string} HTML fragment, or '-' when nothing was executed.
 */
const pctBar = (passed, total, w = 80) => {
  if (!(total > 0)) return '-';
  const pct = Math.min(100, Math.max(0, (Number(passed) || 0) / total * 100));
  // The bar is green only when every test passed.
  const c = passed === total ? 'bar-fill' : 'bar-partial';
  return `<span class="bar bar-bg" style="width:${w}px"><span class="bar ${c}" style="width:${Math.round(pct / 100 * w)}px"></span></span> ${passed}/${total}`;
};
|
||||||
|
|
||||||
|
// Meta: header line with date, run count and total duration.
// NOTE(review): the date comes from new Date(), i.e. when the page is opened,
// not from the result data.
const totalTime = DATA.reduce((sum, run) => sum + run.totalDurationMs, 0);
document.getElementById('meta').textContent = `${new Date().toLocaleDateString('fi-FI')} — ${DATA.length} ajoa — ${(totalTime / 1000 / 60).toFixed(1)} min`;

// Cards: headline figures computed over all runs.
const models = [...new Set(DATA.map((run) => run.model))];
const scenarios = [...new Set(DATA.map((run) => run.scenario))];
const avgScore = DATA.length ? Math.round(DATA.reduce((sum, run) => sum + run.score, 0) / DATA.length) : 0;
const totalPassed = DATA.reduce((sum, run) => sum + run.testsPassed, 0);
const totalTests = DATA.reduce((sum, run) => sum + run.testsTotal, 0);
const passRate = totalTests ? Math.round(totalPassed / totalTests * 100) : 0;

// Rounded mean of one numeric field over all runs of a model.
const meanByModel = (model, field) => {
  const runs = DATA.filter((run) => run.model === model);
  return Math.round(runs.reduce((sum, run) => sum + run[field], 0) / runs.length);
};

// Both are undefined when DATA is empty; the template below falls back to '-' / 0.
const bestModel = models
  .map((model) => ({ model, avg: meanByModel(model, 'score') }))
  .sort((a, b) => b.avg - a.avg)[0];
const fastestModel = models
  .map((model) => ({ model, speed: meanByModel(model, 'avgTokPerSec') }))
  .sort((a, b) => b.speed - a.speed)[0];

document.getElementById('cards').innerHTML = `
  <div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistettä</div></div>
  <div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
  <div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${bestModel?.model || '-'}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
  <div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${fastestModel?.model || '-'}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
  <div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
  <div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime / 1000 / 60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
`;
|
||||||
|
|
||||||
|
// Summary table: one row per model, ordered by average score (highest first).
const sumHead = document.querySelector('#summary-table thead');
const sumBody = document.querySelector('#summary-table tbody');
sumHead.innerHTML = `<tr><th>Malli</th>${scenarios.map((name) => `<th>${name}</th>`).join('')}<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>`;

const modelRows = models
  .map((model) => {
    const runs = DATA.filter((run) => run.model === model);
    const sumOf = (field) => runs.reduce((acc, run) => acc + run[field], 0);

    const passed = sumOf('testsPassed');
    const executed = sumOf('testsTotal');
    const tokens = sumOf('totalTokens');
    const durationMs = sumOf('totalDurationMs');
    const speed = Math.round(sumOf('avgTokPerSec') / runs.length);
    const avg = Math.round(sumOf('score') / runs.length);

    // Each scenario cell shows the first run found for that scenario.
    const cells = [];
    for (const scenario of scenarios) {
      const first = runs.find((run) => run.scenario === scenario);
      if (first) {
        cells.push(`<td class="${cls(first)}">${pctBar(first.testsPassed, first.testsTotal, 60)} <span style="color:var(--dim)">${(first.totalDurationMs / 1000).toFixed(0)}s</span></td>`);
      } else {
        cells.push('<td>-</td>');
      }
    }

    const html = `<tr><td class="model-name">${model}</td>${cells.join('')}<td>${pctBar(passed, executed)}</td><td>${(tokens / 1000).toFixed(1)}K</td><td>${(durationMs / 1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>`;
    return { avg, html };
  })
  .sort((x, y) => y.avg - x.avg);

sumBody.innerHTML = modelRows.map((row) => row.html).join('');
|
||||||
|
|
||||||
|
// Results table: every header cell carries its column index in data-col for sorting.
const resHead = document.querySelector('#results-table thead');
const resBody = document.querySelector('#results-table tbody');
const resCols = ['Malli', 'Skenaario', 'Speksi', 'Testit', 'Korjaus', 'Ctx', 'Out tok', 'Aika', 'tok/s', 'Pisteet'];
resHead.innerHTML = `<tr>${resCols.map((label, idx) => `<th data-col="${idx}">${label}</th>`).join('')}</tr>`;

// Sort state: starts on column 9 (score), descending.
let sortCol = 9;
let sortAsc = false;
|
||||||
|
/**
 * Rebuilds the body of #results-table from DATA, ordered by the current
 * sortCol / sortAsc state, and marks the active header with a sort-direction class.
 */
function renderResults() {
  // One sort-key accessor per column, in the same order as the header labels.
  const keyOf = [
    (run) => run.model,
    (run) => run.scenario,
    (run) => run.specEntities,
    (run) => run.testsPassed / Math.max(run.testsTotal, 1),
    (run) => run.fixRounds,
    (run) => run.promptTokensEst,
    (run) => run.totalTokens,
    (run) => run.totalDurationMs,
    (run) => run.avgTokPerSec,
    (run) => run.score,
  ][sortCol];

  // Sort a copy so DATA keeps its original order.
  const ordered = [...DATA].sort((a, b) => {
    const left = keyOf(a);
    const right = keyOf(b);
    const cmp = typeof left === 'string' ? left.localeCompare(right) : left - right;
    return sortAsc ? cmp : -cmp;
  });

  const rows = [];
  for (const run of ordered) {
    const testsClass = cls(run);
    const spec = run.specOk ? `✓ ${run.specEntities}e` : '<span class="fail">✗</span>';
    const fixes = run.fixRounds > 0 ? `${run.fixRounds}×` : '-';
    const ctx = run.promptTokensEst > 0 ? `~${(run.promptTokensEst / 1000).toFixed(1)}K` : '-';
    const out = run.totalTokens > 0 ? `${(run.totalTokens / 1000).toFixed(1)}K` : '-';
    rows.push(`<tr>
      <td class="model-name">${run.model}</td>
      <td>${run.scenario}</td>
      <td>${spec}</td>
      <td class="${testsClass}">${pctBar(run.testsPassed, run.testsTotal)}</td>
      <td>${fixes}</td>
      <td>${ctx}</td>
      <td>${out}</td>
      <td>${(run.totalDurationMs / 1000).toFixed(0)}s</td>
      <td>${run.avgTokPerSec.toFixed(0)}</td>
      <td><span class="stars">${run.stars}</span> ${run.score}p</td>
    </tr>`);
  }
  resBody.innerHTML = rows.join('');

  // Only the active column keeps a class; the CSS adds the arrow from it.
  const activeClass = sortAsc ? 'sorted-asc' : 'sorted-desc';
  document.querySelectorAll('#results-table th').forEach((th, idx) => {
    th.className = idx === sortCol ? activeClass : '';
  });
}
|
||||||
|
// Clicking a header sorts by that column (descending first); clicking the
// active column again flips the direction. Clicks without a data-col are ignored.
document.querySelector('#results-table thead').addEventListener('click', (e) => {
  const col = Number.parseInt(e.target.dataset.col, 10);
  if (Number.isNaN(col)) return;
  if (sortCol === col) {
    sortAsc = !sortAsc;
  } else {
    sortCol = col;
    sortAsc = false;
  }
  renderResults();
});

// Initial render with the default sort.
renderResults();
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
113
kipina-codebench/results/2026-04-14T13-42.json
Normal file
113
kipina-codebench/results/2026-04-14T13-42.json
Normal file
@@ -0,0 +1,113 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 3,
|
||||||
|
"testsTotal": 18,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 18,
|
||||||
|
"totalDurationMs": 208078,
|
||||||
|
"totalTokens": 20783,
|
||||||
|
"avgTokPerSec": 114.94478559756693,
|
||||||
|
"promptChars": 13278,
|
||||||
|
"promptTokensEst": 3320,
|
||||||
|
"score": 20,
|
||||||
|
"stars": "★☆☆☆☆",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-rs",
|
||||||
|
"round": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 0,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 0,
|
||||||
|
"totalTokens": 0,
|
||||||
|
"avgTokPerSec": 0,
|
||||||
|
"promptChars": 13362,
|
||||||
|
"promptTokensEst": 3341,
|
||||||
|
"score": 0,
|
||||||
|
"stars": "",
|
||||||
|
"error": "Puuttuvat: src/lib.rs, src/main.rs, tests/api_test.rs",
|
||||||
|
"round": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 3,
|
||||||
|
"testsTotal": 9,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 9,
|
||||||
|
"totalDurationMs": 221174,
|
||||||
|
"totalTokens": 22354,
|
||||||
|
"avgTokPerSec": 114.09551344946065,
|
||||||
|
"promptChars": 13234,
|
||||||
|
"promptTokensEst": 3309,
|
||||||
|
"score": 20,
|
||||||
|
"stars": "★☆☆☆☆",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-rs",
|
||||||
|
"round": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 0,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 0,
|
||||||
|
"totalTokens": 0,
|
||||||
|
"avgTokPerSec": 0,
|
||||||
|
"promptChars": 13317,
|
||||||
|
"promptTokensEst": 3329,
|
||||||
|
"score": 0,
|
||||||
|
"stars": "",
|
||||||
|
"error": "Puuttuvat: src/lib.rs, src/main.rs, tests/api_test.rs",
|
||||||
|
"round": 4
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 0,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 8795,
|
||||||
|
"totalTokens": 954,
|
||||||
|
"avgTokPerSec": 124.86009274372915,
|
||||||
|
"promptChars": 13335,
|
||||||
|
"promptTokensEst": 3334,
|
||||||
|
"score": 0,
|
||||||
|
"stars": "☆☆☆☆☆",
|
||||||
|
"error": "fetch failed",
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-rs",
|
||||||
|
"round": 5
|
||||||
|
}
|
||||||
|
]
|
||||||
183
kipina-codebench/results/2026-04-14T14-12.html
Normal file
183
kipina-codebench/results/2026-04-14T14-12.html
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="fi">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>Kipina Model Benchmark</title>
|
||||||
|
<style>
|
||||||
|
:root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
|
||||||
|
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||||
|
body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
|
||||||
|
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
|
||||||
|
.meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
|
||||||
|
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
|
||||||
|
.card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
|
||||||
|
.card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||||
|
.card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
|
||||||
|
.card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
|
||||||
|
table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
|
||||||
|
th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
|
||||||
|
th:hover { color: var(--text); }
|
||||||
|
th.sorted-asc::after { content: ' ▲'; }
|
||||||
|
th.sorted-desc::after { content: ' ▼'; }
|
||||||
|
td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
|
||||||
|
tr:hover td { background: #1c2128; }
|
||||||
|
.pass { color: var(--green); }
|
||||||
|
.partial { color: var(--yellow); }
|
||||||
|
.fail { color: var(--red); }
|
||||||
|
.stars { letter-spacing: 1px; }
|
||||||
|
.bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
|
||||||
|
.bar-bg { background: var(--border); }
|
||||||
|
.bar-fill { background: var(--green); }
|
||||||
|
.bar-partial { background: var(--yellow); }
|
||||||
|
.model-name { font-weight: 600; }
|
||||||
|
h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
|
||||||
|
.summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
|
||||||
|
<h1>Kipina Model Benchmark</h1>
|
||||||
|
<div class="meta" id="meta"></div>
|
||||||
|
|
||||||
|
<div class="cards" id="cards"></div>
|
||||||
|
|
||||||
|
<h2>Mallikohtainen yhteenveto</h2>
|
||||||
|
<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
|
||||||
|
|
||||||
|
<h2>Kaikki tulokset</h2>
|
||||||
|
<table id="results-table"><thead></thead><tbody></tbody></table>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
const RAW = [{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":1,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":133173,"totalTokens":13174,"avgTokPerSec":117.52479437665707,"promptChars":14102,"promptTokensEst":3526,"score":30,"stars":"★★☆☆☆","error":null,"profile":"large","promptName":"code-rs","round":1},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":3,"testsTotal":5,"testsPassed":0,"testsFailed":5,"totalDurationMs":267561,"totalTokens":27021,"avgTokPerSec":113.5812238661422,"promptChars":14052,"promptTokensEst":3513,"score":20,"stars":"★☆☆☆☆","error":null,"profile":"large","promptName":"code-rs","round":2},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":0,"totalTokens":0,"avgTokPerSec":0,"promptChars":13914,"promptTokensEst":3479,"score":0,"stars":"","error":"Puuttuvat: src/handlers.rs, src/lib.rs, src/main.rs, tests/api_test.rs","round":3},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":2,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":162271,"totalTokens":16343,"avgTokPerSec":115.53039090208604,"promptChars":14062,"promptTokensEst":3516,"score":20,"stars":"★☆☆☆☆","error":null,"profile":"large","promptName":"code-rs","round":4},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":3,"testsTotal":1,"testsPassed":0,"testsFailed":1,"totalDurationMs":211367,"totalTokens":21183,"avgTokPerSec":113.22772767359652,"promptChars":14038,"promptTokensEst":3510,"score":20,"stars":"★☆☆☆☆","error":null,"profile":"large","promptName":"code-rs","round":5}];
|
||||||
|
|
||||||
|
// Maps a numeric score to a five-character star rating; anything not above 0 gets no filled stars.
const starsFor = s => {
  if (s >= 90) return '★★★★★';
  if (s >= 70) return '★★★★☆';
  if (s >= 50) return '★★★☆☆';
  if (s >= 25) return '★★☆☆☆';
  if (s > 0) return '★☆☆☆☆';
  return '☆☆☆☆☆';
};
|
||||||
|
/**
 * Computes a score (capped at 100) for one benchmark run.
 *
 * Points: 10 for a valid spec, 10 for a run that either had no error or still
 * executed tests, up to 60 for the share of passing tests, and 20/10/0 for
 * 0/1/2+ fix rounds. A run that reported an error and executed no tests scores 0.
 *
 * Missing or non-numeric counts are treated as 0, so an incomplete record can
 * neither slip past the "error and no tests" rule nor produce NaN.
 *
 * @param {object} r - Run record; reads specOk, error, testsTotal, testsPassed, fixRounds.
 * @returns {number} Score, at most 100.
 */
function calcScore(r) {
  const total = Number(r.testsTotal) || 0;
  const passed = Number(r.testsPassed) || 0;
  const fixRounds = Number(r.fixRounds) || 0;

  if (r.error && total === 0) return 0;

  let score = 0;
  if (r.specOk) score += 10;
  if (!r.error || total > 0) score += 10;
  if (total > 0) score += Math.round((passed / total) * 60);
  // Each fix round costs 10 of the 20 bonus points; the bonus never goes negative.
  score += Math.max(0, 20 - fixRounds * 10);
  return Math.min(100, score);
}
|
||||||
|
// Fill in derived fields (score, stars, prompt-token estimate) when a record lacks them.
// Each record is copied, so the RAW input objects are left untouched.
const DATA = RAW.map((r) => {
  const score = r.score ?? calcScore(r);
  return {
    ...r,
    score,
    // An empty stars string counts as missing and is derived from the score.
    stars: r.stars || starsFor(score),
    // Rough estimate of 4 characters per token when no estimate was recorded.
    promptTokensEst: r.promptTokensEst || (r.promptChars ? Math.round(r.promptChars / 4) : 0),
  };
});
|
||||||
|
// CSS class for a run: 'pass' when it has no error and every test passed,
// 'partial' when at least one test passed, otherwise 'fail'.
const cls = r => {
  const ranTests = r.testsTotal > 0;
  if (!r.error && ranTests && r.testsPassed === r.testsTotal) return 'pass';
  if (ranTests && r.testsPassed > 0) return 'partial';
  return 'fail';
};
|
||||||
|
/**
 * Renders a small progress bar followed by a "passed/total" label as an HTML string.
 *
 * Returns '-' when there is no positive test count (including a missing total).
 * The fill width is clamped to the track width, so inconsistent counts
 * (passed > total) cannot overflow the bar.
 *
 * @param {number} passed - Number of passing tests.
 * @param {number} total - Number of executed tests.
 * @param {number} [w=80] - Track width in pixels.
 * @returns {string} HTML fragment, or '-' when nothing was executed.
 */
const pctBar = (passed, total, w = 80) => {
  if (!(total > 0)) return '-';
  const pct = Math.min(100, Math.max(0, (Number(passed) || 0) / total * 100));
  // The bar is green only when every test passed.
  const c = passed === total ? 'bar-fill' : 'bar-partial';
  return `<span class="bar bar-bg" style="width:${w}px"><span class="bar ${c}" style="width:${Math.round(pct / 100 * w)}px"></span></span> ${passed}/${total}`;
};
|
||||||
|
|
||||||
|
// Meta: header line with date, run count and total duration.
// NOTE(review): the date comes from new Date(), i.e. when the page is opened,
// not from the result data.
const totalTime = DATA.reduce((sum, run) => sum + run.totalDurationMs, 0);
document.getElementById('meta').textContent = `${new Date().toLocaleDateString('fi-FI')} — ${DATA.length} ajoa — ${(totalTime / 1000 / 60).toFixed(1)} min`;

// Cards: headline figures computed over all runs.
const models = [...new Set(DATA.map((run) => run.model))];
const scenarios = [...new Set(DATA.map((run) => run.scenario))];
const avgScore = DATA.length ? Math.round(DATA.reduce((sum, run) => sum + run.score, 0) / DATA.length) : 0;
const totalPassed = DATA.reduce((sum, run) => sum + run.testsPassed, 0);
const totalTests = DATA.reduce((sum, run) => sum + run.testsTotal, 0);
const passRate = totalTests ? Math.round(totalPassed / totalTests * 100) : 0;

// Rounded mean of one numeric field over all runs of a model.
const meanByModel = (model, field) => {
  const runs = DATA.filter((run) => run.model === model);
  return Math.round(runs.reduce((sum, run) => sum + run[field], 0) / runs.length);
};

// Both are undefined when DATA is empty; the template below falls back to '-' / 0.
const bestModel = models
  .map((model) => ({ model, avg: meanByModel(model, 'score') }))
  .sort((a, b) => b.avg - a.avg)[0];
const fastestModel = models
  .map((model) => ({ model, speed: meanByModel(model, 'avgTokPerSec') }))
  .sort((a, b) => b.speed - a.speed)[0];

document.getElementById('cards').innerHTML = `
  <div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistettä</div></div>
  <div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
  <div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${bestModel?.model || '-'}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
  <div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${fastestModel?.model || '-'}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
  <div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
  <div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime / 1000 / 60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
`;
|
||||||
|
|
||||||
|
// Summary table: one row per model, ordered by average score (highest first).
const sumHead = document.querySelector('#summary-table thead');
const sumBody = document.querySelector('#summary-table tbody');
sumHead.innerHTML = `<tr><th>Malli</th>${scenarios.map((name) => `<th>${name}</th>`).join('')}<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>`;

const modelRows = models
  .map((model) => {
    const runs = DATA.filter((run) => run.model === model);
    const sumOf = (field) => runs.reduce((acc, run) => acc + run[field], 0);

    const passed = sumOf('testsPassed');
    const executed = sumOf('testsTotal');
    const tokens = sumOf('totalTokens');
    const durationMs = sumOf('totalDurationMs');
    const speed = Math.round(sumOf('avgTokPerSec') / runs.length);
    const avg = Math.round(sumOf('score') / runs.length);

    // Each scenario cell shows the first run found for that scenario.
    const cells = [];
    for (const scenario of scenarios) {
      const first = runs.find((run) => run.scenario === scenario);
      if (first) {
        cells.push(`<td class="${cls(first)}">${pctBar(first.testsPassed, first.testsTotal, 60)} <span style="color:var(--dim)">${(first.totalDurationMs / 1000).toFixed(0)}s</span></td>`);
      } else {
        cells.push('<td>-</td>');
      }
    }

    const html = `<tr><td class="model-name">${model}</td>${cells.join('')}<td>${pctBar(passed, executed)}</td><td>${(tokens / 1000).toFixed(1)}K</td><td>${(durationMs / 1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>`;
    return { avg, html };
  })
  .sort((x, y) => y.avg - x.avg);

sumBody.innerHTML = modelRows.map((row) => row.html).join('');
|
||||||
|
|
||||||
|
// Results table: every header cell carries its column index in data-col for sorting.
const resHead = document.querySelector('#results-table thead');
const resBody = document.querySelector('#results-table tbody');
const resCols = ['Malli', 'Skenaario', 'Speksi', 'Testit', 'Korjaus', 'Ctx', 'Out tok', 'Aika', 'tok/s', 'Pisteet'];
resHead.innerHTML = `<tr>${resCols.map((label, idx) => `<th data-col="${idx}">${label}</th>`).join('')}</tr>`;

// Sort state: starts on column 9 (score), descending.
let sortCol = 9;
let sortAsc = false;
|
||||||
|
/**
 * Rebuilds the body of #results-table from DATA, ordered by the current
 * sortCol / sortAsc state, and marks the active header with a sort-direction class.
 */
function renderResults() {
  // One sort-key accessor per column, in the same order as the header labels.
  const keyOf = [
    (run) => run.model,
    (run) => run.scenario,
    (run) => run.specEntities,
    (run) => run.testsPassed / Math.max(run.testsTotal, 1),
    (run) => run.fixRounds,
    (run) => run.promptTokensEst,
    (run) => run.totalTokens,
    (run) => run.totalDurationMs,
    (run) => run.avgTokPerSec,
    (run) => run.score,
  ][sortCol];

  // Sort a copy so DATA keeps its original order.
  const ordered = [...DATA].sort((a, b) => {
    const left = keyOf(a);
    const right = keyOf(b);
    const cmp = typeof left === 'string' ? left.localeCompare(right) : left - right;
    return sortAsc ? cmp : -cmp;
  });

  const rows = [];
  for (const run of ordered) {
    const testsClass = cls(run);
    const spec = run.specOk ? `✓ ${run.specEntities}e` : '<span class="fail">✗</span>';
    const fixes = run.fixRounds > 0 ? `${run.fixRounds}×` : '-';
    const ctx = run.promptTokensEst > 0 ? `~${(run.promptTokensEst / 1000).toFixed(1)}K` : '-';
    const out = run.totalTokens > 0 ? `${(run.totalTokens / 1000).toFixed(1)}K` : '-';
    rows.push(`<tr>
      <td class="model-name">${run.model}</td>
      <td>${run.scenario}</td>
      <td>${spec}</td>
      <td class="${testsClass}">${pctBar(run.testsPassed, run.testsTotal)}</td>
      <td>${fixes}</td>
      <td>${ctx}</td>
      <td>${out}</td>
      <td>${(run.totalDurationMs / 1000).toFixed(0)}s</td>
      <td>${run.avgTokPerSec.toFixed(0)}</td>
      <td><span class="stars">${run.stars}</span> ${run.score}p</td>
    </tr>`);
  }
  resBody.innerHTML = rows.join('');

  // Only the active column keeps a class; the CSS adds the arrow from it.
  const activeClass = sortAsc ? 'sorted-asc' : 'sorted-desc';
  document.querySelectorAll('#results-table th').forEach((th, idx) => {
    th.className = idx === sortCol ? activeClass : '';
  });
}
|
||||||
|
// Clicking a header sorts by that column (descending first); clicking the
// active column again flips the direction. Clicks without a data-col are ignored.
document.querySelector('#results-table thead').addEventListener('click', (e) => {
  const col = Number.parseInt(e.target.dataset.col, 10);
  if (Number.isNaN(col)) return;
  if (sortCol === col) {
    sortAsc = !sortAsc;
  } else {
    sortCol = col;
    sortAsc = false;
  }
  renderResults();
});

// Initial render with the default sort.
renderResults();
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
115
kipina-codebench/results/2026-04-14T14-12.json
Normal file
115
kipina-codebench/results/2026-04-14T14-12.json
Normal file
@@ -0,0 +1,115 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 1,
|
||||||
|
"testsTotal": 0,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 133173,
|
||||||
|
"totalTokens": 13174,
|
||||||
|
"avgTokPerSec": 117.52479437665707,
|
||||||
|
"promptChars": 14102,
|
||||||
|
"promptTokensEst": 3526,
|
||||||
|
"score": 30,
|
||||||
|
"stars": "★★☆☆☆",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-rs",
|
||||||
|
"round": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 3,
|
||||||
|
"testsTotal": 5,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 5,
|
||||||
|
"totalDurationMs": 267561,
|
||||||
|
"totalTokens": 27021,
|
||||||
|
"avgTokPerSec": 113.5812238661422,
|
||||||
|
"promptChars": 14052,
|
||||||
|
"promptTokensEst": 3513,
|
||||||
|
"score": 20,
|
||||||
|
"stars": "★☆☆☆☆",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-rs",
|
||||||
|
"round": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 0,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 0,
|
||||||
|
"totalTokens": 0,
|
||||||
|
"avgTokPerSec": 0,
|
||||||
|
"promptChars": 13914,
|
||||||
|
"promptTokensEst": 3479,
|
||||||
|
"score": 0,
|
||||||
|
"stars": "",
|
||||||
|
"error": "Puuttuvat: src/handlers.rs, src/lib.rs, src/main.rs, tests/api_test.rs",
|
||||||
|
"round": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 2,
|
||||||
|
"testsTotal": 0,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 162271,
|
||||||
|
"totalTokens": 16343,
|
||||||
|
"avgTokPerSec": 115.53039090208604,
|
||||||
|
"promptChars": 14062,
|
||||||
|
"promptTokensEst": 3516,
|
||||||
|
"score": 20,
|
||||||
|
"stars": "★☆☆☆☆",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-rs",
|
||||||
|
"round": 4
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 3,
|
||||||
|
"testsTotal": 1,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 1,
|
||||||
|
"totalDurationMs": 211367,
|
||||||
|
"totalTokens": 21183,
|
||||||
|
"avgTokPerSec": 113.22772767359652,
|
||||||
|
"promptChars": 14038,
|
||||||
|
"promptTokensEst": 3510,
|
||||||
|
"score": 20,
|
||||||
|
"stars": "★☆☆☆☆",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-rs",
|
||||||
|
"round": 5
|
||||||
|
}
|
||||||
|
]
|
||||||
183
kipina-codebench/results/2026-04-14T14-38.html
Normal file
183
kipina-codebench/results/2026-04-14T14-38.html
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="fi">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>Kipina Model Benchmark</title>
|
||||||
|
<style>
|
||||||
|
:root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
|
||||||
|
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||||
|
body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
|
||||||
|
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
|
||||||
|
.meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
|
||||||
|
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
|
||||||
|
.card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
|
||||||
|
.card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||||
|
.card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
|
||||||
|
.card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
|
||||||
|
table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
|
||||||
|
th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
|
||||||
|
th:hover { color: var(--text); }
|
||||||
|
th.sorted-asc::after { content: ' ▲'; }
|
||||||
|
th.sorted-desc::after { content: ' ▼'; }
|
||||||
|
td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
|
||||||
|
tr:hover td { background: #1c2128; }
|
||||||
|
.pass { color: var(--green); }
|
||||||
|
.partial { color: var(--yellow); }
|
||||||
|
.fail { color: var(--red); }
|
||||||
|
.stars { letter-spacing: 1px; }
|
||||||
|
.bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
|
||||||
|
.bar-bg { background: var(--border); }
|
||||||
|
.bar-fill { background: var(--green); }
|
||||||
|
.bar-partial { background: var(--yellow); }
|
||||||
|
.model-name { font-weight: 600; }
|
||||||
|
h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
|
||||||
|
.summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
|
||||||
|
<h1>Kipina Model Benchmark</h1>
|
||||||
|
<div class="meta" id="meta"></div>
|
||||||
|
|
||||||
|
<div class="cards" id="cards"></div>
|
||||||
|
|
||||||
|
<h2>Mallikohtainen yhteenveto</h2>
|
||||||
|
<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
|
||||||
|
|
||||||
|
<h2>Kaikki tulokset</h2>
|
||||||
|
<table id="results-table"><thead></thead><tbody></tbody></table>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
const RAW = [{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":38807,"totalTokens":5667,"avgTokPerSec":183.83891911423427,"promptChars":21818,"promptTokensEst":5455,"score":40,"stars":"★★☆☆☆","error":null,"profile":"large","promptName":"code-rs","round":1},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":3,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":178290,"totalTokens":26265,"avgTokPerSec":168.77786498646262,"promptChars":21840,"promptTokensEst":5460,"score":0,"stars":"☆☆☆☆☆","error":"Testit kaatuivat","profile":"large","promptName":"code-rs","round":2},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":3,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":151603,"totalTokens":22725,"avgTokPerSec":170.74115131582644,"promptChars":21750,"promptTokensEst":5438,"score":0,"stars":"☆☆☆☆☆","error":"Testit kaatuivat","profile":"large","promptName":"code-rs","round":3},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":41059,"totalTokens":6288,"avgTokPerSec":183.76827829344424,"promptChars":21848,"promptTokensEst":5462,"score":40,"stars":"★★☆☆☆","error":null,"profile":"large","promptName":"code-rs","round":4},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":3,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":187666,"totalTokens":27278,"avgTokPerSec":166.24197655672018,"promptChars":21694,"promptTokensEst":5424,"score":0,"stars":"☆☆☆☆☆","error":"Testit kaatuivat","profile":"large","promptName":"code-rs","round":5}];
|
||||||
|
|
||||||
|
/**
 * Converts a numeric score into a five-character star rating.
 * Thresholds: >=90 five stars, >=70 four, >=50 three, >=25 two,
 * >0 one, otherwise none.
 */
const starsFor = s => {
  if (s >= 90) return '★★★★★';
  if (s >= 70) return '★★★★☆';
  if (s >= 50) return '★★★☆☆';
  if (s >= 25) return '★★☆☆☆';
  if (s > 0) return '★☆☆☆☆';
  return '☆☆☆☆☆';
};
|
||||||
|
/**
 * Computes a 0–100 score for a result record (used when a record carries no
 * stored score).
 *
 * Scoring: 10 for a valid spec, 10 for reaching the test stage, up to 60 for
 * the share of passed tests, and up to 20 for needing few fix rounds
 * (minus 10 per round). A record that errored without running any test
 * scores 0.
 *
 * Missing or non-numeric counters are treated as 0 so the result is never NaN.
 *
 * @param {object} r - result record (specOk, error, testsTotal, testsPassed, fixRounds)
 * @returns {number} integer score in [0, 100]
 */
function calcScore(r) {
  const total = Math.max(0, Number(r.testsTotal) || 0);
  const passed = Math.max(0, Number(r.testsPassed) || 0);
  const fixes = Math.max(0, Number(r.fixRounds) || 0);

  if (r.error && total === 0) return 0;

  let s = 0;
  if (r.specOk) s += 10;
  // Past the guard above the run either had no error or executed tests,
  // so the "reached the test stage" points always apply.
  s += 10;
  if (total > 0) s += Math.round((passed / total) * 60);
  s += Math.max(0, 20 - fixes * 10);
  return Math.min(100, s);
}
|
||||||
|
// Fill in derived fields when a record lacks them: score (via calcScore),
// stars (via starsFor) and the prompt token estimate (promptChars / 4).
// Each record is copied, so the RAW input objects are left unmodified.
const DATA = RAW.map(r => {
  const score = r.score ?? calcScore(r);
  return {
    ...r,
    score,
    stars: r.stars || starsFor(score),
    promptTokensEst: r.promptTokensEst || (r.promptChars ? Math.round(r.promptChars / 4) : 0),
  };
});
|
||||||
|
/**
 * Picks the CSS class for a result: 'pass' when tests ran, none failed and
 * there is no error; 'partial' when tests ran and at least one passed;
 * 'fail' otherwise.
 */
const cls = r => {
  const ranTests = r.testsTotal > 0;
  if (ranTests && !r.error && r.testsPassed === r.testsTotal) return 'pass';
  if (ranTests && r.testsPassed > 0) return 'partial';
  return 'fail';
};
|
||||||
|
/**
 * Renders a small progress bar plus a "passed/total" label as an HTML string.
 *
 * @param {number} passed - number of passed tests
 * @param {number} total - number of tests; a missing or non-positive value renders '-'
 * @param {number} [w=80] - bar width in pixels
 * @returns {string} HTML fragment, or '-' when there is nothing to show
 */
const pctBar = (passed, total, w = 80) => {
  // Covers 0 as well as undefined/NaN/negative totals, which would otherwise
  // produce "NaNpx" or negative widths.
  if (!(total > 0)) return '-';
  // Clamp so an inconsistent record (passed > total) cannot overflow the bar.
  const ratio = Math.min(1, Math.max(0, passed / total || 0));
  const pct = ratio * 100;
  const c = pct === 100 ? 'bar-fill' : 'bar-partial';
  return `<span class="bar bar-bg" style="width:${w}px"><span class="bar ${c}" style="width:${Math.round(pct/100*w)}px"></span></span> ${passed}/${total}`;
};
|
||||||
|
|
||||||
|
// Meta
|
||||||
|
const totalTime = DATA.reduce((s,r) => s + r.totalDurationMs, 0);
|
||||||
|
document.getElementById('meta').textContent = `${new Date().toLocaleDateString('fi-FI')} — ${DATA.length} ajoa — ${(totalTime/1000/60).toFixed(1)} min`;
|
||||||
|
|
||||||
|
// Cards
|
||||||
|
const models = [...new Set(DATA.map(r => r.model))];
|
||||||
|
const scenarios = [...new Set(DATA.map(r => r.scenario))];
|
||||||
|
const avgScore = DATA.length ? Math.round(DATA.reduce((s,r) => s + r.score, 0) / DATA.length) : 0;
|
||||||
|
const totalPassed = DATA.reduce((s,r) => s + r.testsPassed, 0);
|
||||||
|
const totalTests = DATA.reduce((s,r) => s + r.testsTotal, 0);
|
||||||
|
const passRate = totalTests ? Math.round(totalPassed/totalTests*100) : 0;
|
||||||
|
const bestModel = models.map(m => {
|
||||||
|
const mrs = DATA.filter(r => r.model === m);
|
||||||
|
return { model: m, avg: Math.round(mrs.reduce((s,r) => s + r.score, 0) / mrs.length) };
|
||||||
|
}).sort((a,b) => b.avg - a.avg)[0];
|
||||||
|
const fastestModel = models.map(m => {
|
||||||
|
const mrs = DATA.filter(r => r.model === m);
|
||||||
|
return { model: m, speed: Math.round(mrs.reduce((s,r) => s + r.avgTokPerSec, 0) / mrs.length) };
|
||||||
|
}).sort((a,b) => b.speed - a.speed)[0];
|
||||||
|
|
||||||
|
document.getElementById('cards').innerHTML = `
|
||||||
|
<div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistetta</div></div>
|
||||||
|
<div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
|
||||||
|
<div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${bestModel?.model || '-'}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
|
||||||
|
<div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${fastestModel?.model || '-'}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
|
||||||
|
<div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
|
||||||
|
<div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime/1000/60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
|
||||||
|
`;
|
||||||
|
|
||||||
|
// Summary table: one row per model, sorted by average score (best first).
const sumHead = document.querySelector('#summary-table thead');
const sumBody = document.querySelector('#summary-table tbody');
sumHead.innerHTML = '<tr><th>Malli</th>' + scenarios.map(s => `<th>${s}</th>`).join('') + '<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>';

const modelRows = models.map(m => {
  const mrs = DATA.filter(r => r.model === m);
  const sumOf = (rows, key) => rows.reduce((s, r) => s + r[key], 0);

  const tp = sumOf(mrs, 'testsPassed');
  const tt = sumOf(mrs, 'testsTotal');
  const tok = sumOf(mrs, 'totalTokens');
  const time = sumOf(mrs, 'totalDurationMs');
  const speed = Math.round(sumOf(mrs, 'avgTokPerSec') / mrs.length);
  const avg = Math.round(sumOf(mrs, 'score') / mrs.length);

  const scenCols = scenarios.map(s => {
    // A model can have several runs (rounds) of the same scenario; aggregate
    // all of them instead of showing only the first one. With a single run
    // the cell is identical to that run's own numbers.
    const runs = mrs.filter(r => r.scenario === s);
    if (runs.length === 0) return '<td>-</td>';
    const agg = {
      error: runs.find(r => r.error)?.error ?? null,
      testsPassed: sumOf(runs, 'testsPassed'),
      testsTotal: sumOf(runs, 'testsTotal'),
      totalDurationMs: sumOf(runs, 'totalDurationMs'),
    };
    return `<td class="${cls(agg)}">${pctBar(agg.testsPassed, agg.testsTotal, 60)} <span style="color:var(--dim)">${(agg.totalDurationMs/1000).toFixed(0)}s</span></td>`;
  }).join('');

  return { avg, html: `<tr><td class="model-name">${m}</td>${scenCols}<td>${pctBar(tp, tt)}</td><td>${(tok/1000).toFixed(1)}K</td><td>${(time/1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>` };
}).sort((a,b) => b.avg - a.avg);

sumBody.innerHTML = modelRows.map(r => r.html).join('');
|
||||||
|
|
||||||
|
// Results table
|
||||||
|
const resHead = document.querySelector('#results-table thead');
|
||||||
|
const resBody = document.querySelector('#results-table tbody');
|
||||||
|
const resCols = ['Malli','Skenaario','Speksi','Testit','Korjaus','Ctx','Out tok','Aika','tok/s','Pisteet'];
|
||||||
|
resHead.innerHTML = '<tr>' + resCols.map((c,i) => `<th data-col="${i}">${c}</th>`).join('') + '</tr>';
|
||||||
|
|
||||||
|
let sortCol = 9, sortAsc = false;
|
||||||
|
/**
 * Re-renders the body of the results table sorted by the current
 * `sortCol` / `sortAsc` state and marks the active header cell.
 *
 * String columns are compared with localeCompare, all others numerically.
 * Missing numeric fields are treated as 0 so an incomplete record neither
 * breaks the sort nor throws while formatting.
 */
function renderResults() {
  const num = v => Number(v) || 0;

  // One key extractor per column, in the same order as the header cells.
  // Selected once per render instead of building every pair per comparison.
  const keyOf = [
    r => String(r.model ?? ''),
    r => String(r.scenario ?? ''),
    r => num(r.specEntities),
    r => num(r.testsPassed) / Math.max(num(r.testsTotal), 1),
    r => num(r.fixRounds),
    r => num(r.promptTokensEst),
    r => num(r.totalTokens),
    r => num(r.totalDurationMs),
    r => num(r.avgTokPerSec),
    r => num(r.score),
  ][sortCol];

  const sorted = [...DATA].sort((a, b) => {
    const x = keyOf(a);
    const y = keyOf(b);
    const cmp = typeof x === 'string' ? x.localeCompare(y) : x - y;
    return sortAsc ? cmp : -cmp;
  });

  resBody.innerHTML = sorted.map(r => {
    const c = cls(r);
    return `<tr>
<td class="model-name">${r.model}</td>
<td>${r.scenario}</td>
<td>${r.specOk ? `✓ ${r.specEntities}e` : '<span class="fail">✗</span>'}</td>
<td class="${c}">${pctBar(r.testsPassed, r.testsTotal)}</td>
<td>${r.fixRounds > 0 ? r.fixRounds + '×' : '-'}</td>
<td>${r.promptTokensEst > 0 ? '~'+(r.promptTokensEst/1000).toFixed(1)+'K' : '-'}</td>
<td>${r.totalTokens > 0 ? (r.totalTokens/1000).toFixed(1)+'K' : '-'}</td>
<td>${(num(r.totalDurationMs)/1000).toFixed(0)}s</td>
<td>${num(r.avgTokPerSec).toFixed(0)}</td>
<td><span class="stars">${r.stars}</span> ${r.score}p</td>
</tr>`;
  }).join('');

  // Show the sort arrow on the active column only.
  document.querySelectorAll('#results-table th').forEach((th, i) => {
    th.className = i === sortCol ? (sortAsc ? 'sorted-asc' : 'sorted-desc') : '';
  });
}
|
||||||
|
// Header click: toggle the direction when the active column is clicked again,
// otherwise switch to the clicked column (descending first), then re-render.
document.querySelector('#results-table thead').addEventListener('click', e => {
  // Resolve the header cell explicitly so a click on any node nested inside a
  // <th> still finds the column index.
  const th = e.target.closest('th[data-col]');
  if (!th) return;
  const col = Number.parseInt(th.dataset.col, 10);
  if (Number.isNaN(col)) return;
  if (sortCol === col) {
    sortAsc = !sortAsc;
  } else {
    sortCol = col;
    sortAsc = false;
  }
  renderResults();
});

// Initial render with the default sort.
renderResults();
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
117
kipina-codebench/results/2026-04-14T14-38.json
Normal file
117
kipina-codebench/results/2026-04-14T14-38.json
Normal file
@@ -0,0 +1,117 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 0,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 38807,
|
||||||
|
"totalTokens": 5667,
|
||||||
|
"avgTokPerSec": 183.83891911423427,
|
||||||
|
"promptChars": 21818,
|
||||||
|
"promptTokensEst": 5455,
|
||||||
|
"score": 40,
|
||||||
|
"stars": "★★☆☆☆",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-rs",
|
||||||
|
"round": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 3,
|
||||||
|
"testsTotal": 0,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 178290,
|
||||||
|
"totalTokens": 26265,
|
||||||
|
"avgTokPerSec": 168.77786498646262,
|
||||||
|
"promptChars": 21840,
|
||||||
|
"promptTokensEst": 5460,
|
||||||
|
"score": 0,
|
||||||
|
"stars": "☆☆☆☆☆",
|
||||||
|
"error": "Testit kaatuivat",
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-rs",
|
||||||
|
"round": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 3,
|
||||||
|
"testsTotal": 0,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 151603,
|
||||||
|
"totalTokens": 22725,
|
||||||
|
"avgTokPerSec": 170.74115131582644,
|
||||||
|
"promptChars": 21750,
|
||||||
|
"promptTokensEst": 5438,
|
||||||
|
"score": 0,
|
||||||
|
"stars": "☆☆☆☆☆",
|
||||||
|
"error": "Testit kaatuivat",
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-rs",
|
||||||
|
"round": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 0,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 41059,
|
||||||
|
"totalTokens": 6288,
|
||||||
|
"avgTokPerSec": 183.76827829344424,
|
||||||
|
"promptChars": 21848,
|
||||||
|
"promptTokensEst": 5462,
|
||||||
|
"score": 40,
|
||||||
|
"stars": "★★☆☆☆",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-rs",
|
||||||
|
"round": 4
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 3,
|
||||||
|
"testsTotal": 0,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 187666,
|
||||||
|
"totalTokens": 27278,
|
||||||
|
"avgTokPerSec": 166.24197655672018,
|
||||||
|
"promptChars": 21694,
|
||||||
|
"promptTokensEst": 5424,
|
||||||
|
"score": 0,
|
||||||
|
"stars": "☆☆☆☆☆",
|
||||||
|
"error": "Testit kaatuivat",
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-rs",
|
||||||
|
"round": 5
|
||||||
|
}
|
||||||
|
]
|
||||||
183
kipina-codebench/results/2026-04-14T14-52.html
Normal file
183
kipina-codebench/results/2026-04-14T14-52.html
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="fi">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>Kipina Model Benchmark</title>
|
||||||
|
<style>
|
||||||
|
:root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
|
||||||
|
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||||
|
body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
|
||||||
|
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
|
||||||
|
.meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
|
||||||
|
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
|
||||||
|
.card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
|
||||||
|
.card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||||
|
.card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
|
||||||
|
.card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
|
||||||
|
table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
|
||||||
|
th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
|
||||||
|
th:hover { color: var(--text); }
|
||||||
|
th.sorted-asc::after { content: ' ▲'; }
|
||||||
|
th.sorted-desc::after { content: ' ▼'; }
|
||||||
|
td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
|
||||||
|
tr:hover td { background: #1c2128; }
|
||||||
|
.pass { color: var(--green); }
|
||||||
|
.partial { color: var(--yellow); }
|
||||||
|
.fail { color: var(--red); }
|
||||||
|
.stars { letter-spacing: 1px; }
|
||||||
|
.bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
|
||||||
|
.bar-bg { background: var(--border); }
|
||||||
|
.bar-fill { background: var(--green); }
|
||||||
|
.bar-partial { background: var(--yellow); }
|
||||||
|
.model-name { font-weight: 600; }
|
||||||
|
h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
|
||||||
|
.summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
|
||||||
|
<h1>Kipina Model Benchmark</h1>
|
||||||
|
<div class="meta" id="meta"></div>
|
||||||
|
|
||||||
|
<div class="cards" id="cards"></div>
|
||||||
|
|
||||||
|
<h2>Mallikohtainen yhteenveto</h2>
|
||||||
|
<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
|
||||||
|
|
||||||
|
<h2>Kaikki tulokset</h2>
|
||||||
|
<table id="results-table"><thead></thead><tbody></tbody></table>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
const RAW = [{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":4,"testsTotal":1,"testsPassed":0,"testsFailed":1,"totalDurationMs":231122,"totalTokens":22952,"avgTokPerSec":113.75113825466987,"promptChars":17604,"promptTokensEst":4401,"score":20,"stars":"★☆☆☆☆","error":null,"profile":"large","promptName":"code-rs","round":1},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":5,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":260314,"totalTokens":26144,"avgTokPerSec":113.40388181735229,"promptChars":17539,"promptTokensEst":4385,"score":0,"stars":"☆☆☆☆☆","error":"Testit kaatuivat","profile":"large","promptName":"code-rs","round":2},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":4,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":227228,"totalTokens":22381,"avgTokPerSec":113.5362722539456,"promptChars":17630,"promptTokensEst":4408,"score":0,"stars":"☆☆☆☆☆","error":"Testit kaatuivat","profile":"large","promptName":"code-rs","round":3},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":1,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":102052,"totalTokens":9984,"avgTokPerSec":117.77973450501808,"promptChars":17571,"promptTokensEst":4393,"score":30,"stars":"★★☆☆☆","error":null,"profile":"large","promptName":"code-rs","round":4},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":2,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":146321,"totalTokens":14445,"avgTokPerSec":115.61186488022163,"promptChars":17589,"promptTokensEst":4397,"score":20,"stars":"★☆☆☆☆","error":null,"profile":"large","promptName":"code-rs","round":5}];
|
||||||
|
|
||||||
|
/**
 * Converts a numeric score into a five-character star rating.
 * Thresholds: >=90 five stars, >=70 four, >=50 three, >=25 two,
 * >0 one, otherwise none.
 */
const starsFor = s => {
  if (s >= 90) return '★★★★★';
  if (s >= 70) return '★★★★☆';
  if (s >= 50) return '★★★☆☆';
  if (s >= 25) return '★★☆☆☆';
  if (s > 0) return '★☆☆☆☆';
  return '☆☆☆☆☆';
};
|
||||||
|
/**
 * Computes a 0–100 score for a result record (used when a record carries no
 * stored score).
 *
 * Scoring: 10 for a valid spec, 10 for reaching the test stage, up to 60 for
 * the share of passed tests, and up to 20 for needing few fix rounds
 * (minus 10 per round). A record that errored without running any test
 * scores 0.
 *
 * Missing or non-numeric counters are treated as 0 so the result is never NaN.
 *
 * @param {object} r - result record (specOk, error, testsTotal, testsPassed, fixRounds)
 * @returns {number} integer score in [0, 100]
 */
function calcScore(r) {
  const total = Math.max(0, Number(r.testsTotal) || 0);
  const passed = Math.max(0, Number(r.testsPassed) || 0);
  const fixes = Math.max(0, Number(r.fixRounds) || 0);

  if (r.error && total === 0) return 0;

  let s = 0;
  if (r.specOk) s += 10;
  // Past the guard above the run either had no error or executed tests,
  // so the "reached the test stage" points always apply.
  s += 10;
  if (total > 0) s += Math.round((passed / total) * 60);
  s += Math.max(0, 20 - fixes * 10);
  return Math.min(100, s);
}
|
||||||
|
// Fill in derived fields when a record lacks them: score (via calcScore),
// stars (via starsFor) and the prompt token estimate (promptChars / 4).
// Each record is copied, so the RAW input objects are left unmodified.
const DATA = RAW.map(r => {
  const score = r.score ?? calcScore(r);
  return {
    ...r,
    score,
    stars: r.stars || starsFor(score),
    promptTokensEst: r.promptTokensEst || (r.promptChars ? Math.round(r.promptChars / 4) : 0),
  };
});
|
||||||
|
/**
 * Picks the CSS class for a result: 'pass' when tests ran, none failed and
 * there is no error; 'partial' when tests ran and at least one passed;
 * 'fail' otherwise.
 */
const cls = r => {
  const ranTests = r.testsTotal > 0;
  if (ranTests && !r.error && r.testsPassed === r.testsTotal) return 'pass';
  if (ranTests && r.testsPassed > 0) return 'partial';
  return 'fail';
};
|
||||||
|
/**
 * Renders a small progress bar plus a "passed/total" label as an HTML string.
 *
 * @param {number} passed - number of passed tests
 * @param {number} total - number of tests; a missing or non-positive value renders '-'
 * @param {number} [w=80] - bar width in pixels
 * @returns {string} HTML fragment, or '-' when there is nothing to show
 */
const pctBar = (passed, total, w = 80) => {
  // Covers 0 as well as undefined/NaN/negative totals, which would otherwise
  // produce "NaNpx" or negative widths.
  if (!(total > 0)) return '-';
  // Clamp so an inconsistent record (passed > total) cannot overflow the bar.
  const ratio = Math.min(1, Math.max(0, passed / total || 0));
  const pct = ratio * 100;
  const c = pct === 100 ? 'bar-fill' : 'bar-partial';
  return `<span class="bar bar-bg" style="width:${w}px"><span class="bar ${c}" style="width:${Math.round(pct/100*w)}px"></span></span> ${passed}/${total}`;
};
|
||||||
|
|
||||||
|
// Meta
|
||||||
|
const totalTime = DATA.reduce((s,r) => s + r.totalDurationMs, 0);
|
||||||
|
document.getElementById('meta').textContent = `${new Date().toLocaleDateString('fi-FI')} — ${DATA.length} ajoa — ${(totalTime/1000/60).toFixed(1)} min`;
|
||||||
|
|
||||||
|
// Cards
|
||||||
|
const models = [...new Set(DATA.map(r => r.model))];
|
||||||
|
const scenarios = [...new Set(DATA.map(r => r.scenario))];
|
||||||
|
const avgScore = DATA.length ? Math.round(DATA.reduce((s,r) => s + r.score, 0) / DATA.length) : 0;
|
||||||
|
const totalPassed = DATA.reduce((s,r) => s + r.testsPassed, 0);
|
||||||
|
const totalTests = DATA.reduce((s,r) => s + r.testsTotal, 0);
|
||||||
|
const passRate = totalTests ? Math.round(totalPassed/totalTests*100) : 0;
|
||||||
|
const bestModel = models.map(m => {
|
||||||
|
const mrs = DATA.filter(r => r.model === m);
|
||||||
|
return { model: m, avg: Math.round(mrs.reduce((s,r) => s + r.score, 0) / mrs.length) };
|
||||||
|
}).sort((a,b) => b.avg - a.avg)[0];
|
||||||
|
const fastestModel = models.map(m => {
|
||||||
|
const mrs = DATA.filter(r => r.model === m);
|
||||||
|
return { model: m, speed: Math.round(mrs.reduce((s,r) => s + r.avgTokPerSec, 0) / mrs.length) };
|
||||||
|
}).sort((a,b) => b.speed - a.speed)[0];
|
||||||
|
|
||||||
|
document.getElementById('cards').innerHTML = `
|
||||||
|
<div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistetta</div></div>
|
||||||
|
<div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
|
||||||
|
<div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${bestModel?.model || '-'}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
|
||||||
|
<div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${fastestModel?.model || '-'}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
|
||||||
|
<div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
|
||||||
|
<div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime/1000/60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
|
||||||
|
`;
|
||||||
|
|
||||||
|
// Summary table: one row per model, sorted by average score (best first).
const sumHead = document.querySelector('#summary-table thead');
const sumBody = document.querySelector('#summary-table tbody');
sumHead.innerHTML = '<tr><th>Malli</th>' + scenarios.map(s => `<th>${s}</th>`).join('') + '<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>';

const modelRows = models.map(m => {
  const mrs = DATA.filter(r => r.model === m);
  const sumOf = (rows, key) => rows.reduce((s, r) => s + r[key], 0);

  const tp = sumOf(mrs, 'testsPassed');
  const tt = sumOf(mrs, 'testsTotal');
  const tok = sumOf(mrs, 'totalTokens');
  const time = sumOf(mrs, 'totalDurationMs');
  const speed = Math.round(sumOf(mrs, 'avgTokPerSec') / mrs.length);
  const avg = Math.round(sumOf(mrs, 'score') / mrs.length);

  const scenCols = scenarios.map(s => {
    // A model can have several runs (rounds) of the same scenario; aggregate
    // all of them instead of showing only the first one. With a single run
    // the cell is identical to that run's own numbers.
    const runs = mrs.filter(r => r.scenario === s);
    if (runs.length === 0) return '<td>-</td>';
    const agg = {
      error: runs.find(r => r.error)?.error ?? null,
      testsPassed: sumOf(runs, 'testsPassed'),
      testsTotal: sumOf(runs, 'testsTotal'),
      totalDurationMs: sumOf(runs, 'totalDurationMs'),
    };
    return `<td class="${cls(agg)}">${pctBar(agg.testsPassed, agg.testsTotal, 60)} <span style="color:var(--dim)">${(agg.totalDurationMs/1000).toFixed(0)}s</span></td>`;
  }).join('');

  return { avg, html: `<tr><td class="model-name">${m}</td>${scenCols}<td>${pctBar(tp, tt)}</td><td>${(tok/1000).toFixed(1)}K</td><td>${(time/1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>` };
}).sort((a,b) => b.avg - a.avg);

sumBody.innerHTML = modelRows.map(r => r.html).join('');
|
||||||
|
|
||||||
|
// Results table
|
||||||
|
const resHead = document.querySelector('#results-table thead');
|
||||||
|
const resBody = document.querySelector('#results-table tbody');
|
||||||
|
const resCols = ['Malli','Skenaario','Speksi','Testit','Korjaus','Ctx','Out tok','Aika','tok/s','Pisteet'];
|
||||||
|
resHead.innerHTML = '<tr>' + resCols.map((c,i) => `<th data-col="${i}">${c}</th>`).join('') + '</tr>';
|
||||||
|
|
||||||
|
let sortCol = 9, sortAsc = false;
|
||||||
|
/**
 * Re-renders the body of the results table sorted by the current
 * `sortCol` / `sortAsc` state and marks the active header cell.
 *
 * String columns are compared with localeCompare, all others numerically.
 * Missing numeric fields are treated as 0 so an incomplete record neither
 * breaks the sort nor throws while formatting.
 */
function renderResults() {
  const num = v => Number(v) || 0;

  // One key extractor per column, in the same order as the header cells.
  // Selected once per render instead of building every pair per comparison.
  const keyOf = [
    r => String(r.model ?? ''),
    r => String(r.scenario ?? ''),
    r => num(r.specEntities),
    r => num(r.testsPassed) / Math.max(num(r.testsTotal), 1),
    r => num(r.fixRounds),
    r => num(r.promptTokensEst),
    r => num(r.totalTokens),
    r => num(r.totalDurationMs),
    r => num(r.avgTokPerSec),
    r => num(r.score),
  ][sortCol];

  const sorted = [...DATA].sort((a, b) => {
    const x = keyOf(a);
    const y = keyOf(b);
    const cmp = typeof x === 'string' ? x.localeCompare(y) : x - y;
    return sortAsc ? cmp : -cmp;
  });

  resBody.innerHTML = sorted.map(r => {
    const c = cls(r);
    return `<tr>
<td class="model-name">${r.model}</td>
<td>${r.scenario}</td>
<td>${r.specOk ? `✓ ${r.specEntities}e` : '<span class="fail">✗</span>'}</td>
<td class="${c}">${pctBar(r.testsPassed, r.testsTotal)}</td>
<td>${r.fixRounds > 0 ? r.fixRounds + '×' : '-'}</td>
<td>${r.promptTokensEst > 0 ? '~'+(r.promptTokensEst/1000).toFixed(1)+'K' : '-'}</td>
<td>${r.totalTokens > 0 ? (r.totalTokens/1000).toFixed(1)+'K' : '-'}</td>
<td>${(num(r.totalDurationMs)/1000).toFixed(0)}s</td>
<td>${num(r.avgTokPerSec).toFixed(0)}</td>
<td><span class="stars">${r.stars}</span> ${r.score}p</td>
</tr>`;
  }).join('');

  // Show the sort arrow on the active column only.
  document.querySelectorAll('#results-table th').forEach((th, i) => {
    th.className = i === sortCol ? (sortAsc ? 'sorted-asc' : 'sorted-desc') : '';
  });
}
|
||||||
|
// Clicking a header cell sorts by that column; clicking the active column
// again flips the direction. A newly selected column starts descending.
document.querySelector('#results-table thead').addEventListener('click', (e) => {
  // Resolve the enclosing <th> so clicks on anything nested inside a header
  // cell still find the data-col attribute; clicks elsewhere yield NaN.
  const col = Number.parseInt(e.target.closest('th')?.dataset.col, 10);
  if (Number.isNaN(col)) return;
  if (sortCol === col) {
    sortAsc = !sortAsc;
  } else {
    sortCol = col;
    sortAsc = false;
  }
  renderResults();
});

// Initial render with the default sort state.
renderResults();
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
117
kipina-codebench/results/2026-04-14T14-52.json
Normal file
117
kipina-codebench/results/2026-04-14T14-52.json
Normal file
@@ -0,0 +1,117 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 4,
|
||||||
|
"testsTotal": 1,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 1,
|
||||||
|
"totalDurationMs": 231122,
|
||||||
|
"totalTokens": 22952,
|
||||||
|
"avgTokPerSec": 113.75113825466987,
|
||||||
|
"promptChars": 17604,
|
||||||
|
"promptTokensEst": 4401,
|
||||||
|
"score": 20,
|
||||||
|
"stars": "★☆☆☆☆",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-rs",
|
||||||
|
"round": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 5,
|
||||||
|
"testsTotal": 0,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 260314,
|
||||||
|
"totalTokens": 26144,
|
||||||
|
"avgTokPerSec": 113.40388181735229,
|
||||||
|
"promptChars": 17539,
|
||||||
|
"promptTokensEst": 4385,
|
||||||
|
"score": 0,
|
||||||
|
"stars": "☆☆☆☆☆",
|
||||||
|
"error": "Testit kaatuivat",
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-rs",
|
||||||
|
"round": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 4,
|
||||||
|
"testsTotal": 0,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 227228,
|
||||||
|
"totalTokens": 22381,
|
||||||
|
"avgTokPerSec": 113.5362722539456,
|
||||||
|
"promptChars": 17630,
|
||||||
|
"promptTokensEst": 4408,
|
||||||
|
"score": 0,
|
||||||
|
"stars": "☆☆☆☆☆",
|
||||||
|
"error": "Testit kaatuivat",
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-rs",
|
||||||
|
"round": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 1,
|
||||||
|
"testsTotal": 0,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 102052,
|
||||||
|
"totalTokens": 9984,
|
||||||
|
"avgTokPerSec": 117.77973450501808,
|
||||||
|
"promptChars": 17571,
|
||||||
|
"promptTokensEst": 4393,
|
||||||
|
"score": 30,
|
||||||
|
"stars": "★★☆☆☆",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-rs",
|
||||||
|
"round": 4
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 2,
|
||||||
|
"testsTotal": 0,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 146321,
|
||||||
|
"totalTokens": 14445,
|
||||||
|
"avgTokPerSec": 115.61186488022163,
|
||||||
|
"promptChars": 17589,
|
||||||
|
"promptTokensEst": 4397,
|
||||||
|
"score": 20,
|
||||||
|
"stars": "★☆☆☆☆",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-rs",
|
||||||
|
"round": 5
|
||||||
|
}
|
||||||
|
]
|
||||||
183
kipina-codebench/results/2026-04-14T15-48.html
Normal file
183
kipina-codebench/results/2026-04-14T15-48.html
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="fi">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>Kipina Model Benchmark</title>
|
||||||
|
<style>
|
||||||
|
:root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
|
||||||
|
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||||
|
body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
|
||||||
|
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
|
||||||
|
.meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
|
||||||
|
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
|
||||||
|
.card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
|
||||||
|
.card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||||
|
.card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
|
||||||
|
.card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
|
||||||
|
table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
|
||||||
|
th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
|
||||||
|
th:hover { color: var(--text); }
|
||||||
|
th.sorted-asc::after { content: ' ▲'; }
|
||||||
|
th.sorted-desc::after { content: ' ▼'; }
|
||||||
|
td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
|
||||||
|
tr:hover td { background: #1c2128; }
|
||||||
|
.pass { color: var(--green); }
|
||||||
|
.partial { color: var(--yellow); }
|
||||||
|
.fail { color: var(--red); }
|
||||||
|
.stars { letter-spacing: 1px; }
|
||||||
|
.bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
|
||||||
|
.bar-bg { background: var(--border); }
|
||||||
|
.bar-fill { background: var(--green); }
|
||||||
|
.bar-partial { background: var(--yellow); }
|
||||||
|
.model-name { font-weight: 600; }
|
||||||
|
h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
|
||||||
|
.summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
|
||||||
|
<h1>Kipina Model Benchmark</h1>
|
||||||
|
<div class="meta" id="meta"></div>
|
||||||
|
|
||||||
|
<div class="cards" id="cards"></div>
|
||||||
|
|
||||||
|
<h2>Mallikohtainen yhteenveto</h2>
|
||||||
|
<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
|
||||||
|
|
||||||
|
<h2>Kaikki tulokset</h2>
|
||||||
|
<table id="results-table"><thead></thead><tbody></tbody></table>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
const RAW = [];
|
||||||
|
|
||||||
|
/**
 * Converts a numeric score into a five-character star rating.
 * Thresholds: >= 90 five stars, >= 70 four, >= 50 three, >= 25 two,
 * > 0 one, anything else no stars.
 */
const starsFor = (s) => {
  if (s >= 90) return '★★★★★';
  if (s >= 70) return '★★★★☆';
  if (s >= 50) return '★★★☆☆';
  if (s >= 25) return '★★☆☆☆';
  if (s > 0) return '★☆☆☆☆';
  return '☆☆☆☆☆';
};
|
||||||
|
/**
 * Computes a 0-100 score for one benchmark run.
 *
 * A run with an error and no tests scores 0. Otherwise points are added up:
 * 10 for a valid spec, 10 when there is no error or at least one test ran,
 * up to 60 in proportion to passed tests, and up to 20 that shrink by 10 per
 * fix round. The sum is capped at 100.
 *
 * @param {object} r - Run record (reads error, testsTotal, testsPassed, specOk, fixRounds).
 * @returns {number} The score.
 */
function calcScore(r) {
  const failedBeforeTests = r.error && r.testsTotal === 0;
  if (failedBeforeTests) return 0;

  const hasTests = r.testsTotal > 0;
  const specPoints = r.specOk ? 10 : 0;
  const runPoints = (!r.error || hasTests) ? 10 : 0;
  const testPoints = hasTests ? Math.round((r.testsPassed / r.testsTotal) * 60) : 0;
  const fixPoints = Math.max(0, 20 - (r.fixRounds || 0) * 10);

  return Math.min(100, specPoints + runPoints + testPoints + fixPoints);
}
|
||||||
|
// Fill in derived fields when they are missing: score, star rating and the
// estimated prompt token count (prompt characters / 4).
// Each record is copied first so the RAW input objects are left untouched.
const DATA = RAW.map((raw) => {
  const r = { ...raw };
  if (r.score == null) r.score = calcScore(r);
  if (!r.stars) r.stars = starsFor(r.score);
  if (!r.promptTokensEst) r.promptTokensEst = r.promptChars ? Math.round(r.promptChars / 4) : 0;
  return r;
});
|
||||||
|
/**
 * Picks the CSS class for a run: 'pass' when tests ran, all passed and there
 * is no error; 'partial' when tests ran and at least one passed; else 'fail'.
 */
const cls = (r) => {
  const ranTests = r.testsTotal > 0;
  if (ranTests && !r.error && r.testsPassed === r.testsTotal) return 'pass';
  if (ranTests && r.testsPassed > 0) return 'partial';
  return 'fail';
};
|
||||||
|
/**
 * Renders an HTML progress bar followed by "passed/total".
 * Returns '-' when total is 0. The bar is drawn in the "fill" style only at
 * exactly 100 %, otherwise in the "partial" style.
 *
 * @param {number} passed - Number of passed tests.
 * @param {number} total - Number of tests.
 * @param {number} [w=80] - Bar width in pixels.
 * @returns {string} HTML fragment.
 */
const pctBar = (passed, total, w = 80) => {
  if (total === 0) {
    return '-';
  }
  const percent = passed / total * 100;
  const fillClass = percent === 100 ? 'bar-fill' : 'bar-partial';
  const fillWidth = Math.round(percent / 100 * w);
  const inner = `<span class="bar ${fillClass}" style="width:${fillWidth}px"></span>`;
  return `<span class="bar bar-bg" style="width:${w}px">${inner}</span> ${passed}/${total}`;
};
|
||||||
|
|
||||||
|
// Meta line: today's date (Finnish locale), number of runs, and the summed
// run duration in minutes.
const totalTime = DATA.reduce((acc, run) => acc + run.totalDurationMs, 0);
document.getElementById('meta').textContent = [
  new Date().toLocaleDateString('fi-FI'),
  `${DATA.length} ajoa`,
  `${(totalTime / 1000 / 60).toFixed(1)} min`,
].join(' — ');
|
||||||
|
|
||||||
|
// Cards: headline statistics shown at the top of the report.
const models = [...new Set(DATA.map((r) => r.model))];
const scenarios = [...new Set(DATA.map((r) => r.scenario))];
const avgScore = DATA.length ? Math.round(DATA.reduce((s, r) => s + r.score, 0) / DATA.length) : 0;
const totalPassed = DATA.reduce((s, r) => s + r.testsPassed, 0);
const totalTests = DATA.reduce((s, r) => s + r.testsTotal, 0);
const passRate = totalTests ? Math.round(totalPassed / totalTests * 100) : 0;

// Model with the highest mean score (undefined when there is no data).
const bestModel = models.map((m) => {
  const mrs = DATA.filter((r) => r.model === m);
  return { model: m, avg: Math.round(mrs.reduce((s, r) => s + r.score, 0) / mrs.length) };
}).sort((a, b) => b.avg - a.avg)[0];

// Model with the highest mean tokens-per-second (undefined when there is no data).
const fastestModel = models.map((m) => {
  const mrs = DATA.filter((r) => r.model === m);
  return { model: m, speed: Math.round(mrs.reduce((s, r) => s + r.avgTokPerSec, 0) / mrs.length) };
}).sort((a, b) => b.speed - a.speed)[0];

// Model names come from the benchmark data and are inserted via innerHTML,
// so HTML-significant characters are replaced with numeric entities.
const escCard = (v) => String(v).replace(/[&<>"']/g, (ch) => `&#${ch.charCodeAt(0)};`);

document.getElementById('cards').innerHTML = `
<div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistettä</div></div>
<div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
<div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${escCard(bestModel?.model || '-')}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
<div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${escCard(fastestModel?.model || '-')}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
<div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
<div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime / 1000 / 60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
`;
|
||||||
|
|
||||||
|
// Summary table: one row per model, one column per scenario.
const sumHead = document.querySelector('#summary-table thead');
const sumBody = document.querySelector('#summary-table tbody');

// Model and scenario names come from the benchmark data and are inserted via
// innerHTML, so HTML-significant characters are replaced with numeric entities.
const escSummary = (v) => String(v).replace(/[&<>"']/g, (ch) => `&#${ch.charCodeAt(0)};`);

sumHead.innerHTML = '<tr><th>Malli</th>' + scenarios.map((s) => `<th>${escSummary(s)}</th>`).join('') + '<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>';

// One summary row per model, ordered by average score (highest first).
const modelRows = models.map((m) => {
  const mrs = DATA.filter((r) => r.model === m);
  const sumOf = (field) => mrs.reduce((s, r) => s + r[field], 0);
  const tp = sumOf('testsPassed');
  const tt = sumOf('testsTotal');
  const tok = sumOf('totalTokens');
  const time = sumOf('totalDurationMs');
  const speed = Math.round(sumOf('avgTokPerSec') / mrs.length);
  const avg = Math.round(sumOf('score') / mrs.length);
  // Each scenario cell shows the first run found for that scenario; any further
  // runs of the same scenario only contribute to the totals above.
  const scenCols = scenarios.map((s) => {
    const r = mrs.find((run) => run.scenario === s);
    if (!r) return '<td>-</td>';
    return `<td class="${cls(r)}">${pctBar(r.testsPassed, r.testsTotal, 60)} <span style="color:var(--dim)">${(r.totalDurationMs / 1000).toFixed(0)}s</span></td>`;
  }).join('');
  return { avg, html: `<tr><td class="model-name">${escSummary(m)}</td>${scenCols}<td>${pctBar(tp, tt)}</td><td>${(tok / 1000).toFixed(1)}K</td><td>${(time / 1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>` };
}).sort((a, b) => b.avg - a.avg);

sumBody.innerHTML = modelRows.map((r) => r.html).join('');
|
||||||
|
|
||||||
|
// Results table: look up the table sections and render the clickable header row.
const resHead = document.querySelector('#results-table thead');
const resBody = document.querySelector('#results-table tbody');
const resCols = [
  'Malli',
  'Skenaario',
  'Speksi',
  'Testit',
  'Korjaus',
  'Ctx',
  'Out tok',
  'Aika',
  'tok/s',
  'Pisteet',
];
resHead.innerHTML = `<tr>${resCols.map((label, idx) => `<th data-col="${idx}">${label}</th>`).join('')}</tr>`;

// Current sort state: column index into resCols and direction.
// Index 9 is the score column ('Pisteet'); the table starts sorted descending.
let sortCol = 9;
let sortAsc = false;
|
||||||
|
/**
 * Re-renders the body of the results table, sorted by the current
 * `sortCol` / `sortAsc` state, and marks the active header cell with a
 * `sorted-asc` / `sorted-desc` class.
 */
function renderResults() {
  // One sort-key accessor per column, in the same order as resCols.
  const sortKeys = [
    (r) => r.model,
    (r) => r.scenario,
    (r) => r.specEntities,
    (r) => r.testsPassed / Math.max(r.testsTotal, 1), // pass ratio; never divides by zero
    (r) => r.fixRounds,
    (r) => r.promptTokensEst,
    (r) => r.totalTokens,
    (r) => r.totalDurationMs,
    (r) => r.avgTokPerSec,
    (r) => r.score,
  ];
  const keyOf = sortKeys[sortCol];

  // Model and scenario names come from the benchmark data and are inserted via
  // innerHTML, so HTML-significant characters are replaced with numeric entities.
  const esc = (v) => String(v).replace(/[&<>"']/g, (ch) => `&#${ch.charCodeAt(0)};`);

  // Sort a copy so DATA keeps its original order.
  const sorted = [...DATA].sort((a, b) => {
    const x = keyOf(a);
    const y = keyOf(b);
    const cmp = typeof x === 'string' ? x.localeCompare(y) : x - y;
    return sortAsc ? cmp : -cmp;
  });

  resBody.innerHTML = sorted.map((r) => {
    const c = cls(r);
    return [
      '<tr>',
      `<td class="model-name">${esc(r.model)}</td>`,
      `<td>${esc(r.scenario)}</td>`,
      `<td>${r.specOk ? `✓ ${r.specEntities}e` : '<span class="fail">✗</span>'}</td>`,
      `<td class="${c}">${pctBar(r.testsPassed, r.testsTotal)}</td>`,
      `<td>${r.fixRounds > 0 ? r.fixRounds + '×' : '-'}</td>`,
      `<td>${r.promptTokensEst > 0 ? '~' + (r.promptTokensEst / 1000).toFixed(1) + 'K' : '-'}</td>`,
      `<td>${r.totalTokens > 0 ? (r.totalTokens / 1000).toFixed(1) + 'K' : '-'}</td>`,
      `<td>${(r.totalDurationMs / 1000).toFixed(0)}s</td>`,
      `<td>${r.avgTokPerSec.toFixed(0)}</td>`,
      `<td><span class="stars">${r.stars}</span> ${r.score}p</td>`,
      '</tr>',
    ].join('\n');
  }).join('');

  // Show the sort arrow only on the active column.
  document.querySelectorAll('#results-table th').forEach((th, i) => {
    th.className = i === sortCol ? (sortAsc ? 'sorted-asc' : 'sorted-desc') : '';
  });
}
|
||||||
|
// Clicking a header cell sorts by that column; clicking the active column
// again flips the direction. A newly selected column starts descending.
document.querySelector('#results-table thead').addEventListener('click', (e) => {
  // Resolve the enclosing <th> so clicks on anything nested inside a header
  // cell still find the data-col attribute; clicks elsewhere yield NaN.
  const col = Number.parseInt(e.target.closest('th')?.dataset.col, 10);
  if (Number.isNaN(col)) return;
  if (sortCol === col) {
    sortAsc = !sortAsc;
  } else {
    sortCol = col;
    sortAsc = false;
  }
  renderResults();
});

// Initial render with the default sort state.
renderResults();
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
1
kipina-codebench/results/2026-04-14T15-48.json
Normal file
1
kipina-codebench/results/2026-04-14T15-48.json
Normal file
@@ -0,0 +1 @@
|
|||||||
|
[]
|
||||||
183
kipina-codebench/results/2026-04-14T15-51.html
Normal file
183
kipina-codebench/results/2026-04-14T15-51.html
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="fi">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>Kipina Model Benchmark</title>
|
||||||
|
<style>
|
||||||
|
:root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
|
||||||
|
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||||
|
body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
|
||||||
|
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
|
||||||
|
.meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
|
||||||
|
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
|
||||||
|
.card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
|
||||||
|
.card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||||
|
.card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
|
||||||
|
.card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
|
||||||
|
table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
|
||||||
|
th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
|
||||||
|
th:hover { color: var(--text); }
|
||||||
|
th.sorted-asc::after { content: ' ▲'; }
|
||||||
|
th.sorted-desc::after { content: ' ▼'; }
|
||||||
|
td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
|
||||||
|
tr:hover td { background: #1c2128; }
|
||||||
|
.pass { color: var(--green); }
|
||||||
|
.partial { color: var(--yellow); }
|
||||||
|
.fail { color: var(--red); }
|
||||||
|
.stars { letter-spacing: 1px; }
|
||||||
|
.bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
|
||||||
|
.bar-bg { background: var(--border); }
|
||||||
|
.bar-fill { background: var(--green); }
|
||||||
|
.bar-partial { background: var(--yellow); }
|
||||||
|
.model-name { font-weight: 600; }
|
||||||
|
h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
|
||||||
|
.summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
|
||||||
|
<h1>Kipina Model Benchmark</h1>
|
||||||
|
<div class="meta" id="meta"></div>
|
||||||
|
|
||||||
|
<div class="cards" id="cards"></div>
|
||||||
|
|
||||||
|
<h2>Mallikohtainen yhteenveto</h2>
|
||||||
|
<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
|
||||||
|
|
||||||
|
<h2>Kaikki tulokset</h2>
|
||||||
|
<table id="results-table"><thead></thead><tbody></tbody></table>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
const RAW = [{"model":"codestral:22b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":130035,"totalTokens":6520,"avgTokPerSec":61.71030751274268,"promptChars":7556,"promptTokensEst":1889,"score":40,"stars":"★★☆☆☆","error":null,"profile":"large","promptName":"code-rs","round":1},{"model":"codestral:22b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":68507,"totalTokens":3808,"avgTokPerSec":62.98926789707312,"promptChars":16131,"promptTokensEst":4033,"score":40,"stars":"★★☆☆☆","error":null,"profile":"large","promptName":"code-rs","round":2},{"model":"codestral:22b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":1,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":148585,"totalTokens":8183,"avgTokPerSec":60.985378376436714,"promptChars":11852,"promptTokensEst":2963,"score":30,"stars":"★★☆☆☆","error":null,"profile":"large","promptName":"code-rs","round":3}];
|
||||||
|
|
||||||
|
/**
 * Converts a numeric score into a five-character star rating.
 * Thresholds: >= 90 five stars, >= 70 four, >= 50 three, >= 25 two,
 * > 0 one, anything else no stars.
 */
const starsFor = (s) => {
  if (s >= 90) return '★★★★★';
  if (s >= 70) return '★★★★☆';
  if (s >= 50) return '★★★☆☆';
  if (s >= 25) return '★★☆☆☆';
  if (s > 0) return '★☆☆☆☆';
  return '☆☆☆☆☆';
};
|
||||||
|
/**
 * Computes a 0-100 score for one benchmark run.
 *
 * A run with an error and no tests scores 0. Otherwise points are added up:
 * 10 for a valid spec, 10 when there is no error or at least one test ran,
 * up to 60 in proportion to passed tests, and up to 20 that shrink by 10 per
 * fix round. The sum is capped at 100.
 *
 * @param {object} r - Run record (reads error, testsTotal, testsPassed, specOk, fixRounds).
 * @returns {number} The score.
 */
function calcScore(r) {
  const failedBeforeTests = r.error && r.testsTotal === 0;
  if (failedBeforeTests) return 0;

  const hasTests = r.testsTotal > 0;
  const specPoints = r.specOk ? 10 : 0;
  const runPoints = (!r.error || hasTests) ? 10 : 0;
  const testPoints = hasTests ? Math.round((r.testsPassed / r.testsTotal) * 60) : 0;
  const fixPoints = Math.max(0, 20 - (r.fixRounds || 0) * 10);

  return Math.min(100, specPoints + runPoints + testPoints + fixPoints);
}
|
||||||
|
// Fill in derived fields when they are missing: score, star rating and the
// estimated prompt token count (prompt characters / 4).
// Each record is copied first so the RAW input objects are left untouched.
const DATA = RAW.map((raw) => {
  const r = { ...raw };
  if (r.score == null) r.score = calcScore(r);
  if (!r.stars) r.stars = starsFor(r.score);
  if (!r.promptTokensEst) r.promptTokensEst = r.promptChars ? Math.round(r.promptChars / 4) : 0;
  return r;
});
|
||||||
|
/**
 * Picks the CSS class for a run: 'pass' when tests ran, all passed and there
 * is no error; 'partial' when tests ran and at least one passed; else 'fail'.
 */
const cls = (r) => {
  const ranTests = r.testsTotal > 0;
  if (ranTests && !r.error && r.testsPassed === r.testsTotal) return 'pass';
  if (ranTests && r.testsPassed > 0) return 'partial';
  return 'fail';
};
|
||||||
|
/**
 * Renders an HTML progress bar followed by "passed/total".
 * Returns '-' when total is 0. The bar is drawn in the "fill" style only at
 * exactly 100 %, otherwise in the "partial" style.
 *
 * @param {number} passed - Number of passed tests.
 * @param {number} total - Number of tests.
 * @param {number} [w=80] - Bar width in pixels.
 * @returns {string} HTML fragment.
 */
const pctBar = (passed, total, w = 80) => {
  if (total === 0) {
    return '-';
  }
  const percent = passed / total * 100;
  const fillClass = percent === 100 ? 'bar-fill' : 'bar-partial';
  const fillWidth = Math.round(percent / 100 * w);
  const inner = `<span class="bar ${fillClass}" style="width:${fillWidth}px"></span>`;
  return `<span class="bar bar-bg" style="width:${w}px">${inner}</span> ${passed}/${total}`;
};
|
||||||
|
|
||||||
|
// Meta line: today's date (Finnish locale), number of runs, and the summed
// run duration in minutes.
const totalTime = DATA.reduce((acc, run) => acc + run.totalDurationMs, 0);
document.getElementById('meta').textContent = [
  new Date().toLocaleDateString('fi-FI'),
  `${DATA.length} ajoa`,
  `${(totalTime / 1000 / 60).toFixed(1)} min`,
].join(' — ');
|
||||||
|
|
||||||
|
// Cards: headline statistics shown at the top of the report.
const models = [...new Set(DATA.map((r) => r.model))];
const scenarios = [...new Set(DATA.map((r) => r.scenario))];
const avgScore = DATA.length ? Math.round(DATA.reduce((s, r) => s + r.score, 0) / DATA.length) : 0;
const totalPassed = DATA.reduce((s, r) => s + r.testsPassed, 0);
const totalTests = DATA.reduce((s, r) => s + r.testsTotal, 0);
const passRate = totalTests ? Math.round(totalPassed / totalTests * 100) : 0;

// Model with the highest mean score (undefined when there is no data).
const bestModel = models.map((m) => {
  const mrs = DATA.filter((r) => r.model === m);
  return { model: m, avg: Math.round(mrs.reduce((s, r) => s + r.score, 0) / mrs.length) };
}).sort((a, b) => b.avg - a.avg)[0];

// Model with the highest mean tokens-per-second (undefined when there is no data).
const fastestModel = models.map((m) => {
  const mrs = DATA.filter((r) => r.model === m);
  return { model: m, speed: Math.round(mrs.reduce((s, r) => s + r.avgTokPerSec, 0) / mrs.length) };
}).sort((a, b) => b.speed - a.speed)[0];

// Model names come from the benchmark data and are inserted via innerHTML,
// so HTML-significant characters are replaced with numeric entities.
const escCard = (v) => String(v).replace(/[&<>"']/g, (ch) => `&#${ch.charCodeAt(0)};`);

document.getElementById('cards').innerHTML = `
<div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistettä</div></div>
<div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
<div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${escCard(bestModel?.model || '-')}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
<div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${escCard(fastestModel?.model || '-')}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
<div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
<div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime / 1000 / 60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
`;
|
||||||
|
|
||||||
|
// Summary table: one row per model, one column per scenario.
const sumHead = document.querySelector('#summary-table thead');
const sumBody = document.querySelector('#summary-table tbody');

// Model and scenario names come from the benchmark data and are inserted via
// innerHTML, so HTML-significant characters are replaced with numeric entities.
const escSummary = (v) => String(v).replace(/[&<>"']/g, (ch) => `&#${ch.charCodeAt(0)};`);

sumHead.innerHTML = '<tr><th>Malli</th>' + scenarios.map((s) => `<th>${escSummary(s)}</th>`).join('') + '<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>';

// One summary row per model, ordered by average score (highest first).
const modelRows = models.map((m) => {
  const mrs = DATA.filter((r) => r.model === m);
  const sumOf = (field) => mrs.reduce((s, r) => s + r[field], 0);
  const tp = sumOf('testsPassed');
  const tt = sumOf('testsTotal');
  const tok = sumOf('totalTokens');
  const time = sumOf('totalDurationMs');
  const speed = Math.round(sumOf('avgTokPerSec') / mrs.length);
  const avg = Math.round(sumOf('score') / mrs.length);
  // Each scenario cell shows the first run found for that scenario; any further
  // runs of the same scenario only contribute to the totals above.
  const scenCols = scenarios.map((s) => {
    const r = mrs.find((run) => run.scenario === s);
    if (!r) return '<td>-</td>';
    return `<td class="${cls(r)}">${pctBar(r.testsPassed, r.testsTotal, 60)} <span style="color:var(--dim)">${(r.totalDurationMs / 1000).toFixed(0)}s</span></td>`;
  }).join('');
  return { avg, html: `<tr><td class="model-name">${escSummary(m)}</td>${scenCols}<td>${pctBar(tp, tt)}</td><td>${(tok / 1000).toFixed(1)}K</td><td>${(time / 1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>` };
}).sort((a, b) => b.avg - a.avg);

sumBody.innerHTML = modelRows.map((r) => r.html).join('');
|
||||||
|
|
||||||
|
// Results table: look up the table sections and render the clickable header row.
const resHead = document.querySelector('#results-table thead');
const resBody = document.querySelector('#results-table tbody');
const resCols = [
  'Malli',
  'Skenaario',
  'Speksi',
  'Testit',
  'Korjaus',
  'Ctx',
  'Out tok',
  'Aika',
  'tok/s',
  'Pisteet',
];
resHead.innerHTML = `<tr>${resCols.map((label, idx) => `<th data-col="${idx}">${label}</th>`).join('')}</tr>`;

// Current sort state: column index into resCols and direction.
// Index 9 is the score column ('Pisteet'); the table starts sorted descending.
let sortCol = 9;
let sortAsc = false;
|
||||||
|
/**
 * Re-renders the body of the results table, sorted by the current
 * `sortCol` / `sortAsc` state, and marks the active header cell with a
 * `sorted-asc` / `sorted-desc` class.
 */
function renderResults() {
  // One sort-key accessor per column, in the same order as resCols.
  const sortKeys = [
    (r) => r.model,
    (r) => r.scenario,
    (r) => r.specEntities,
    (r) => r.testsPassed / Math.max(r.testsTotal, 1), // pass ratio; never divides by zero
    (r) => r.fixRounds,
    (r) => r.promptTokensEst,
    (r) => r.totalTokens,
    (r) => r.totalDurationMs,
    (r) => r.avgTokPerSec,
    (r) => r.score,
  ];
  const keyOf = sortKeys[sortCol];

  // Model and scenario names come from the benchmark data and are inserted via
  // innerHTML, so HTML-significant characters are replaced with numeric entities.
  const esc = (v) => String(v).replace(/[&<>"']/g, (ch) => `&#${ch.charCodeAt(0)};`);

  // Sort a copy so DATA keeps its original order.
  const sorted = [...DATA].sort((a, b) => {
    const x = keyOf(a);
    const y = keyOf(b);
    const cmp = typeof x === 'string' ? x.localeCompare(y) : x - y;
    return sortAsc ? cmp : -cmp;
  });

  resBody.innerHTML = sorted.map((r) => {
    const c = cls(r);
    return [
      '<tr>',
      `<td class="model-name">${esc(r.model)}</td>`,
      `<td>${esc(r.scenario)}</td>`,
      `<td>${r.specOk ? `✓ ${r.specEntities}e` : '<span class="fail">✗</span>'}</td>`,
      `<td class="${c}">${pctBar(r.testsPassed, r.testsTotal)}</td>`,
      `<td>${r.fixRounds > 0 ? r.fixRounds + '×' : '-'}</td>`,
      `<td>${r.promptTokensEst > 0 ? '~' + (r.promptTokensEst / 1000).toFixed(1) + 'K' : '-'}</td>`,
      `<td>${r.totalTokens > 0 ? (r.totalTokens / 1000).toFixed(1) + 'K' : '-'}</td>`,
      `<td>${(r.totalDurationMs / 1000).toFixed(0)}s</td>`,
      `<td>${r.avgTokPerSec.toFixed(0)}</td>`,
      `<td><span class="stars">${r.stars}</span> ${r.score}p</td>`,
      '</tr>',
    ].join('\n');
  }).join('');

  // Show the sort arrow only on the active column.
  document.querySelectorAll('#results-table th').forEach((th, i) => {
    th.className = i === sortCol ? (sortAsc ? 'sorted-asc' : 'sorted-desc') : '';
  });
}
|
||||||
|
// Clicking a header cell sorts by that column; clicking the active column
// again flips the direction. A newly selected column starts descending.
document.querySelector('#results-table thead').addEventListener('click', (e) => {
  // Resolve the enclosing <th> so clicks on anything nested inside a header
  // cell still find the data-col attribute; clicks elsewhere yield NaN.
  const col = Number.parseInt(e.target.closest('th')?.dataset.col, 10);
  if (Number.isNaN(col)) return;
  if (sortCol === col) {
    sortAsc = !sortAsc;
  } else {
    sortCol = col;
    sortAsc = false;
  }
  renderResults();
});

// Initial render with the default sort state.
renderResults();
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
71
kipina-codebench/results/2026-04-14T15-51.json
Normal file
71
kipina-codebench/results/2026-04-14T15-51.json
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"model": "codestral:22b",
|
||||||
|
"scenario": "todo",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 0,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 130035,
|
||||||
|
"totalTokens": 6520,
|
||||||
|
"avgTokPerSec": 61.71030751274268,
|
||||||
|
"promptChars": 7556,
|
||||||
|
"promptTokensEst": 1889,
|
||||||
|
"score": 40,
|
||||||
|
"stars": "★★☆☆☆",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-rs",
|
||||||
|
"round": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "codestral:22b",
|
||||||
|
"scenario": "todo",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 1,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 0,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 68507,
|
||||||
|
"totalTokens": 3808,
|
||||||
|
"avgTokPerSec": 62.98926789707312,
|
||||||
|
"promptChars": 16131,
|
||||||
|
"promptTokensEst": 4033,
|
||||||
|
"score": 40,
|
||||||
|
"stars": "★★☆☆☆",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-rs",
|
||||||
|
"round": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "codestral:22b",
|
||||||
|
"scenario": "todo",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 1,
|
||||||
|
"testsTotal": 0,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 148585,
|
||||||
|
"totalTokens": 8183,
|
||||||
|
"avgTokPerSec": 60.985378376436714,
|
||||||
|
"promptChars": 11852,
|
||||||
|
"promptTokensEst": 2963,
|
||||||
|
"score": 30,
|
||||||
|
"stars": "★★☆☆☆",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-rs",
|
||||||
|
"round": 3
|
||||||
|
}
|
||||||
|
]
|
||||||
183
kipina-codebench/results/2026-04-14T16-27.html
Normal file
183
kipina-codebench/results/2026-04-14T16-27.html
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="fi">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>Kipina Model Benchmark</title>
|
||||||
|
<style>
|
||||||
|
:root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
|
||||||
|
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||||
|
body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
|
||||||
|
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
|
||||||
|
.meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
|
||||||
|
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
|
||||||
|
.card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
|
||||||
|
.card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||||
|
.card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
|
||||||
|
.card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
|
||||||
|
table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
|
||||||
|
th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
|
||||||
|
th:hover { color: var(--text); }
|
||||||
|
th.sorted-asc::after { content: ' ▲'; }
|
||||||
|
th.sorted-desc::after { content: ' ▼'; }
|
||||||
|
td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
|
||||||
|
tr:hover td { background: #1c2128; }
|
||||||
|
.pass { color: var(--green); }
|
||||||
|
.partial { color: var(--yellow); }
|
||||||
|
.fail { color: var(--red); }
|
||||||
|
.stars { letter-spacing: 1px; }
|
||||||
|
.bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
|
||||||
|
.bar-bg { background: var(--border); }
|
||||||
|
.bar-fill { background: var(--green); }
|
||||||
|
.bar-partial { background: var(--yellow); }
|
||||||
|
.model-name { font-weight: 600; }
|
||||||
|
h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
|
||||||
|
.summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
|
||||||
|
<h1>Kipina Model Benchmark</h1>
|
||||||
|
<div class="meta" id="meta"></div>
|
||||||
|
|
||||||
|
<div class="cards" id="cards"></div>
|
||||||
|
|
||||||
|
<h2>Mallikohtainen yhteenveto</h2>
|
||||||
|
<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
|
||||||
|
|
||||||
|
<h2>Kaikki tulokset</h2>
|
||||||
|
<table id="results-table"><thead></thead><tbody></tbody></table>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
const RAW = [{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":12,"testsPassed":12,"testsFailed":0,"totalDurationMs":101159,"totalTokens":9818,"avgTokPerSec":117.6413187791681,"promptChars":24501,"promptTokensEst":6125,"score":100,"stars":"★★★★★","error":null,"profile":"large","promptName":"code-go","round":1},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":12,"testsPassed":12,"testsFailed":0,"totalDurationMs":111387,"totalTokens":10789,"avgTokPerSec":116.08909495915704,"promptChars":26133,"promptTokensEst":6533,"score":100,"stars":"★★★★★","error":null,"profile":"large","promptName":"code-go","round":2},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":116265,"totalTokens":10890,"avgTokPerSec":115.97514442822705,"promptChars":27525,"promptTokensEst":6881,"score":40,"stars":"★★☆☆☆","error":null,"profile":"large","promptName":"code-go","round":3},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":117780,"totalTokens":11066,"avgTokPerSec":115.92491045384959,"promptChars":27275,"promptTokensEst":6819,"score":40,"stars":"★★☆☆☆","error":null,"profile":"large","promptName":"code-go","round":4},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":12,"testsPassed":12,"testsFailed":0,"totalDurationMs":110848,"totalTokens":10433,"avgTokPerSec":115.79955217404776,"promptChars":25670,"promptTokensEst":6418,"score":100,"stars":"★★★★★","error":null,"profile":"large","promptName":"code-go","round":5}];
|
||||||
|
|
||||||
|
// Maps a numeric score to its five-glyph star rating, checking the highest tier first.
const starsFor = s => {
  if (s >= 90) return '★★★★★';
  if (s >= 70) return '★★★★☆';
  if (s >= 50) return '★★★☆☆';
  if (s >= 25) return '★★☆☆☆';
  // Any positive score below 25 still earns one star; zero (or NaN) earns none.
  return s > 0 ? '★☆☆☆☆' : '☆☆☆☆☆';
};
|
||||||
|
/**
 * Computes a 0–100 score for one benchmark run.
 *
 * Breakdown: 10 points for a valid spec, 10 points when the run did not
 * error (or still produced tests), up to 60 points for the share of passing
 * tests, and 20 points minus 10 per fix round (never below 0). A run that
 * errored without running any test scores 0.
 *
 * @param {object} r - Run record (specOk, error, testsTotal, testsPassed, fixRounds).
 * @returns {number} Integer score in the range 0–100.
 */
function calcScore(r) {
  // Missing or non-numeric counters count as zero so the score never becomes NaN.
  const total = Math.max(0, Number(r.testsTotal) || 0);
  const passed = Math.min(total, Math.max(0, Number(r.testsPassed) || 0));
  const fixRounds = Math.max(0, Number(r.fixRounds) || 0);
  const ranTests = total > 0;

  if (r.error && !ranTests) return 0;

  let s = 0;
  if (r.specOk) s += 10;
  if (!r.error || ranTests) s += 10;
  if (ranTests) s += Math.round((passed / total) * 60);
  s += Math.max(0, 20 - fixRounds * 10);
  return Math.min(100, s);
}
|
||||||
|
// Fill in score, stars and the prompt-token estimate for records that lack them.
// Each record is copied so the RAW entries themselves are left untouched.
const DATA = RAW.map(r => {
  const score = r.score ?? calcScore(r);
  return {
    ...r,
    score,
    stars: r.stars || starsFor(score),
    // Rough estimate of four characters per token when no estimate was recorded.
    promptTokensEst: r.promptTokensEst || (r.promptChars ? Math.round(r.promptChars / 4) : 0),
  };
});
|
||||||
|
// CSS class for a run: 'pass' when every test passed without an error,
// 'partial' when at least one test passed, otherwise 'fail'.
const cls = r => {
  const ranTests = r.testsTotal > 0;
  if (ranTests && !r.error && r.testsPassed === r.testsTotal) return 'pass';
  if (ranTests && r.testsPassed > 0) return 'partial';
  return 'fail';
};
|
||||||
|
/**
 * Renders a small inline progress bar followed by a "passed/total" label.
 *
 * @param {number} passed - Number of passing tests.
 * @param {number} total - Number of tests that ran.
 * @param {number} [w=80] - Width of the bar track in pixels.
 * @returns {string} HTML markup, or '-' when no tests ran.
 */
const pctBar = (passed, total, w = 80) => {
  // Also covers undefined/NaN totals, which would otherwise render "NaNpx".
  if (!(total > 0)) return '-';
  const done = Number(passed) || 0;
  const pct = done / total * 100;
  const c = done === total ? 'bar-fill' : 'bar-partial';
  // Keep the fill inside the track even when the counters are inconsistent.
  const fill = Math.min(w, Math.max(0, Math.round(pct / 100 * w) || 0));
  return `<span class="bar bar-bg" style="width:${w}px"><span class="bar ${c}" style="width:${fill}px"></span></span> ${done}/${total}`;
};
|
||||||
|
|
||||||
|
// Meta line: date, number of runs and total duration in minutes.
// A record without a duration counts as zero instead of turning the total into NaN.
const totalTime = DATA.reduce((s, r) => s + (r.totalDurationMs || 0), 0);
// NOTE(review): this prints the date the page is opened, not the date the
// benchmark ran — the run records carry no timestamp field to use instead.
document.getElementById('meta').textContent = `${new Date().toLocaleDateString('fi-FI')} — ${DATA.length} ajoa — ${(totalTime/1000/60).toFixed(1)} min`;
|
||||||
|
|
||||||
|
// Cards: headline figures across all runs.
const models = [...new Set(DATA.map(r => r.model))];
const scenarios = [...new Set(DATA.map(r => r.scenario))];
const avgScore = DATA.length ? Math.round(DATA.reduce((s, r) => s + r.score, 0) / DATA.length) : 0;
const totalPassed = DATA.reduce((s, r) => s + r.testsPassed, 0);
const totalTests = DATA.reduce((s, r) => s + r.testsTotal, 0);
const passRate = totalTests ? Math.round(totalPassed / totalTests * 100) : 0;

// Mean score and mean throughput per model, computed with one filter pass per model.
const modelStats = models.map(m => {
  const mrs = DATA.filter(r => r.model === m);
  return {
    model: m,
    avg: Math.round(mrs.reduce((s, r) => s + r.score, 0) / mrs.length),
    speed: Math.round(mrs.reduce((s, r) => s + r.avgTokPerSec, 0) / mrs.length),
  };
});
// The strict ">" keeps the first model on ties, like a stable descending sort would.
const bestModel = modelStats.reduce((best, x) => (best === null || x.avg > best.avg ? x : best), null);
const fastestModel = modelStats.reduce((best, x) => (best === null || x.speed > best.speed ? x : best), null);

document.getElementById('cards').innerHTML = `
<div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistettä</div></div>
<div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
<div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${bestModel?.model || '-'}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
<div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${fastestModel?.model || '-'}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
<div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
<div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime/1000/60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
`;
|
||||||
|
|
||||||
|
// Summary table: one row per model, one column per scenario, best average score first.
const sumHead = document.querySelector('#summary-table thead');
const sumBody = document.querySelector('#summary-table tbody');
sumHead.innerHTML = '<tr><th>Malli</th>' + scenarios.map(s => `<th>${s}</th>`).join('') + '<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>';

const modelRows = models.map(m => {
  const mrs = DATA.filter(r => r.model === m);
  const total = (rows, key) => rows.reduce((s, r) => s + r[key], 0);
  const tp = total(mrs, 'testsPassed');
  const tt = total(mrs, 'testsTotal');
  const tok = total(mrs, 'totalTokens');
  const time = total(mrs, 'totalDurationMs');
  const speed = Math.round(total(mrs, 'avgTokPerSec') / mrs.length);
  const avg = Math.round(total(mrs, 'score') / mrs.length);

  const scenCols = scenarios.map(s => {
    // A model can have several rounds per scenario; aggregate all of them
    // instead of showing only the first round. With a single round the cell
    // is the same as before.
    const runs = mrs.filter(r => r.scenario === s);
    if (runs.length === 0) return '<td>-</td>';
    const passed = total(runs, 'testsPassed');
    const ran = total(runs, 'testsTotal');
    // Green only when every round passed; yellow when at least some tests passed.
    const state = runs.every(r => cls(r) === 'pass') ? 'pass' : (ran > 0 && passed > 0) ? 'partial' : 'fail';
    // Mean duration per round keeps the figure comparable between models.
    const meanMs = total(runs, 'totalDurationMs') / runs.length;
    return `<td class="${state}">${pctBar(passed, ran, 60)} <span style="color:var(--dim)">${(meanMs/1000).toFixed(0)}s</span></td>`;
  }).join('');

  return { avg, html: `<tr><td class="model-name">${m}</td>${scenCols}<td>${pctBar(tp, tt)}</td><td>${(tok/1000).toFixed(1)}K</td><td>${(time/1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>` };
}).sort((a, b) => b.avg - a.avg);
sumBody.innerHTML = modelRows.map(r => r.html).join('');
|
||||||
|
|
||||||
|
// Results table: one row per run, sortable by clicking a column header.
const resHead = document.querySelector('#results-table thead');
const resBody = document.querySelector('#results-table tbody');
const resCols = ['Malli','Skenaario','Speksi','Testit','Korjaus','Ctx','Out tok','Aika','tok/s','Pisteet'];
// data-col carries the column index that the click handler reads back.
resHead.innerHTML = '<tr>' + resCols.map((c, i) => `<th data-col="${i}">${c}</th>`).join('') + '</tr>';

// Current sort state: index into resCols (9 = 'Pisteet') and direction; default is score, descending.
let sortCol = 9;
let sortAsc = false;
|
||||||
|
/**
 * Re-renders the body of the results table using the current sort state
 * (sortCol / sortAsc) and updates the sort indicator class on the header cells.
 */
function renderResults() {
  // Sort key per column, in the same order as the header cells.
  const keyFns = [
    r => r.model,
    r => r.scenario,
    r => r.specEntities,
    r => r.testsPassed / Math.max(r.testsTotal, 1),
    r => r.fixRounds,
    r => r.promptTokensEst,
    r => r.totalTokens,
    r => r.totalDurationMs,
    r => r.avgTokPerSec,
    r => r.score,
  ];
  // Resolve the key once per render instead of building every pair on each comparison.
  const keyOf = keyFns[sortCol];
  const sorted = [...DATA].sort((a, b) => {
    const x = keyOf(a);
    const y = keyOf(b);
    const cmp = typeof x === 'string' ? x.localeCompare(y) : x - y;
    return sortAsc ? cmp : -cmp;
  });

  resBody.innerHTML = sorted.map(r => {
    const c = cls(r);
    // A record without a throughput value renders as 0 instead of throwing.
    const tokPerSec = Number(r.avgTokPerSec || 0).toFixed(0);
    return `<tr>
<td class="model-name">${r.model}</td>
<td>${r.scenario}</td>
<td>${r.specOk ? `✓ ${r.specEntities}e` : '<span class="fail">✗</span>'}</td>
<td class="${c}">${pctBar(r.testsPassed, r.testsTotal)}</td>
<td>${r.fixRounds > 0 ? r.fixRounds + '×' : '-'}</td>
<td>${r.promptTokensEst > 0 ? '~'+(r.promptTokensEst/1000).toFixed(1)+'K' : '-'}</td>
<td>${r.totalTokens > 0 ? (r.totalTokens/1000).toFixed(1)+'K' : '-'}</td>
<td>${(r.totalDurationMs/1000).toFixed(0)}s</td>
<td>${tokPerSec}</td>
<td><span class="stars">${r.stars}</span> ${r.score}p</td>
</tr>`;
  }).join('');

  // Mark only the active column with the arrow class; clear all the others.
  document.querySelectorAll('#results-table th').forEach((th, i) => {
    th.className = i === sortCol ? (sortAsc ? 'sorted-asc' : 'sorted-desc') : '';
  });
}
|
||||||
|
// Clicking a header cell sorts by that column; clicking the active column flips the direction.
document.querySelector('#results-table thead').addEventListener('click', e => {
  // closest() also resolves clicks that land on a node nested inside the <th>.
  const th = e.target.closest('th');
  const col = Number.parseInt(th?.dataset.col, 10);
  if (Number.isNaN(col)) return;
  if (sortCol === col) {
    sortAsc = !sortAsc;
  } else {
    sortCol = col;
    sortAsc = false;
  }
  renderResults();
});

// Initial render with the default sort.
renderResults();
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
117
kipina-codebench/results/2026-04-14T16-27.json
Normal file
117
kipina-codebench/results/2026-04-14T16-27.json
Normal file
@@ -0,0 +1,117 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 12,
|
||||||
|
"testsPassed": 12,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 101159,
|
||||||
|
"totalTokens": 9818,
|
||||||
|
"avgTokPerSec": 117.6413187791681,
|
||||||
|
"promptChars": 24501,
|
||||||
|
"promptTokensEst": 6125,
|
||||||
|
"score": 100,
|
||||||
|
"stars": "★★★★★",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 12,
|
||||||
|
"testsPassed": 12,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 111387,
|
||||||
|
"totalTokens": 10789,
|
||||||
|
"avgTokPerSec": 116.08909495915704,
|
||||||
|
"promptChars": 26133,
|
||||||
|
"promptTokensEst": 6533,
|
||||||
|
"score": 100,
|
||||||
|
"stars": "★★★★★",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 0,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 116265,
|
||||||
|
"totalTokens": 10890,
|
||||||
|
"avgTokPerSec": 115.97514442822705,
|
||||||
|
"promptChars": 27525,
|
||||||
|
"promptTokensEst": 6881,
|
||||||
|
"score": 40,
|
||||||
|
"stars": "★★☆☆☆",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 0,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 117780,
|
||||||
|
"totalTokens": 11066,
|
||||||
|
"avgTokPerSec": 115.92491045384959,
|
||||||
|
"promptChars": 27275,
|
||||||
|
"promptTokensEst": 6819,
|
||||||
|
"score": 40,
|
||||||
|
"stars": "★★☆☆☆",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 4
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 12,
|
||||||
|
"testsPassed": 12,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 110848,
|
||||||
|
"totalTokens": 10433,
|
||||||
|
"avgTokPerSec": 115.79955217404776,
|
||||||
|
"promptChars": 25670,
|
||||||
|
"promptTokensEst": 6418,
|
||||||
|
"score": 100,
|
||||||
|
"stars": "★★★★★",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 5
|
||||||
|
}
|
||||||
|
]
|
||||||
183
kipina-codebench/results/2026-04-14T16-28.html
Normal file
183
kipina-codebench/results/2026-04-14T16-28.html
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="fi">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>Kipina Model Benchmark</title>
|
||||||
|
<style>
|
||||||
|
:root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
|
||||||
|
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||||
|
body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
|
||||||
|
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
|
||||||
|
.meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
|
||||||
|
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
|
||||||
|
.card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
|
||||||
|
.card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||||
|
.card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
|
||||||
|
.card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
|
||||||
|
table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
|
||||||
|
th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
|
||||||
|
th:hover { color: var(--text); }
|
||||||
|
th.sorted-asc::after { content: ' ▲'; }
|
||||||
|
th.sorted-desc::after { content: ' ▼'; }
|
||||||
|
td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
|
||||||
|
tr:hover td { background: #1c2128; }
|
||||||
|
.pass { color: var(--green); }
|
||||||
|
.partial { color: var(--yellow); }
|
||||||
|
.fail { color: var(--red); }
|
||||||
|
.stars { letter-spacing: 1px; }
|
||||||
|
.bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
|
||||||
|
.bar-bg { background: var(--border); }
|
||||||
|
.bar-fill { background: var(--green); }
|
||||||
|
.bar-partial { background: var(--yellow); }
|
||||||
|
.model-name { font-weight: 600; }
|
||||||
|
h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
|
||||||
|
.summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
|
||||||
|
<h1>Kipina Model Benchmark</h1>
|
||||||
|
<div class="meta" id="meta"></div>
|
||||||
|
|
||||||
|
<div class="cards" id="cards"></div>
|
||||||
|
|
||||||
|
<h2>Mallikohtainen yhteenveto</h2>
|
||||||
|
<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
|
||||||
|
|
||||||
|
<h2>Kaikki tulokset</h2>
|
||||||
|
<table id="results-table"><thead></thead><tbody></tbody></table>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
const RAW = [{"model":"qwen3:32b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":240877,"totalTokens":3896,"avgTokPerSec":18.330483073242423,"promptChars":0,"promptTokensEst":0,"score":0,"stars":"☆☆☆☆☆","error":"fetch failed","profile":"large","promptName":"code-go","round":1},{"model":"qwen3:32b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":232792,"totalTokens":3768,"avgTokPerSec":18.174747642473534,"promptChars":0,"promptTokensEst":0,"score":0,"stars":"☆☆☆☆☆","error":"fetch failed","profile":"large","promptName":"code-go","round":2},{"model":"qwen3:32b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":246427,"totalTokens":3982,"avgTokPerSec":18.211912659152603,"promptChars":0,"promptTokensEst":0,"score":0,"stars":"☆☆☆☆☆","error":"fetch failed","profile":"large","promptName":"code-go","round":3},{"model":"qwen3:32b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":228096,"totalTokens":3696,"avgTokPerSec":18.319094437507093,"promptChars":0,"promptTokensEst":0,"score":0,"stars":"☆☆☆☆☆","error":"fetch failed","profile":"large","promptName":"code-go","round":4},{"model":"qwen3:32b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":228521,"totalTokens":3722,"avgTokPerSec":18.29875057445382,"promptChars":0,"promptTokensEst":0,"score":0,"stars":"☆☆☆☆☆","error":"fetch failed","profile":"large","promptName":"code-go","round":5}];
|
||||||
|
|
||||||
|
// Maps a numeric score to its five-glyph star rating, checking the highest tier first.
const starsFor = s => {
  if (s >= 90) return '★★★★★';
  if (s >= 70) return '★★★★☆';
  if (s >= 50) return '★★★☆☆';
  if (s >= 25) return '★★☆☆☆';
  // Any positive score below 25 still earns one star; zero (or NaN) earns none.
  return s > 0 ? '★☆☆☆☆' : '☆☆☆☆☆';
};
|
||||||
|
/**
 * Computes a 0–100 score for one benchmark run.
 *
 * Breakdown: 10 points for a valid spec, 10 points when the run did not
 * error (or still produced tests), up to 60 points for the share of passing
 * tests, and 20 points minus 10 per fix round (never below 0). A run that
 * errored without running any test scores 0.
 *
 * @param {object} r - Run record (specOk, error, testsTotal, testsPassed, fixRounds).
 * @returns {number} Integer score in the range 0–100.
 */
function calcScore(r) {
  // Missing or non-numeric counters count as zero so the score never becomes NaN.
  const total = Math.max(0, Number(r.testsTotal) || 0);
  const passed = Math.min(total, Math.max(0, Number(r.testsPassed) || 0));
  const fixRounds = Math.max(0, Number(r.fixRounds) || 0);
  const ranTests = total > 0;

  if (r.error && !ranTests) return 0;

  let s = 0;
  if (r.specOk) s += 10;
  if (!r.error || ranTests) s += 10;
  if (ranTests) s += Math.round((passed / total) * 60);
  s += Math.max(0, 20 - fixRounds * 10);
  return Math.min(100, s);
}
|
||||||
|
// Fill in score, stars and the prompt-token estimate for records that lack them.
// Each record is copied so the RAW entries themselves are left untouched.
const DATA = RAW.map(r => {
  const score = r.score ?? calcScore(r);
  return {
    ...r,
    score,
    stars: r.stars || starsFor(score),
    // Rough estimate of four characters per token when no estimate was recorded.
    promptTokensEst: r.promptTokensEst || (r.promptChars ? Math.round(r.promptChars / 4) : 0),
  };
});
|
||||||
|
// CSS class for a run: 'pass' when every test passed without an error,
// 'partial' when at least one test passed, otherwise 'fail'.
const cls = r => {
  const ranTests = r.testsTotal > 0;
  if (ranTests && !r.error && r.testsPassed === r.testsTotal) return 'pass';
  if (ranTests && r.testsPassed > 0) return 'partial';
  return 'fail';
};
|
||||||
|
/**
 * Renders a small inline progress bar followed by a "passed/total" label.
 *
 * @param {number} passed - Number of passing tests.
 * @param {number} total - Number of tests that ran.
 * @param {number} [w=80] - Width of the bar track in pixels.
 * @returns {string} HTML markup, or '-' when no tests ran.
 */
const pctBar = (passed, total, w = 80) => {
  // Also covers undefined/NaN totals, which would otherwise render "NaNpx".
  if (!(total > 0)) return '-';
  const done = Number(passed) || 0;
  const pct = done / total * 100;
  const c = done === total ? 'bar-fill' : 'bar-partial';
  // Keep the fill inside the track even when the counters are inconsistent.
  const fill = Math.min(w, Math.max(0, Math.round(pct / 100 * w) || 0));
  return `<span class="bar bar-bg" style="width:${w}px"><span class="bar ${c}" style="width:${fill}px"></span></span> ${done}/${total}`;
};
|
||||||
|
|
||||||
|
// Meta line: date, number of runs and total duration in minutes.
// A record without a duration counts as zero instead of turning the total into NaN.
const totalTime = DATA.reduce((s, r) => s + (r.totalDurationMs || 0), 0);
// NOTE(review): this prints the date the page is opened, not the date the
// benchmark ran — the run records carry no timestamp field to use instead.
document.getElementById('meta').textContent = `${new Date().toLocaleDateString('fi-FI')} — ${DATA.length} ajoa — ${(totalTime/1000/60).toFixed(1)} min`;
|
||||||
|
|
||||||
|
// Cards: headline figures across all runs.
const models = [...new Set(DATA.map(r => r.model))];
const scenarios = [...new Set(DATA.map(r => r.scenario))];
const avgScore = DATA.length ? Math.round(DATA.reduce((s, r) => s + r.score, 0) / DATA.length) : 0;
const totalPassed = DATA.reduce((s, r) => s + r.testsPassed, 0);
const totalTests = DATA.reduce((s, r) => s + r.testsTotal, 0);
const passRate = totalTests ? Math.round(totalPassed / totalTests * 100) : 0;

// Mean score and mean throughput per model, computed with one filter pass per model.
const modelStats = models.map(m => {
  const mrs = DATA.filter(r => r.model === m);
  return {
    model: m,
    avg: Math.round(mrs.reduce((s, r) => s + r.score, 0) / mrs.length),
    speed: Math.round(mrs.reduce((s, r) => s + r.avgTokPerSec, 0) / mrs.length),
  };
});
// The strict ">" keeps the first model on ties, like a stable descending sort would.
const bestModel = modelStats.reduce((best, x) => (best === null || x.avg > best.avg ? x : best), null);
const fastestModel = modelStats.reduce((best, x) => (best === null || x.speed > best.speed ? x : best), null);

document.getElementById('cards').innerHTML = `
<div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistettä</div></div>
<div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
<div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${bestModel?.model || '-'}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
<div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${fastestModel?.model || '-'}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
<div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
<div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime/1000/60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
`;
|
||||||
|
|
||||||
|
// Summary table: one row per model, one column per scenario, best average score first.
const sumHead = document.querySelector('#summary-table thead');
const sumBody = document.querySelector('#summary-table tbody');
sumHead.innerHTML = '<tr><th>Malli</th>' + scenarios.map(s => `<th>${s}</th>`).join('') + '<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>';

const modelRows = models.map(m => {
  const mrs = DATA.filter(r => r.model === m);
  const total = (rows, key) => rows.reduce((s, r) => s + r[key], 0);
  const tp = total(mrs, 'testsPassed');
  const tt = total(mrs, 'testsTotal');
  const tok = total(mrs, 'totalTokens');
  const time = total(mrs, 'totalDurationMs');
  const speed = Math.round(total(mrs, 'avgTokPerSec') / mrs.length);
  const avg = Math.round(total(mrs, 'score') / mrs.length);

  const scenCols = scenarios.map(s => {
    // A model can have several rounds per scenario; aggregate all of them
    // instead of showing only the first round. With a single round the cell
    // is the same as before.
    const runs = mrs.filter(r => r.scenario === s);
    if (runs.length === 0) return '<td>-</td>';
    const passed = total(runs, 'testsPassed');
    const ran = total(runs, 'testsTotal');
    // Green only when every round passed; yellow when at least some tests passed.
    const state = runs.every(r => cls(r) === 'pass') ? 'pass' : (ran > 0 && passed > 0) ? 'partial' : 'fail';
    // Mean duration per round keeps the figure comparable between models.
    const meanMs = total(runs, 'totalDurationMs') / runs.length;
    return `<td class="${state}">${pctBar(passed, ran, 60)} <span style="color:var(--dim)">${(meanMs/1000).toFixed(0)}s</span></td>`;
  }).join('');

  return { avg, html: `<tr><td class="model-name">${m}</td>${scenCols}<td>${pctBar(tp, tt)}</td><td>${(tok/1000).toFixed(1)}K</td><td>${(time/1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>` };
}).sort((a, b) => b.avg - a.avg);
sumBody.innerHTML = modelRows.map(r => r.html).join('');
|
||||||
|
|
||||||
|
// Results table: one row per run, sortable by clicking a column header.
const resHead = document.querySelector('#results-table thead');
const resBody = document.querySelector('#results-table tbody');
const resCols = ['Malli','Skenaario','Speksi','Testit','Korjaus','Ctx','Out tok','Aika','tok/s','Pisteet'];
// data-col carries the column index that the click handler reads back.
resHead.innerHTML = '<tr>' + resCols.map((c, i) => `<th data-col="${i}">${c}</th>`).join('') + '</tr>';

// Current sort state: index into resCols (9 = 'Pisteet') and direction; default is score, descending.
let sortCol = 9;
let sortAsc = false;
|
||||||
|
/**
 * Re-renders the body of the results table using the current sort state
 * (sortCol / sortAsc) and updates the sort indicator class on the header cells.
 */
function renderResults() {
  // Sort key per column, in the same order as the header cells.
  const keyFns = [
    r => r.model,
    r => r.scenario,
    r => r.specEntities,
    r => r.testsPassed / Math.max(r.testsTotal, 1),
    r => r.fixRounds,
    r => r.promptTokensEst,
    r => r.totalTokens,
    r => r.totalDurationMs,
    r => r.avgTokPerSec,
    r => r.score,
  ];
  // Resolve the key once per render instead of building every pair on each comparison.
  const keyOf = keyFns[sortCol];
  const sorted = [...DATA].sort((a, b) => {
    const x = keyOf(a);
    const y = keyOf(b);
    const cmp = typeof x === 'string' ? x.localeCompare(y) : x - y;
    return sortAsc ? cmp : -cmp;
  });

  resBody.innerHTML = sorted.map(r => {
    const c = cls(r);
    // A record without a throughput value renders as 0 instead of throwing.
    const tokPerSec = Number(r.avgTokPerSec || 0).toFixed(0);
    return `<tr>
<td class="model-name">${r.model}</td>
<td>${r.scenario}</td>
<td>${r.specOk ? `✓ ${r.specEntities}e` : '<span class="fail">✗</span>'}</td>
<td class="${c}">${pctBar(r.testsPassed, r.testsTotal)}</td>
<td>${r.fixRounds > 0 ? r.fixRounds + '×' : '-'}</td>
<td>${r.promptTokensEst > 0 ? '~'+(r.promptTokensEst/1000).toFixed(1)+'K' : '-'}</td>
<td>${r.totalTokens > 0 ? (r.totalTokens/1000).toFixed(1)+'K' : '-'}</td>
<td>${(r.totalDurationMs/1000).toFixed(0)}s</td>
<td>${tokPerSec}</td>
<td><span class="stars">${r.stars}</span> ${r.score}p</td>
</tr>`;
  }).join('');

  // Mark only the active column with the arrow class; clear all the others.
  document.querySelectorAll('#results-table th').forEach((th, i) => {
    th.className = i === sortCol ? (sortAsc ? 'sorted-asc' : 'sorted-desc') : '';
  });
}
|
||||||
|
// Clicking a header cell sorts by that column; clicking the active column flips the direction.
document.querySelector('#results-table thead').addEventListener('click', e => {
  // closest() also resolves clicks that land on a node nested inside the <th>.
  const th = e.target.closest('th');
  const col = Number.parseInt(th?.dataset.col, 10);
  if (Number.isNaN(col)) return;
  if (sortCol === col) {
    sortAsc = !sortAsc;
  } else {
    sortCol = col;
    sortAsc = false;
  }
  renderResults();
});

// Initial render with the default sort.
renderResults();
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
117
kipina-codebench/results/2026-04-14T16-28.json
Normal file
117
kipina-codebench/results/2026-04-14T16-28.json
Normal file
@@ -0,0 +1,117 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"model": "qwen3:32b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 0,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 240877,
|
||||||
|
"totalTokens": 3896,
|
||||||
|
"avgTokPerSec": 18.330483073242423,
|
||||||
|
"promptChars": 0,
|
||||||
|
"promptTokensEst": 0,
|
||||||
|
"score": 0,
|
||||||
|
"stars": "☆☆☆☆☆",
|
||||||
|
"error": "fetch failed",
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3:32b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 0,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 232792,
|
||||||
|
"totalTokens": 3768,
|
||||||
|
"avgTokPerSec": 18.174747642473534,
|
||||||
|
"promptChars": 0,
|
||||||
|
"promptTokensEst": 0,
|
||||||
|
"score": 0,
|
||||||
|
"stars": "☆☆☆☆☆",
|
||||||
|
"error": "fetch failed",
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3:32b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 0,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 246427,
|
||||||
|
"totalTokens": 3982,
|
||||||
|
"avgTokPerSec": 18.211912659152603,
|
||||||
|
"promptChars": 0,
|
||||||
|
"promptTokensEst": 0,
|
||||||
|
"score": 0,
|
||||||
|
"stars": "☆☆☆☆☆",
|
||||||
|
"error": "fetch failed",
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3:32b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 0,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 228096,
|
||||||
|
"totalTokens": 3696,
|
||||||
|
"avgTokPerSec": 18.319094437507093,
|
||||||
|
"promptChars": 0,
|
||||||
|
"promptTokensEst": 0,
|
||||||
|
"score": 0,
|
||||||
|
"stars": "☆☆☆☆☆",
|
||||||
|
"error": "fetch failed",
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 4
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3:32b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 0,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 228521,
|
||||||
|
"totalTokens": 3722,
|
||||||
|
"avgTokPerSec": 18.29875057445382,
|
||||||
|
"promptChars": 0,
|
||||||
|
"promptTokensEst": 0,
|
||||||
|
"score": 0,
|
||||||
|
"stars": "☆☆☆☆☆",
|
||||||
|
"error": "fetch failed",
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 5
|
||||||
|
}
|
||||||
|
]
|
||||||
183
kipina-codebench/results/2026-04-14T17-58.html
Normal file
183
kipina-codebench/results/2026-04-14T17-58.html
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="fi">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>Kipina Model Benchmark</title>
|
||||||
|
<style>
|
||||||
|
:root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
|
||||||
|
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||||
|
body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
|
||||||
|
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
|
||||||
|
.meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
|
||||||
|
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
|
||||||
|
.card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
|
||||||
|
.card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||||
|
.card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
|
||||||
|
.card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
|
||||||
|
table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
|
||||||
|
th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
|
||||||
|
th:hover { color: var(--text); }
|
||||||
|
th.sorted-asc::after { content: ' ▲'; }
|
||||||
|
th.sorted-desc::after { content: ' ▼'; }
|
||||||
|
td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
|
||||||
|
tr:hover td { background: #1c2128; }
|
||||||
|
.pass { color: var(--green); }
|
||||||
|
.partial { color: var(--yellow); }
|
||||||
|
.fail { color: var(--red); }
|
||||||
|
.stars { letter-spacing: 1px; }
|
||||||
|
.bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
|
||||||
|
.bar-bg { background: var(--border); }
|
||||||
|
.bar-fill { background: var(--green); }
|
||||||
|
.bar-partial { background: var(--yellow); }
|
||||||
|
.model-name { font-weight: 600; }
|
||||||
|
h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
|
||||||
|
.summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
|
||||||
|
<h1>Kipina Model Benchmark</h1>
|
||||||
|
<div class="meta" id="meta"></div>
|
||||||
|
|
||||||
|
<div class="cards" id="cards"></div>
|
||||||
|
|
||||||
|
<h2>Mallikohtainen yhteenveto</h2>
|
||||||
|
<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
|
||||||
|
|
||||||
|
<h2>Kaikki tulokset</h2>
|
||||||
|
<table id="results-table"><thead></thead><tbody></tbody></table>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
const RAW = [{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":8,"testsPassed":8,"testsFailed":0,"totalDurationMs":90734,"totalTokens":8837,"avgTokPerSec":118.15403404653254,"promptChars":21527,"promptTokensEst":5382,"score":100,"stars":"★★★★★","error":null,"profile":"large","promptName":"code-go","round":1},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":91883,"totalTokens":8943,"avgTokPerSec":117.97076258455319,"promptChars":22052,"promptTokensEst":5513,"score":40,"stars":"★★☆☆☆","error":null,"profile":"large","promptName":"code-go","round":2},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":8,"testsPassed":8,"testsFailed":0,"totalDurationMs":94539,"totalTokens":8925,"avgTokPerSec":117.61954720611362,"promptChars":21972,"promptTokensEst":5493,"score":100,"stars":"★★★★★","error":null,"profile":"large","promptName":"code-go","round":3},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":8,"testsPassed":8,"testsFailed":0,"totalDurationMs":91180,"totalTokens":8537,"avgTokPerSec":116.3158218746635,"promptChars":21508,"promptTokensEst":5377,"score":100,"stars":"★★★★★","error":null,"profile":"large","promptName":"code-go","round":4},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":1,"testsTotal":8,"testsPassed":8,"testsFailed":0,"totalDurationMs":142431,"totalTokens":14065,"avgTokPerSec":115.30180394322426,"promptChars":22423,"promptTokensEst":5606,"score":90,"stars":"★★★★★","error":null,"profile":"large","promptName":"code-go","round":5}];
|
||||||
|
|
||||||
|
/**
 * Maps a score to a five-character star rating.
 * Thresholds: >= 90 five stars, >= 70 four, >= 50 three, >= 25 two,
 * > 0 one, anything else none.
 */
const starsFor = s => {
  if (s >= 90) return '★★★★★';
  if (s >= 70) return '★★★★☆';
  if (s >= 50) return '★★★☆☆';
  if (s >= 25) return '★★☆☆☆';
  if (s > 0) return '★☆☆☆☆';
  return '☆☆☆☆☆';
};
|
||||||
|
/**
 * Computes a 0-100 quality score for one benchmark run.
 *
 * Points: 10 for a valid spec, 10 for a run that had no error or produced
 * tests, up to 60 for the share of passing tests, and 20 minus 10 per fix
 * round (never below 0). A run that errored without producing any tests
 * scores 0.
 *
 * @param {object} r - Run record; reads error, specOk, testsTotal, testsPassed, fixRounds.
 * @returns {number} Integer score in the range 0-100.
 */
function calcScore(r) {
  // Missing or non-numeric counters count as zero, so an errored record
  // without test fields still takes the early exit below.
  const total = Number.isFinite(r.testsTotal) ? r.testsTotal : 0;
  const passed = Number.isFinite(r.testsPassed) ? r.testsPassed : 0;
  const fixes = Number.isFinite(r.fixRounds) ? Math.max(0, r.fixRounds) : 0;

  if (r.error && total <= 0) return 0;

  let s = 0;
  if (r.specOk) s += 10;
  // Reaching this point means the run either had no error or produced tests.
  s += 10;
  if (total > 0) {
    // Clamp so inconsistent counters cannot push the test share past 60 points.
    const ratio = Math.min(1, Math.max(0, passed / total));
    s += Math.round(ratio * 60);
  }
  s += Math.max(0, 20 - fixes * 10);
  return Math.min(100, s);
}
|
||||||
|
// Fill in derived fields when they are missing: score via calcScore, stars via
// starsFor, and the prompt token estimate as promptChars / 4.
// Each record is shallow-copied so the RAW entries stay exactly as recorded.
const DATA = RAW.map(r => {
  const score = r.score ?? calcScore(r);
  return {
    ...r,
    score,
    stars: r.stars || starsFor(score),
    promptTokensEst: r.promptTokensEst || (r.promptChars ? Math.round(r.promptChars / 4) : 0),
  };
});
|
||||||
|
/**
 * Picks the CSS class for a run: 'pass' when it has tests, no error and every
 * test passed; 'partial' when it has tests and at least one passed; else 'fail'.
 */
const cls = r => {
  const ranTests = r.testsTotal > 0;
  if (ranTests && !r.error && r.testsPassed === r.testsTotal) return 'pass';
  if (ranTests && r.testsPassed > 0) return 'partial';
  return 'fail';
};
|
||||||
|
/**
 * Renders a small progress bar followed by a "passed/total" label.
 *
 * @param {number} passed - Number of passing tests.
 * @param {number} total - Number of tests run.
 * @param {number} [w=80] - Bar width in pixels.
 * @returns {string} HTML markup, or '-' when there is no positive test count.
 */
const pctBar = (passed, total, w = 80) => {
  // Covers 0 as well as missing / non-numeric totals, which would otherwise render NaN.
  if (!(total > 0)) return '-';
  const done = Number.isFinite(passed) ? passed : 0;
  // Clamp so inconsistent counters cannot draw the fill outside the bar.
  const pct = Math.min(100, Math.max(0, done / total * 100));
  const fillClass = pct === 100 ? 'bar-fill' : 'bar-partial';
  return `<span class="bar bar-bg" style="width:${w}px"><span class="bar ${fillClass}" style="width:${Math.round(pct/100*w)}px"></span></span> ${passed}/${total}`;
};
|
||||||
|
|
||||||
|
// Meta
|
||||||
|
const totalTime = DATA.reduce((s,r) => s + r.totalDurationMs, 0);
|
||||||
|
document.getElementById('meta').textContent = `${new Date().toLocaleDateString('fi-FI')} — ${DATA.length} ajoa — ${(totalTime/1000/60).toFixed(1)} min`;
|
||||||
|
|
||||||
|
// Cards
|
||||||
|
const models = [...new Set(DATA.map(r => r.model))];
|
||||||
|
const scenarios = [...new Set(DATA.map(r => r.scenario))];
|
||||||
|
const avgScore = DATA.length ? Math.round(DATA.reduce((s,r) => s + r.score, 0) / DATA.length) : 0;
|
||||||
|
const totalPassed = DATA.reduce((s,r) => s + r.testsPassed, 0);
|
||||||
|
const totalTests = DATA.reduce((s,r) => s + r.testsTotal, 0);
|
||||||
|
const passRate = totalTests ? Math.round(totalPassed/totalTests*100) : 0;
|
||||||
|
// Model with the highest rounded mean score over its runs; undefined when there are no models.
const bestModel = models
  .map(name => {
    const runs = DATA.filter(run => run.model === name);
    let scoreSum = 0;
    for (const run of runs) scoreSum += run.score;
    return { model: name, avg: Math.round(scoreSum / runs.length) };
  })
  .sort((x, y) => y.avg - x.avg)[0];
|
||||||
|
// Model with the highest rounded mean tokens-per-second over its runs; undefined when there are no models.
const fastestModel = models
  .map(name => {
    const runs = DATA.filter(run => run.model === name);
    let rateSum = 0;
    for (const run of runs) rateSum += run.avgTokPerSec;
    return { model: name, speed: Math.round(rateSum / runs.length) };
  })
  .sort((x, y) => y.speed - x.speed)[0];
|
||||||
|
|
||||||
|
// Headline cards: average score, test pass rate, best and fastest model,
// model / scenario counts and total run time in minutes.
document.getElementById('cards').innerHTML = `
  <div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistettä</div></div>
  <div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
  <div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${bestModel?.model || '-'}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
  <div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${fastestModel?.model || '-'}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
  <div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
  <div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime/1000/60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
`;
|
||||||
|
|
||||||
|
// Summary table
|
||||||
|
const sumHead = document.querySelector('#summary-table thead');
|
||||||
|
const sumBody = document.querySelector('#summary-table tbody');
|
||||||
|
sumHead.innerHTML = '<tr><th>Malli</th>' + scenarios.map(s => `<th>${s}</th>`).join('') + '<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>';
|
||||||
|
|
||||||
|
// One summary row per model ({ avg, html }), ordered by average score, best first.
const modelRows = models.map(m => {
  const mrs = DATA.filter(r => r.model === m);
  // Sums one numeric field over a list of runs.
  const sumOf = (rows, key) => rows.reduce((s, r) => s + r[key], 0);
  const tp = sumOf(mrs, 'testsPassed');
  const tt = sumOf(mrs, 'testsTotal');
  const tok = sumOf(mrs, 'totalTokens');
  const time = sumOf(mrs, 'totalDurationMs');
  const speed = Math.round(sumOf(mrs, 'avgTokPerSec') / mrs.length);
  const avg = Math.round(sumOf(mrs, 'score') / mrs.length);
  const scenCols = scenarios.map(s => {
    // A model can have several rounds of the same scenario; combine all of
    // them instead of showing only the first matching run.
    const runs = mrs.filter(r => r.scenario === s);
    if (runs.length === 0) return '<td>-</td>';
    const combined = {
      error: runs.find(r => r.error)?.error ?? null,
      testsPassed: sumOf(runs, 'testsPassed'),
      testsTotal: sumOf(runs, 'testsTotal'),
    };
    // Mean duration per round keeps the cell comparable with a single-run report.
    const meanMs = sumOf(runs, 'totalDurationMs') / runs.length;
    return `<td class="${cls(combined)}">${pctBar(combined.testsPassed, combined.testsTotal, 60)} <span style="color:var(--dim)">${(meanMs/1000).toFixed(0)}s</span></td>`;
  }).join('');
  return { avg, html: `<tr><td class="model-name">${m}</td>${scenCols}<td>${pctBar(tp, tt)}</td><td>${(tok/1000).toFixed(1)}K</td><td>${(time/1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>` };
}).sort((a,b) => b.avg - a.avg);
|
||||||
|
sumBody.innerHTML = modelRows.map(r => r.html).join('');
|
||||||
|
|
||||||
|
// Results table
|
||||||
|
const resHead = document.querySelector('#results-table thead');
|
||||||
|
const resBody = document.querySelector('#results-table tbody');
|
||||||
|
const resCols = ['Malli','Skenaario','Speksi','Testit','Korjaus','Ctx','Out tok','Aika','tok/s','Pisteet'];
|
||||||
|
resHead.innerHTML = '<tr>' + resCols.map((c,i) => `<th data-col="${i}">${c}</th>`).join('') + '</tr>';
|
||||||
|
|
||||||
|
let sortCol = 9, sortAsc = false;
|
||||||
|
/**
 * Re-renders the body of the results table in the current sort order and
 * marks the active header cell with its sort direction.
 * Reads the sortCol / sortAsc state declared alongside it; takes no arguments.
 */
function renderResults() {
  // One key extractor per column, in header order. Selected once per render
  // rather than rebuilding every column's values for each comparison.
  const keyFor = [
    r => r.model,
    r => r.scenario,
    r => r.specEntities,
    r => r.testsPassed / Math.max(r.testsTotal, 1),
    r => r.fixRounds,
    r => r.promptTokensEst,
    r => r.totalTokens,
    r => r.totalDurationMs,
    r => r.avgTokPerSec,
    r => r.score,
  ][sortCol];
  // Unknown column index: leave the table as it is instead of throwing in the comparator.
  if (!keyFor) return;

  const sorted = [...DATA].sort((a, b) => {
    const x = keyFor(a);
    const y = keyFor(b);
    const cmp = typeof x === 'string' ? x.localeCompare(y) : x - y;
    return sortAsc ? cmp : -cmp;
  });

  resBody.innerHTML = sorted.map(r => {
    const c = cls(r);
    return `<tr>
      <td class="model-name">${r.model}</td>
      <td>${r.scenario}</td>
      <td>${r.specOk ? `✓ ${r.specEntities}e` : '<span class="fail">✗</span>'}</td>
      <td class="${c}">${pctBar(r.testsPassed, r.testsTotal)}</td>
      <td>${r.fixRounds > 0 ? r.fixRounds + '×' : '-'}</td>
      <td>${r.promptTokensEst > 0 ? '~'+(r.promptTokensEst/1000).toFixed(1)+'K' : '-'}</td>
      <td>${r.totalTokens > 0 ? (r.totalTokens/1000).toFixed(1)+'K' : '-'}</td>
      <td>${(r.totalDurationMs/1000).toFixed(0)}s</td>
      <td>${r.avgTokPerSec.toFixed(0)}</td>
      <td><span class="stars">${r.stars}</span> ${r.score}p</td>
    </tr>`;
  }).join('');

  // Only the active column carries a sort-direction class.
  document.querySelectorAll('#results-table th').forEach((th, i) => {
    th.className = i === sortCol ? (sortAsc ? 'sorted-asc' : 'sorted-desc') : '';
  });
}
|
||||||
|
// Header click: toggle the direction when the active column is clicked again,
// otherwise switch to the clicked column (descending first), then re-render.
document.querySelector('#results-table thead').addEventListener('click', e => {
  // closest() also resolves clicks that land on an element nested inside a header cell.
  const th = e.target.closest('th');
  if (!th) return;
  const col = Number.parseInt(th.dataset.col, 10);
  if (Number.isNaN(col)) return;
  if (sortCol === col) {
    sortAsc = !sortAsc;
  } else {
    sortCol = col;
    sortAsc = false;
  }
  renderResults();
});
|
||||||
|
renderResults();
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
117
kipina-codebench/results/2026-04-14T17-58.json
Normal file
117
kipina-codebench/results/2026-04-14T17-58.json
Normal file
@@ -0,0 +1,117 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 8,
|
||||||
|
"testsPassed": 8,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 90734,
|
||||||
|
"totalTokens": 8837,
|
||||||
|
"avgTokPerSec": 118.15403404653254,
|
||||||
|
"promptChars": 21527,
|
||||||
|
"promptTokensEst": 5382,
|
||||||
|
"score": 100,
|
||||||
|
"stars": "★★★★★",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 0,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 91883,
|
||||||
|
"totalTokens": 8943,
|
||||||
|
"avgTokPerSec": 117.97076258455319,
|
||||||
|
"promptChars": 22052,
|
||||||
|
"promptTokensEst": 5513,
|
||||||
|
"score": 40,
|
||||||
|
"stars": "★★☆☆☆",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 8,
|
||||||
|
"testsPassed": 8,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 94539,
|
||||||
|
"totalTokens": 8925,
|
||||||
|
"avgTokPerSec": 117.61954720611362,
|
||||||
|
"promptChars": 21972,
|
||||||
|
"promptTokensEst": 5493,
|
||||||
|
"score": 100,
|
||||||
|
"stars": "★★★★★",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 8,
|
||||||
|
"testsPassed": 8,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 91180,
|
||||||
|
"totalTokens": 8537,
|
||||||
|
"avgTokPerSec": 116.3158218746635,
|
||||||
|
"promptChars": 21508,
|
||||||
|
"promptTokensEst": 5377,
|
||||||
|
"score": 100,
|
||||||
|
"stars": "★★★★★",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 4
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 1,
|
||||||
|
"testsTotal": 8,
|
||||||
|
"testsPassed": 8,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 142431,
|
||||||
|
"totalTokens": 14065,
|
||||||
|
"avgTokPerSec": 115.30180394322426,
|
||||||
|
"promptChars": 22423,
|
||||||
|
"promptTokensEst": 5606,
|
||||||
|
"score": 90,
|
||||||
|
"stars": "★★★★★",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 5
|
||||||
|
}
|
||||||
|
]
|
||||||
183
kipina-codebench/results/2026-04-14T17-59.html
Normal file
183
kipina-codebench/results/2026-04-14T17-59.html
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="fi">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>Kipina Model Benchmark</title>
|
||||||
|
<style>
|
||||||
|
:root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
|
||||||
|
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||||
|
body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
|
||||||
|
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
|
||||||
|
.meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
|
||||||
|
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
|
||||||
|
.card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
|
||||||
|
.card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||||
|
.card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
|
||||||
|
.card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
|
||||||
|
table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
|
||||||
|
th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
|
||||||
|
th:hover { color: var(--text); }
|
||||||
|
th.sorted-asc::after { content: ' ▲'; }
|
||||||
|
th.sorted-desc::after { content: ' ▼'; }
|
||||||
|
td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
|
||||||
|
tr:hover td { background: #1c2128; }
|
||||||
|
.pass { color: var(--green); }
|
||||||
|
.partial { color: var(--yellow); }
|
||||||
|
.fail { color: var(--red); }
|
||||||
|
.stars { letter-spacing: 1px; }
|
||||||
|
.bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
|
||||||
|
.bar-bg { background: var(--border); }
|
||||||
|
.bar-fill { background: var(--green); }
|
||||||
|
.bar-partial { background: var(--yellow); }
|
||||||
|
.model-name { font-weight: 600; }
|
||||||
|
h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
|
||||||
|
.summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
|
||||||
|
<h1>Kipina Model Benchmark</h1>
|
||||||
|
<div class="meta" id="meta"></div>
|
||||||
|
|
||||||
|
<div class="cards" id="cards"></div>
|
||||||
|
|
||||||
|
<h2>Mallikohtainen yhteenveto</h2>
|
||||||
|
<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
|
||||||
|
|
||||||
|
<h2>Kaikki tulokset</h2>
|
||||||
|
<table id="results-table"><thead></thead><tbody></tbody></table>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
const RAW = [{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":8,"testsPassed":8,"testsFailed":0,"totalDurationMs":60602,"totalTokens":8923,"avgTokPerSec":179.27347290692742,"promptChars":21986,"promptTokensEst":5497,"score":100,"stars":"★★★★★","error":null,"profile":"large","promptName":"code-go","round":1},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":60628,"totalTokens":8990,"avgTokPerSec":179.75492148906073,"promptChars":22182,"promptTokensEst":5546,"score":40,"stars":"★★☆☆☆","error":null,"profile":"large","promptName":"code-go","round":2},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":8,"testsPassed":8,"testsFailed":0,"totalDurationMs":60120,"totalTokens":8913,"avgTokPerSec":179.69559472880027,"promptChars":22119,"promptTokensEst":5530,"score":100,"stars":"★★★★★","error":null,"profile":"large","promptName":"code-go","round":3},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":60745,"totalTokens":8972,"avgTokPerSec":179.4894978567171,"promptChars":22146,"promptTokensEst":5537,"score":40,"stars":"★★☆☆☆","error":null,"profile":"large","promptName":"code-go","round":4},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":8,"testsPassed":8,"testsFailed":0,"totalDurationMs":60264,"totalTokens":8945,"avgTokPerSec":179.78195301228135,"promptChars":21998,"promptTokensEst":5500,"score":100,"stars":"★★★★★","error":null,"profile":"large","promptName":"code-go","round":5}];
|
||||||
|
|
||||||
|
/**
 * Maps a score to a five-character star rating.
 * Thresholds: >= 90 five stars, >= 70 four, >= 50 three, >= 25 two,
 * > 0 one, anything else none.
 */
const starsFor = s => {
  if (s >= 90) return '★★★★★';
  if (s >= 70) return '★★★★☆';
  if (s >= 50) return '★★★☆☆';
  if (s >= 25) return '★★☆☆☆';
  if (s > 0) return '★☆☆☆☆';
  return '☆☆☆☆☆';
};
|
||||||
|
/**
 * Computes a 0-100 quality score for one benchmark run.
 *
 * Points: 10 for a valid spec, 10 for a run that had no error or produced
 * tests, up to 60 for the share of passing tests, and 20 minus 10 per fix
 * round (never below 0). A run that errored without producing any tests
 * scores 0.
 *
 * @param {object} r - Run record; reads error, specOk, testsTotal, testsPassed, fixRounds.
 * @returns {number} Integer score in the range 0-100.
 */
function calcScore(r) {
  // Missing or non-numeric counters count as zero, so an errored record
  // without test fields still takes the early exit below.
  const total = Number.isFinite(r.testsTotal) ? r.testsTotal : 0;
  const passed = Number.isFinite(r.testsPassed) ? r.testsPassed : 0;
  const fixes = Number.isFinite(r.fixRounds) ? Math.max(0, r.fixRounds) : 0;

  if (r.error && total <= 0) return 0;

  let s = 0;
  if (r.specOk) s += 10;
  // Reaching this point means the run either had no error or produced tests.
  s += 10;
  if (total > 0) {
    // Clamp so inconsistent counters cannot push the test share past 60 points.
    const ratio = Math.min(1, Math.max(0, passed / total));
    s += Math.round(ratio * 60);
  }
  s += Math.max(0, 20 - fixes * 10);
  return Math.min(100, s);
}
|
||||||
|
// Fill in derived fields when they are missing: score via calcScore, stars via
// starsFor, and the prompt token estimate as promptChars / 4.
// Each record is shallow-copied so the RAW entries stay exactly as recorded.
const DATA = RAW.map(r => {
  const score = r.score ?? calcScore(r);
  return {
    ...r,
    score,
    stars: r.stars || starsFor(score),
    promptTokensEst: r.promptTokensEst || (r.promptChars ? Math.round(r.promptChars / 4) : 0),
  };
});
|
||||||
|
/**
 * Picks the CSS class for a run: 'pass' when it has tests, no error and every
 * test passed; 'partial' when it has tests and at least one passed; else 'fail'.
 */
const cls = r => {
  const ranTests = r.testsTotal > 0;
  if (ranTests && !r.error && r.testsPassed === r.testsTotal) return 'pass';
  if (ranTests && r.testsPassed > 0) return 'partial';
  return 'fail';
};
|
||||||
|
/**
 * Renders a small progress bar followed by a "passed/total" label.
 *
 * @param {number} passed - Number of passing tests.
 * @param {number} total - Number of tests run.
 * @param {number} [w=80] - Bar width in pixels.
 * @returns {string} HTML markup, or '-' when there is no positive test count.
 */
const pctBar = (passed, total, w = 80) => {
  // Covers 0 as well as missing / non-numeric totals, which would otherwise render NaN.
  if (!(total > 0)) return '-';
  const done = Number.isFinite(passed) ? passed : 0;
  // Clamp so inconsistent counters cannot draw the fill outside the bar.
  const pct = Math.min(100, Math.max(0, done / total * 100));
  const fillClass = pct === 100 ? 'bar-fill' : 'bar-partial';
  return `<span class="bar bar-bg" style="width:${w}px"><span class="bar ${fillClass}" style="width:${Math.round(pct/100*w)}px"></span></span> ${passed}/${total}`;
};
|
||||||
|
|
||||||
|
// Meta
|
||||||
|
const totalTime = DATA.reduce((s,r) => s + r.totalDurationMs, 0);
|
||||||
|
document.getElementById('meta').textContent = `${new Date().toLocaleDateString('fi-FI')} — ${DATA.length} ajoa — ${(totalTime/1000/60).toFixed(1)} min`;
|
||||||
|
|
||||||
|
// Cards
|
||||||
|
const models = [...new Set(DATA.map(r => r.model))];
|
||||||
|
const scenarios = [...new Set(DATA.map(r => r.scenario))];
|
||||||
|
const avgScore = DATA.length ? Math.round(DATA.reduce((s,r) => s + r.score, 0) / DATA.length) : 0;
|
||||||
|
const totalPassed = DATA.reduce((s,r) => s + r.testsPassed, 0);
|
||||||
|
const totalTests = DATA.reduce((s,r) => s + r.testsTotal, 0);
|
||||||
|
const passRate = totalTests ? Math.round(totalPassed/totalTests*100) : 0;
|
||||||
|
// Model with the highest rounded mean score over its runs; undefined when there are no models.
const bestModel = models
  .map(name => {
    const runs = DATA.filter(run => run.model === name);
    let scoreSum = 0;
    for (const run of runs) scoreSum += run.score;
    return { model: name, avg: Math.round(scoreSum / runs.length) };
  })
  .sort((x, y) => y.avg - x.avg)[0];
|
||||||
|
// Model with the highest rounded mean tokens-per-second over its runs; undefined when there are no models.
const fastestModel = models
  .map(name => {
    const runs = DATA.filter(run => run.model === name);
    let rateSum = 0;
    for (const run of runs) rateSum += run.avgTokPerSec;
    return { model: name, speed: Math.round(rateSum / runs.length) };
  })
  .sort((x, y) => y.speed - x.speed)[0];
|
||||||
|
|
||||||
|
// Headline cards: average score, test pass rate, best and fastest model,
// model / scenario counts and total run time in minutes.
document.getElementById('cards').innerHTML = `
  <div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistettä</div></div>
  <div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
  <div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${bestModel?.model || '-'}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
  <div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${fastestModel?.model || '-'}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
  <div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
  <div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime/1000/60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
`;
|
||||||
|
|
||||||
|
// Summary table
|
||||||
|
const sumHead = document.querySelector('#summary-table thead');
|
||||||
|
const sumBody = document.querySelector('#summary-table tbody');
|
||||||
|
sumHead.innerHTML = '<tr><th>Malli</th>' + scenarios.map(s => `<th>${s}</th>`).join('') + '<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>';
|
||||||
|
|
||||||
|
// One summary row per model ({ avg, html }), ordered by average score, best first.
const modelRows = models.map(m => {
  const mrs = DATA.filter(r => r.model === m);
  // Sums one numeric field over a list of runs.
  const sumOf = (rows, key) => rows.reduce((s, r) => s + r[key], 0);
  const tp = sumOf(mrs, 'testsPassed');
  const tt = sumOf(mrs, 'testsTotal');
  const tok = sumOf(mrs, 'totalTokens');
  const time = sumOf(mrs, 'totalDurationMs');
  const speed = Math.round(sumOf(mrs, 'avgTokPerSec') / mrs.length);
  const avg = Math.round(sumOf(mrs, 'score') / mrs.length);
  const scenCols = scenarios.map(s => {
    // A model can have several rounds of the same scenario; combine all of
    // them instead of showing only the first matching run.
    const runs = mrs.filter(r => r.scenario === s);
    if (runs.length === 0) return '<td>-</td>';
    const combined = {
      error: runs.find(r => r.error)?.error ?? null,
      testsPassed: sumOf(runs, 'testsPassed'),
      testsTotal: sumOf(runs, 'testsTotal'),
    };
    // Mean duration per round keeps the cell comparable with a single-run report.
    const meanMs = sumOf(runs, 'totalDurationMs') / runs.length;
    return `<td class="${cls(combined)}">${pctBar(combined.testsPassed, combined.testsTotal, 60)} <span style="color:var(--dim)">${(meanMs/1000).toFixed(0)}s</span></td>`;
  }).join('');
  return { avg, html: `<tr><td class="model-name">${m}</td>${scenCols}<td>${pctBar(tp, tt)}</td><td>${(tok/1000).toFixed(1)}K</td><td>${(time/1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>` };
}).sort((a,b) => b.avg - a.avg);
|
||||||
|
sumBody.innerHTML = modelRows.map(r => r.html).join('');
|
||||||
|
|
||||||
|
// Results table
|
||||||
|
const resHead = document.querySelector('#results-table thead');
|
||||||
|
const resBody = document.querySelector('#results-table tbody');
|
||||||
|
const resCols = ['Malli','Skenaario','Speksi','Testit','Korjaus','Ctx','Out tok','Aika','tok/s','Pisteet'];
|
||||||
|
resHead.innerHTML = '<tr>' + resCols.map((c,i) => `<th data-col="${i}">${c}</th>`).join('') + '</tr>';
|
||||||
|
|
||||||
|
let sortCol = 9, sortAsc = false;
|
||||||
|
/**
 * Re-renders the body of the results table in the current sort order and
 * marks the active header cell with its sort direction.
 * Reads the sortCol / sortAsc state declared alongside it; takes no arguments.
 */
function renderResults() {
  // One key extractor per column, in header order. Selected once per render
  // rather than rebuilding every column's values for each comparison.
  const keyFor = [
    r => r.model,
    r => r.scenario,
    r => r.specEntities,
    r => r.testsPassed / Math.max(r.testsTotal, 1),
    r => r.fixRounds,
    r => r.promptTokensEst,
    r => r.totalTokens,
    r => r.totalDurationMs,
    r => r.avgTokPerSec,
    r => r.score,
  ][sortCol];
  // Unknown column index: leave the table as it is instead of throwing in the comparator.
  if (!keyFor) return;

  const sorted = [...DATA].sort((a, b) => {
    const x = keyFor(a);
    const y = keyFor(b);
    const cmp = typeof x === 'string' ? x.localeCompare(y) : x - y;
    return sortAsc ? cmp : -cmp;
  });

  resBody.innerHTML = sorted.map(r => {
    const c = cls(r);
    return `<tr>
      <td class="model-name">${r.model}</td>
      <td>${r.scenario}</td>
      <td>${r.specOk ? `✓ ${r.specEntities}e` : '<span class="fail">✗</span>'}</td>
      <td class="${c}">${pctBar(r.testsPassed, r.testsTotal)}</td>
      <td>${r.fixRounds > 0 ? r.fixRounds + '×' : '-'}</td>
      <td>${r.promptTokensEst > 0 ? '~'+(r.promptTokensEst/1000).toFixed(1)+'K' : '-'}</td>
      <td>${r.totalTokens > 0 ? (r.totalTokens/1000).toFixed(1)+'K' : '-'}</td>
      <td>${(r.totalDurationMs/1000).toFixed(0)}s</td>
      <td>${r.avgTokPerSec.toFixed(0)}</td>
      <td><span class="stars">${r.stars}</span> ${r.score}p</td>
    </tr>`;
  }).join('');

  // Only the active column carries a sort-direction class.
  document.querySelectorAll('#results-table th').forEach((th, i) => {
    th.className = i === sortCol ? (sortAsc ? 'sorted-asc' : 'sorted-desc') : '';
  });
}
|
||||||
|
// Clicking a header cell sorts by that column; clicking the already active
// column flips the direction. A newly selected column starts descending.
document.querySelector('#results-table thead').addEventListener('click', e => {
  // Resolve the header cell even if the click landed on a descendant node.
  const th = e.target.closest('th[data-col]');
  if (!th) return;
  const col = Number.parseInt(th.dataset.col, 10);
  if (Number.isNaN(col)) return;
  if (sortCol === col) sortAsc = !sortAsc;
  else { sortCol = col; sortAsc = false; }
  renderResults();
});

// Initial render with the default sort.
renderResults();
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
117
kipina-codebench/results/2026-04-14T17-59.json
Normal file
117
kipina-codebench/results/2026-04-14T17-59.json
Normal file
@@ -0,0 +1,117 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 8,
|
||||||
|
"testsPassed": 8,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 60602,
|
||||||
|
"totalTokens": 8923,
|
||||||
|
"avgTokPerSec": 179.27347290692742,
|
||||||
|
"promptChars": 21986,
|
||||||
|
"promptTokensEst": 5497,
|
||||||
|
"score": 100,
|
||||||
|
"stars": "★★★★★",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 0,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 60628,
|
||||||
|
"totalTokens": 8990,
|
||||||
|
"avgTokPerSec": 179.75492148906073,
|
||||||
|
"promptChars": 22182,
|
||||||
|
"promptTokensEst": 5546,
|
||||||
|
"score": 40,
|
||||||
|
"stars": "★★☆☆☆",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 8,
|
||||||
|
"testsPassed": 8,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 60120,
|
||||||
|
"totalTokens": 8913,
|
||||||
|
"avgTokPerSec": 179.69559472880027,
|
||||||
|
"promptChars": 22119,
|
||||||
|
"promptTokensEst": 5530,
|
||||||
|
"score": 100,
|
||||||
|
"stars": "★★★★★",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 0,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 60745,
|
||||||
|
"totalTokens": 8972,
|
||||||
|
"avgTokPerSec": 179.4894978567171,
|
||||||
|
"promptChars": 22146,
|
||||||
|
"promptTokensEst": 5537,
|
||||||
|
"score": 40,
|
||||||
|
"stars": "★★☆☆☆",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 4
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 8,
|
||||||
|
"testsPassed": 8,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 60264,
|
||||||
|
"totalTokens": 8945,
|
||||||
|
"avgTokPerSec": 179.78195301228135,
|
||||||
|
"promptChars": 21998,
|
||||||
|
"promptTokensEst": 5500,
|
||||||
|
"score": 100,
|
||||||
|
"stars": "★★★★★",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 5
|
||||||
|
}
|
||||||
|
]
|
||||||
183
kipina-codebench/results/2026-04-14T18-11-01.html
Normal file
183
kipina-codebench/results/2026-04-14T18-11-01.html
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="fi">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>Kipina Model Benchmark</title>
|
||||||
|
<style>
|
||||||
|
:root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
|
||||||
|
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||||
|
body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
|
||||||
|
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
|
||||||
|
.meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
|
||||||
|
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
|
||||||
|
.card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
|
||||||
|
.card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||||
|
.card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
|
||||||
|
.card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
|
||||||
|
table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
|
||||||
|
th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
|
||||||
|
th:hover { color: var(--text); }
|
||||||
|
th.sorted-asc::after { content: ' ▲'; }
|
||||||
|
th.sorted-desc::after { content: ' ▼'; }
|
||||||
|
td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
|
||||||
|
tr:hover td { background: #1c2128; }
|
||||||
|
.pass { color: var(--green); }
|
||||||
|
.partial { color: var(--yellow); }
|
||||||
|
.fail { color: var(--red); }
|
||||||
|
.stars { letter-spacing: 1px; }
|
||||||
|
.bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
|
||||||
|
.bar-bg { background: var(--border); }
|
||||||
|
.bar-fill { background: var(--green); }
|
||||||
|
.bar-partial { background: var(--yellow); }
|
||||||
|
.model-name { font-weight: 600; }
|
||||||
|
h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
|
||||||
|
.summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
|
||||||
|
<h1>Kipina Model Benchmark</h1>
|
||||||
|
<div class="meta" id="meta"></div>
|
||||||
|
|
||||||
|
<div class="cards" id="cards"></div>
|
||||||
|
|
||||||
|
<h2>Mallikohtainen yhteenveto</h2>
|
||||||
|
<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
|
||||||
|
|
||||||
|
<h2>Kaikki tulokset</h2>
|
||||||
|
<table id="results-table"><thead></thead><tbody></tbody></table>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
const RAW = [{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":8,"testsPassed":8,"testsFailed":0,"totalDurationMs":93812,"totalTokens":8825,"avgTokPerSec":117.65658861683121,"promptChars":21711,"promptTokensEst":5428,"score":100,"stars":"★★★★★","error":null,"profile":"large","promptName":"code-go","round":1},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":95415,"totalTokens":8968,"avgTokPerSec":116.38601312871819,"promptChars":22272,"promptTokensEst":5568,"score":40,"stars":"★★☆☆☆","error":null,"profile":"large","promptName":"code-go","round":2},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":8,"testsPassed":8,"testsFailed":0,"totalDurationMs":94923,"totalTokens":8925,"avgTokPerSec":117.12905531929448,"promptChars":22083,"promptTokensEst":5521,"score":100,"stars":"★★★★★","error":null,"profile":"large","promptName":"code-go","round":3},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":97361,"totalTokens":9191,"avgTokPerSec":117.7917994223736,"promptChars":22528,"promptTokensEst":5632,"score":40,"stars":"★★☆☆☆","error":null,"profile":"large","promptName":"code-go","round":4},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":8,"testsPassed":8,"testsFailed":0,"totalDurationMs":87238,"totalTokens":8535,"avgTokPerSec":118.24773478067195,"promptChars":22021,"promptTokensEst":5505,"score":100,"stars":"★★★★★","error":null,"profile":"large","promptName":"code-go","round":5}];
|
||||||
|
|
||||||
|
// Maps a score to a five-character star rating: 90+ gives five stars,
// 70+ four, 50+ three, 25+ two, anything above zero one, otherwise none.
const starsFor = s => {
  if (s >= 90) return '★★★★★';
  if (s >= 70) return '★★★★☆';
  if (s >= 50) return '★★★☆☆';
  if (s >= 25) return '★★☆☆☆';
  if (s > 0) return '★☆☆☆☆';
  return '☆☆☆☆☆';
};
|
||||||
|
/**
 * Computes a 0–100 score for one benchmark run.
 *
 * Breakdown: 10 points for getting far enough to be scored, 10 for a valid
 * spec (r.specOk), up to 60 proportional to the share of passed tests, and
 * 20 minus 10 per fix round (never below 0). A run that errored before any
 * test was executed scores 0.
 *
 * @param {object} r run record (specOk, error, testsTotal, testsPassed, fixRounds)
 * @returns {number} integer score in the range 0–100
 */
function calcScore(r) {
  // Treat missing or invalid counts as 0 so the result can never be NaN, and
  // cap passed at total so an inconsistent record cannot inflate the ratio.
  const total = Number.isFinite(r.testsTotal) ? Math.max(0, r.testsTotal) : 0;
  const passed = Number.isFinite(r.testsPassed) ? Math.min(Math.max(0, r.testsPassed), total) : 0;

  if (r.error && total === 0) return 0;

  // Past the guard above the run either had no error or executed tests, so
  // the base points are always granted.
  let s = 10;
  if (r.specOk) s += 10;
  if (total > 0) s += Math.round((passed / total) * 60);
  s += Math.max(0, 20 - (r.fixRounds || 0) * 10);
  return Math.min(100, s);
}
|
||||||
|
// Fill in derived fields that are missing from the raw records: score, star
// rating, and an estimated prompt token count (prompt characters / 4).
// Each record is copied first so RAW itself is left unmodified.
const DATA = RAW.map(raw => {
  const r = { ...raw };
  if (r.score == null) r.score = calcScore(r);
  if (!r.stars) r.stars = starsFor(r.score);
  if (!r.promptTokensEst) r.promptTokensEst = r.promptChars ? Math.round(r.promptChars / 4) : 0;
  return r;
});
|
||||||
|
// CSS class for a run: 'pass' when tests ran, all of them passed and there is
// no error; 'partial' when tests ran and at least one passed; else 'fail'.
const cls = r => {
  const allGreen = !r.error && r.testsPassed === r.testsTotal && r.testsTotal > 0;
  if (allGreen) return 'pass';
  if (r.testsTotal > 0 && r.testsPassed > 0) return 'partial';
  return 'fail';
};
|
||||||
|
/**
 * Renders an inline progress bar followed by a "passed/total" label.
 *
 * @param {number} passed number of passed tests
 * @param {number} total number of tests
 * @param {number} [w=80] width of the bar track in pixels
 * @returns {string} HTML snippet, or '-' when there is no positive total
 */
const pctBar = (passed, total, w = 80) => {
  // No tests, or a missing/invalid total: nothing to plot (avoids NaN widths).
  if (!(total > 0)) return '-';
  const done = Number.isFinite(passed) ? passed : 0;
  // Clamp so an inconsistent record can never draw outside the track.
  const pct = Math.min(100, Math.max(0, done / total * 100));
  // Only a genuinely complete run gets the "full" colour.
  const c = (pct === 100 && done <= total) ? 'bar-fill' : 'bar-partial';
  return `<span class="bar bar-bg" style="width:${w}px"><span class="bar ${c}" style="width:${Math.round(pct/100*w)}px"></span></span> ${done}/${total}`;
};
|
||||||
|
|
||||||
|
// Meta line: date the page is rendered, number of runs, total run time in minutes.
const totalTime = DATA.reduce((s, r) => s + r.totalDurationMs, 0);
document.getElementById('meta').textContent = `${new Date().toLocaleDateString('fi-FI')} — ${DATA.length} ajoa — ${(totalTime/1000/60).toFixed(1)} min`;

// Cards: headline figures across all runs.
const models = [...new Set(DATA.map(r => r.model))];
const scenarios = [...new Set(DATA.map(r => r.scenario))];
const avgScore = DATA.length ? Math.round(DATA.reduce((s, r) => s + r.score, 0) / DATA.length) : 0;
const totalPassed = DATA.reduce((s, r) => s + r.testsPassed, 0);
const totalTests = DATA.reduce((s, r) => s + r.testsTotal, 0);
const passRate = totalTests ? Math.round(totalPassed/totalTests*100) : 0;

// Model with the highest mean score (undefined when there are no runs).
const bestModel = models.map(m => {
  const mrs = DATA.filter(r => r.model === m);
  return { model: m, avg: Math.round(mrs.reduce((s, r) => s + r.score, 0) / mrs.length) };
}).sort((a, b) => b.avg - a.avg)[0];

// Model with the highest mean tokens-per-second (undefined when there are no runs).
const fastestModel = models.map(m => {
  const mrs = DATA.filter(r => r.model === m);
  return { model: m, speed: Math.round(mrs.reduce((s, r) => s + r.avgTokPerSec, 0) / mrs.length) };
}).sort((a, b) => b.speed - a.speed)[0];

document.getElementById('cards').innerHTML = `
<div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistettä</div></div>
<div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
<div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${bestModel?.model || '-'}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
<div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${fastestModel?.model || '-'}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
<div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
<div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime/1000/60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
`;
|
||||||
|
|
||||||
|
// Summary table: one row per model with a cell per scenario plus totals.
const sumHead = document.querySelector('#summary-table thead');
const sumBody = document.querySelector('#summary-table tbody');
sumHead.innerHTML = '<tr><th>Malli</th>' + scenarios.map(s => `<th>${s}</th>`).join('') + '<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>';

// One summary row per model, ordered by average score (best first).
const modelRows = models.map(m => {
  const mrs = DATA.filter(r => r.model === m);
  const tp = mrs.reduce((s, r) => s + r.testsPassed, 0);
  const tt = mrs.reduce((s, r) => s + r.testsTotal, 0);
  const tok = mrs.reduce((s, r) => s + r.totalTokens, 0);
  const time = mrs.reduce((s, r) => s + r.totalDurationMs, 0);
  const speed = Math.round(mrs.reduce((s, r) => s + r.avgTokPerSec, 0) / mrs.length);
  const avg = Math.round(mrs.reduce((s, r) => s + r.score, 0) / mrs.length);

  const scenCols = scenarios.map(s => {
    // A model can have several runs (rounds) of the same scenario. Aggregate
    // all of them instead of showing only the first one; with a single run the
    // cell is identical to the per-run rendering.
    const runs = mrs.filter(r => r.scenario === s);
    if (runs.length === 0) return '<td>-</td>';
    const passed = runs.reduce((n, r) => n + r.testsPassed, 0);
    const total = runs.reduce((n, r) => n + r.testsTotal, 0);
    const meanMs = runs.reduce((n, r) => n + r.totalDurationMs, 0) / runs.length;
    // Green only when every run passed; red only when every run failed.
    const states = runs.map(r => cls(r));
    const state = states.every(c => c === 'pass') ? 'pass'
      : states.some(c => c !== 'fail') ? 'partial'
      : 'fail';
    return `<td class="${state}">${pctBar(passed, total, 60)} <span style="color:var(--dim)">${(meanMs/1000).toFixed(0)}s</span></td>`;
  }).join('');

  return { avg, html: `<tr><td class="model-name">${m}</td>${scenCols}<td>${pctBar(tp, tt)}</td><td>${(tok/1000).toFixed(1)}K</td><td>${(time/1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>` };
}).sort((a, b) => b.avg - a.avg);

sumBody.innerHTML = modelRows.map(r => r.html).join('');
|
||||||
|
|
||||||
|
// Results table: grab the table sections and render the clickable header row.
// Each header cell carries its column index in data-col.
const resHead = document.querySelector('#results-table thead');
const resBody = document.querySelector('#results-table tbody');
const resCols = ['Malli','Skenaario','Speksi','Testit','Korjaus','Ctx','Out tok','Aika','tok/s','Pisteet'];
const headerCells = resCols.map((label, idx) => `<th data-col="${idx}">${label}</th>`);
resHead.innerHTML = `<tr>${headerCells.join('')}</tr>`;

// Sort state: start on the score column (index 9), highest first.
let sortCol = 9;
let sortAsc = false;
|
||||||
|
/**
 * Re-renders the body of the results table from DATA, ordered by the current
 * sort state (sortCol / sortAsc), and marks the active header cell with the
 * sorted-asc / sorted-desc class.
 */
function renderResults() {
  // One sort-key accessor per column, in the same order as resCols.
  const keys = [
    r => r.model,
    r => r.scenario,
    r => r.specEntities,
    r => r.testsPassed / Math.max(r.testsTotal, 1),
    r => r.fixRounds,
    r => r.promptTokensEst,
    r => r.totalTokens,
    r => r.totalDurationMs,
    r => r.avgTokPerSec,
    r => r.score,
  ];
  // Pick the accessor once instead of rebuilding every pair per comparison.
  const key = keys[sortCol] ?? keys[keys.length - 1];
  const dir = sortAsc ? 1 : -1;

  const sorted = [...DATA].sort((a, b) => {
    const x = key(a);
    const y = key(b);
    // Missing or non-numeric values count as 0 so the comparator never
    // returns NaN (which would make the ordering unspecified).
    const cmp = typeof x === 'string'
      ? x.localeCompare(String(y))
      : (Number(x) || 0) - (Number(y) || 0);
    return dir * cmp;
  });

  resBody.innerHTML = sorted.map(r => {
    const c = cls(r);
    return `<tr>
<td class="model-name">${r.model}</td>
<td>${r.scenario}</td>
<td>${r.specOk ? `✓ ${r.specEntities}e` : '<span class="fail">✗</span>'}</td>
<td class="${c}">${pctBar(r.testsPassed, r.testsTotal)}</td>
<td>${r.fixRounds > 0 ? r.fixRounds + '×' : '-'}</td>
<td>${r.promptTokensEst > 0 ? '~'+(r.promptTokensEst/1000).toFixed(1)+'K' : '-'}</td>
<td>${r.totalTokens > 0 ? (r.totalTokens/1000).toFixed(1)+'K' : '-'}</td>
<td>${(r.totalDurationMs/1000).toFixed(0)}s</td>
<td>${(Number(r.avgTokPerSec) || 0).toFixed(0)}</td>
<td><span class="stars">${r.stars}</span> ${r.score}p</td>
</tr>`;
  }).join('');

  // Show the sort arrow on the active column only.
  document.querySelectorAll('#results-table th').forEach((th, i) => {
    th.className = i === sortCol ? (sortAsc ? 'sorted-asc' : 'sorted-desc') : '';
  });
}
|
||||||
|
// Clicking a header cell sorts by that column; clicking the already active
// column flips the direction. A newly selected column starts descending.
document.querySelector('#results-table thead').addEventListener('click', e => {
  // Resolve the header cell even if the click landed on a descendant node.
  const th = e.target.closest('th[data-col]');
  if (!th) return;
  const col = Number.parseInt(th.dataset.col, 10);
  if (Number.isNaN(col)) return;
  if (sortCol === col) sortAsc = !sortAsc;
  else { sortCol = col; sortAsc = false; }
  renderResults();
});

// Initial render with the default sort.
renderResults();
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
117
kipina-codebench/results/2026-04-14T18-11-01.json
Normal file
117
kipina-codebench/results/2026-04-14T18-11-01.json
Normal file
@@ -0,0 +1,117 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 8,
|
||||||
|
"testsPassed": 8,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 93812,
|
||||||
|
"totalTokens": 8825,
|
||||||
|
"avgTokPerSec": 117.65658861683121,
|
||||||
|
"promptChars": 21711,
|
||||||
|
"promptTokensEst": 5428,
|
||||||
|
"score": 100,
|
||||||
|
"stars": "★★★★★",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 0,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 95415,
|
||||||
|
"totalTokens": 8968,
|
||||||
|
"avgTokPerSec": 116.38601312871819,
|
||||||
|
"promptChars": 22272,
|
||||||
|
"promptTokensEst": 5568,
|
||||||
|
"score": 40,
|
||||||
|
"stars": "★★☆☆☆",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 8,
|
||||||
|
"testsPassed": 8,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 94923,
|
||||||
|
"totalTokens": 8925,
|
||||||
|
"avgTokPerSec": 117.12905531929448,
|
||||||
|
"promptChars": 22083,
|
||||||
|
"promptTokensEst": 5521,
|
||||||
|
"score": 100,
|
||||||
|
"stars": "★★★★★",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 0,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 97361,
|
||||||
|
"totalTokens": 9191,
|
||||||
|
"avgTokPerSec": 117.7917994223736,
|
||||||
|
"promptChars": 22528,
|
||||||
|
"promptTokensEst": 5632,
|
||||||
|
"score": 40,
|
||||||
|
"stars": "★★☆☆☆",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 4
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 8,
|
||||||
|
"testsPassed": 8,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 87238,
|
||||||
|
"totalTokens": 8535,
|
||||||
|
"avgTokPerSec": 118.24773478067195,
|
||||||
|
"promptChars": 22021,
|
||||||
|
"promptTokensEst": 5505,
|
||||||
|
"score": 100,
|
||||||
|
"stars": "★★★★★",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 5
|
||||||
|
}
|
||||||
|
]
|
||||||
183
kipina-codebench/results/2026-04-14T18-11-03.html
Normal file
183
kipina-codebench/results/2026-04-14T18-11-03.html
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="fi">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>Kipina Model Benchmark</title>
|
||||||
|
<style>
|
||||||
|
:root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
|
||||||
|
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||||
|
body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
|
||||||
|
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
|
||||||
|
.meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
|
||||||
|
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
|
||||||
|
.card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
|
||||||
|
.card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||||
|
.card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
|
||||||
|
.card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
|
||||||
|
table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
|
||||||
|
th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
|
||||||
|
th:hover { color: var(--text); }
|
||||||
|
th.sorted-asc::after { content: ' ▲'; }
|
||||||
|
th.sorted-desc::after { content: ' ▼'; }
|
||||||
|
td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
|
||||||
|
tr:hover td { background: #1c2128; }
|
||||||
|
.pass { color: var(--green); }
|
||||||
|
.partial { color: var(--yellow); }
|
||||||
|
.fail { color: var(--red); }
|
||||||
|
.stars { letter-spacing: 1px; }
|
||||||
|
.bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
|
||||||
|
.bar-bg { background: var(--border); }
|
||||||
|
.bar-fill { background: var(--green); }
|
||||||
|
.bar-partial { background: var(--yellow); }
|
||||||
|
.model-name { font-weight: 600; }
|
||||||
|
h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
|
||||||
|
.summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
|
||||||
|
<h1>Kipina Model Benchmark</h1>
|
||||||
|
<div class="meta" id="meta"></div>
|
||||||
|
|
||||||
|
<div class="cards" id="cards"></div>
|
||||||
|
|
||||||
|
<h2>Mallikohtainen yhteenveto</h2>
|
||||||
|
<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
|
||||||
|
|
||||||
|
<h2>Kaikki tulokset</h2>
|
||||||
|
<table id="results-table"><thead></thead><tbody></tbody></table>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
const RAW = [{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":8,"testsPassed":8,"testsFailed":0,"totalDurationMs":60226,"totalTokens":8933,"avgTokPerSec":179.777657764066,"promptChars":22099,"promptTokensEst":5525,"score":100,"stars":"★★★★★","error":null,"profile":"large","promptName":"code-go","round":1},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":8,"testsPassed":8,"testsFailed":0,"totalDurationMs":61248,"totalTokens":9044,"avgTokPerSec":179.5168548848639,"promptChars":22045,"promptTokensEst":5511,"score":100,"stars":"★★★★★","error":null,"profile":"large","promptName":"code-go","round":2},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":8,"testsPassed":8,"testsFailed":0,"totalDurationMs":60744,"totalTokens":8936,"avgTokPerSec":177.805229869474,"promptChars":22103,"promptTokensEst":5526,"score":100,"stars":"★★★★★","error":null,"profile":"large","promptName":"code-go","round":3},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":61103,"totalTokens":9004,"avgTokPerSec":178.49315317367385,"promptChars":22240,"promptTokensEst":5560,"score":40,"stars":"★★☆☆☆","error":null,"profile":"large","promptName":"code-go","round":4},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":1,"testsTotal":8,"testsPassed":8,"testsFailed":0,"totalDurationMs":96365,"totalTokens":14294,"avgTokPerSec":174.47755518388152,"promptChars":22362,"promptTokensEst":5591,"score":90,"stars":"★★★★★","error":null,"profile":"large","promptName":"code-go","round":5}];
|
||||||
|
|
||||||
|
// Maps a score to a five-character star rating: 90+ gives five stars,
// 70+ four, 50+ three, 25+ two, anything above zero one, otherwise none.
const starsFor = s => {
  if (s >= 90) return '★★★★★';
  if (s >= 70) return '★★★★☆';
  if (s >= 50) return '★★★☆☆';
  if (s >= 25) return '★★☆☆☆';
  if (s > 0) return '★☆☆☆☆';
  return '☆☆☆☆☆';
};
|
||||||
|
/**
 * Computes a 0–100 score for one benchmark run.
 *
 * Breakdown: 10 points for getting far enough to be scored, 10 for a valid
 * spec (r.specOk), up to 60 proportional to the share of passed tests, and
 * 20 minus 10 per fix round (never below 0). A run that errored before any
 * test was executed scores 0.
 *
 * @param {object} r run record (specOk, error, testsTotal, testsPassed, fixRounds)
 * @returns {number} integer score in the range 0–100
 */
function calcScore(r) {
  // Treat missing or invalid counts as 0 so the result can never be NaN, and
  // cap passed at total so an inconsistent record cannot inflate the ratio.
  const total = Number.isFinite(r.testsTotal) ? Math.max(0, r.testsTotal) : 0;
  const passed = Number.isFinite(r.testsPassed) ? Math.min(Math.max(0, r.testsPassed), total) : 0;

  if (r.error && total === 0) return 0;

  // Past the guard above the run either had no error or executed tests, so
  // the base points are always granted.
  let s = 10;
  if (r.specOk) s += 10;
  if (total > 0) s += Math.round((passed / total) * 60);
  s += Math.max(0, 20 - (r.fixRounds || 0) * 10);
  return Math.min(100, s);
}
|
||||||
|
// Fill in derived fields that are missing from the raw records: score, star
// rating, and an estimated prompt token count (prompt characters / 4).
// Each record is copied first so RAW itself is left unmodified.
const DATA = RAW.map(raw => {
  const r = { ...raw };
  if (r.score == null) r.score = calcScore(r);
  if (!r.stars) r.stars = starsFor(r.score);
  if (!r.promptTokensEst) r.promptTokensEst = r.promptChars ? Math.round(r.promptChars / 4) : 0;
  return r;
});
|
||||||
|
// CSS class for a run: 'pass' when tests ran, all of them passed and there is
// no error; 'partial' when tests ran and at least one passed; else 'fail'.
const cls = r => {
  const allGreen = !r.error && r.testsPassed === r.testsTotal && r.testsTotal > 0;
  if (allGreen) return 'pass';
  if (r.testsTotal > 0 && r.testsPassed > 0) return 'partial';
  return 'fail';
};
|
||||||
|
/**
 * Renders an inline progress bar followed by a "passed/total" label.
 *
 * @param {number} passed number of passed tests
 * @param {number} total number of tests
 * @param {number} [w=80] width of the bar track in pixels
 * @returns {string} HTML snippet, or '-' when there is no positive total
 */
const pctBar = (passed, total, w = 80) => {
  // No tests, or a missing/invalid total: nothing to plot (avoids NaN widths).
  if (!(total > 0)) return '-';
  const done = Number.isFinite(passed) ? passed : 0;
  // Clamp so an inconsistent record can never draw outside the track.
  const pct = Math.min(100, Math.max(0, done / total * 100));
  // Only a genuinely complete run gets the "full" colour.
  const c = (pct === 100 && done <= total) ? 'bar-fill' : 'bar-partial';
  return `<span class="bar bar-bg" style="width:${w}px"><span class="bar ${c}" style="width:${Math.round(pct/100*w)}px"></span></span> ${done}/${total}`;
};
|
||||||
|
|
||||||
|
// Meta line: date the page is rendered, number of runs, total run time in minutes.
const totalTime = DATA.reduce((s, r) => s + r.totalDurationMs, 0);
document.getElementById('meta').textContent = `${new Date().toLocaleDateString('fi-FI')} — ${DATA.length} ajoa — ${(totalTime/1000/60).toFixed(1)} min`;

// Cards: headline figures across all runs.
const models = [...new Set(DATA.map(r => r.model))];
const scenarios = [...new Set(DATA.map(r => r.scenario))];
const avgScore = DATA.length ? Math.round(DATA.reduce((s, r) => s + r.score, 0) / DATA.length) : 0;
const totalPassed = DATA.reduce((s, r) => s + r.testsPassed, 0);
const totalTests = DATA.reduce((s, r) => s + r.testsTotal, 0);
const passRate = totalTests ? Math.round(totalPassed/totalTests*100) : 0;

// Model with the highest mean score (undefined when there are no runs).
const bestModel = models.map(m => {
  const mrs = DATA.filter(r => r.model === m);
  return { model: m, avg: Math.round(mrs.reduce((s, r) => s + r.score, 0) / mrs.length) };
}).sort((a, b) => b.avg - a.avg)[0];

// Model with the highest mean tokens-per-second (undefined when there are no runs).
const fastestModel = models.map(m => {
  const mrs = DATA.filter(r => r.model === m);
  return { model: m, speed: Math.round(mrs.reduce((s, r) => s + r.avgTokPerSec, 0) / mrs.length) };
}).sort((a, b) => b.speed - a.speed)[0];

document.getElementById('cards').innerHTML = `
<div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistettä</div></div>
<div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
<div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${bestModel?.model || '-'}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
<div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${fastestModel?.model || '-'}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
<div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
<div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime/1000/60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
`;
|
||||||
|
|
||||||
|
// Summary table: one row per model with a cell per scenario plus totals.
const sumHead = document.querySelector('#summary-table thead');
const sumBody = document.querySelector('#summary-table tbody');
sumHead.innerHTML = '<tr><th>Malli</th>' + scenarios.map(s => `<th>${s}</th>`).join('') + '<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>';

// One summary row per model, ordered by average score (best first).
const modelRows = models.map(m => {
  const mrs = DATA.filter(r => r.model === m);
  const tp = mrs.reduce((s, r) => s + r.testsPassed, 0);
  const tt = mrs.reduce((s, r) => s + r.testsTotal, 0);
  const tok = mrs.reduce((s, r) => s + r.totalTokens, 0);
  const time = mrs.reduce((s, r) => s + r.totalDurationMs, 0);
  const speed = Math.round(mrs.reduce((s, r) => s + r.avgTokPerSec, 0) / mrs.length);
  const avg = Math.round(mrs.reduce((s, r) => s + r.score, 0) / mrs.length);

  const scenCols = scenarios.map(s => {
    // A model can have several runs (rounds) of the same scenario. Aggregate
    // all of them instead of showing only the first one; with a single run the
    // cell is identical to the per-run rendering.
    const runs = mrs.filter(r => r.scenario === s);
    if (runs.length === 0) return '<td>-</td>';
    const passed = runs.reduce((n, r) => n + r.testsPassed, 0);
    const total = runs.reduce((n, r) => n + r.testsTotal, 0);
    const meanMs = runs.reduce((n, r) => n + r.totalDurationMs, 0) / runs.length;
    // Green only when every run passed; red only when every run failed.
    const states = runs.map(r => cls(r));
    const state = states.every(c => c === 'pass') ? 'pass'
      : states.some(c => c !== 'fail') ? 'partial'
      : 'fail';
    return `<td class="${state}">${pctBar(passed, total, 60)} <span style="color:var(--dim)">${(meanMs/1000).toFixed(0)}s</span></td>`;
  }).join('');

  return { avg, html: `<tr><td class="model-name">${m}</td>${scenCols}<td>${pctBar(tp, tt)}</td><td>${(tok/1000).toFixed(1)}K</td><td>${(time/1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>` };
}).sort((a, b) => b.avg - a.avg);

sumBody.innerHTML = modelRows.map(r => r.html).join('');
|
||||||
|
|
||||||
|
// Results table: each header cell carries its column index in data-col.
const resHead = document.querySelector('#results-table thead');
const resBody = document.querySelector('#results-table tbody');
const resCols = ['Malli','Skenaario','Speksi','Testit','Korjaus','Ctx','Out tok','Aika','tok/s','Pisteet'];
resHead.innerHTML = `<tr>${resCols.map((title, idx) => `<th data-col="${idx}">${title}</th>`).join('')}</tr>`;

// Current sort state: column index 9 is 'Pisteet'; start in descending order.
let sortCol = 9;
let sortAsc = false;
|
||||||
|
/**
 * Re-renders the body of #results-table from DATA, ordered by the current
 * sortCol / sortAsc state, and tags the active header cell with
 * 'sorted-asc' or 'sorted-desc'.
 */
function renderResults() {
  // Escape text before it is spliced into innerHTML.
  const esc = value => String(value).replace(/[&<>"']/g, ch => (
    { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }[ch]
  ));
  // One sort-key accessor per column, in header order. Picking the accessor
  // once avoids building every column's value pair on each comparison.
  const sortKeys = [
    r => r.model,
    r => r.scenario,
    r => r.specEntities,
    r => r.testsPassed/Math.max(r.testsTotal,1),
    r => r.fixRounds,
    r => r.promptTokensEst,
    r => r.totalTokens,
    r => r.totalDurationMs,
    r => r.avgTokPerSec,
    r => r.score,
  ];
  const keyOf = sortKeys[sortCol];
  const sorted = [...DATA].sort((a, b) => {
    const x = keyOf(a);
    const y = keyOf(b);
    const cmp = typeof x === 'string' ? x.localeCompare(y) : x - y;
    return sortAsc ? cmp : -cmp;
  });
  resBody.innerHTML = sorted.map(r => {
    const c = cls(r);
    // A record without a numeric speed renders as '-' instead of throwing.
    const speed = Number.isFinite(r.avgTokPerSec) ? r.avgTokPerSec.toFixed(0) : '-';
    return `<tr>
<td class="model-name">${esc(r.model)}</td>
<td>${esc(r.scenario)}</td>
<td>${r.specOk ? `✓ ${esc(r.specEntities)}e` : '<span class="fail">✗</span>'}</td>
<td class="${c}">${pctBar(r.testsPassed, r.testsTotal)}</td>
<td>${r.fixRounds > 0 ? r.fixRounds + '×' : '-'}</td>
<td>${r.promptTokensEst > 0 ? '~'+(r.promptTokensEst/1000).toFixed(1)+'K' : '-'}</td>
<td>${r.totalTokens > 0 ? (r.totalTokens/1000).toFixed(1)+'K' : '-'}</td>
<td>${(r.totalDurationMs/1000).toFixed(0)}s</td>
<td>${speed}</td>
<td><span class="stars">${esc(r.stars)}</span> ${esc(r.score)}p</td>
</tr>`;
  }).join('');
  document.querySelectorAll('#results-table th').forEach((th, i) => {
    th.className = i === sortCol ? (sortAsc ? 'sorted-asc' : 'sorted-desc') : '';
  });
}
|
||||||
|
// Clicking a header cell sorts by that column; clicking the already active
// column flips the direction. A newly chosen column starts in descending order.
document.querySelector('#results-table thead').addEventListener('click', e => {
  // Resolve the header cell even if the click landed on a descendant element.
  const th = e.target.closest('th');
  const col = Number.parseInt(th?.dataset.col, 10);
  if (Number.isNaN(col)) return;
  if (sortCol === col) sortAsc = !sortAsc;
  else { sortCol = col; sortAsc = false; }
  renderResults();
});

// Initial render with the default sort state.
renderResults();
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
117
kipina-codebench/results/2026-04-14T18-11-03.json
Normal file
117
kipina-codebench/results/2026-04-14T18-11-03.json
Normal file
@@ -0,0 +1,117 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 8,
|
||||||
|
"testsPassed": 8,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 60226,
|
||||||
|
"totalTokens": 8933,
|
||||||
|
"avgTokPerSec": 179.777657764066,
|
||||||
|
"promptChars": 22099,
|
||||||
|
"promptTokensEst": 5525,
|
||||||
|
"score": 100,
|
||||||
|
"stars": "★★★★★",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 8,
|
||||||
|
"testsPassed": 8,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 61248,
|
||||||
|
"totalTokens": 9044,
|
||||||
|
"avgTokPerSec": 179.5168548848639,
|
||||||
|
"promptChars": 22045,
|
||||||
|
"promptTokensEst": 5511,
|
||||||
|
"score": 100,
|
||||||
|
"stars": "★★★★★",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 8,
|
||||||
|
"testsPassed": 8,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 60744,
|
||||||
|
"totalTokens": 8936,
|
||||||
|
"avgTokPerSec": 177.805229869474,
|
||||||
|
"promptChars": 22103,
|
||||||
|
"promptTokensEst": 5526,
|
||||||
|
"score": 100,
|
||||||
|
"stars": "★★★★★",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 0,
|
||||||
|
"testsPassed": 0,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 61103,
|
||||||
|
"totalTokens": 9004,
|
||||||
|
"avgTokPerSec": 178.49315317367385,
|
||||||
|
"promptChars": 22240,
|
||||||
|
"promptTokensEst": 5560,
|
||||||
|
"score": 40,
|
||||||
|
"stars": "★★☆☆☆",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 4
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 1,
|
||||||
|
"testsTotal": 8,
|
||||||
|
"testsPassed": 8,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 96365,
|
||||||
|
"totalTokens": 14294,
|
||||||
|
"avgTokPerSec": 174.47755518388152,
|
||||||
|
"promptChars": 22362,
|
||||||
|
"promptTokensEst": 5591,
|
||||||
|
"score": 90,
|
||||||
|
"stars": "★★★★★",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 5
|
||||||
|
}
|
||||||
|
]
|
||||||
183
kipina-codebench/results/2026-04-14T18-21-28.html
Normal file
183
kipina-codebench/results/2026-04-14T18-21-28.html
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="fi">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>Kipina Model Benchmark</title>
|
||||||
|
<style>
|
||||||
|
:root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
|
||||||
|
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||||
|
body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
|
||||||
|
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
|
||||||
|
.meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
|
||||||
|
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
|
||||||
|
.card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
|
||||||
|
.card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||||
|
.card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
|
||||||
|
.card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
|
||||||
|
table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
|
||||||
|
th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
|
||||||
|
th:hover { color: var(--text); }
|
||||||
|
th.sorted-asc::after { content: ' ▲'; }
|
||||||
|
th.sorted-desc::after { content: ' ▼'; }
|
||||||
|
td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
|
||||||
|
tr:hover td { background: #1c2128; }
|
||||||
|
.pass { color: var(--green); }
|
||||||
|
.partial { color: var(--yellow); }
|
||||||
|
.fail { color: var(--red); }
|
||||||
|
.stars { letter-spacing: 1px; }
|
||||||
|
.bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
|
||||||
|
.bar-bg { background: var(--border); }
|
||||||
|
.bar-fill { background: var(--green); }
|
||||||
|
.bar-partial { background: var(--yellow); }
|
||||||
|
.model-name { font-weight: 600; }
|
||||||
|
h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
|
||||||
|
.summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
|
||||||
|
<h1>Kipina Model Benchmark</h1>
|
||||||
|
<div class="meta" id="meta"></div>
|
||||||
|
|
||||||
|
<div class="cards" id="cards"></div>
|
||||||
|
|
||||||
|
<h2>Mallikohtainen yhteenveto</h2>
|
||||||
|
<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
|
||||||
|
|
||||||
|
<h2>Kaikki tulokset</h2>
|
||||||
|
<table id="results-table"><thead></thead><tbody></tbody></table>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
const RAW = [{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":8,"testsPassed":8,"testsFailed":0,"totalDurationMs":58681,"totalTokens":5655,"avgTokPerSec":118.77562779075755,"promptChars":17612,"promptTokensEst":4403,"score":100,"stars":"★★★★★","error":null,"profile":"large","promptName":"code-go","round":1},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":2,"testsTotal":8,"testsPassed":8,"testsFailed":0,"totalDurationMs":169807,"totalTokens":16883,"avgTokPerSec":115.96768040993769,"promptChars":17644,"promptTokensEst":4411,"score":80,"stars":"★★★★☆","error":null,"profile":"large","promptName":"code-go","round":2},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":1,"testsTotal":8,"testsPassed":8,"testsFailed":0,"totalDurationMs":117405,"totalTokens":11271,"avgTokPerSec":116.36291292849495,"promptChars":17615,"promptTokensEst":4404,"score":90,"stars":"★★★★★","error":null,"profile":"large","promptName":"code-go","round":3},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":1,"testsTotal":8,"testsPassed":8,"testsFailed":0,"totalDurationMs":114898,"totalTokens":10974,"avgTokPerSec":116.03234661922525,"promptChars":17614,"promptTokensEst":4404,"score":90,"stars":"★★★★★","error":null,"profile":"large","promptName":"code-go","round":4},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":1,"testsTotal":8,"testsPassed":8,"testsFailed":0,"totalDurationMs":121791,"totalTokens":12040,"avgTokPerSec":116.40843044337468,"promptChars":17798,"promptTokensEst":4450,"score":90,"stars":"★★★★★","error":null,"profile":"large","promptName":"code-go","round":5}];
|
||||||
|
|
||||||
|
// Maps a numeric score to a five-character star rating string.
const starsFor = s => {
  if (s >= 90) return '★★★★★';
  if (s >= 70) return '★★★★☆';
  if (s >= 50) return '★★★☆☆';
  if (s >= 25) return '★★☆☆☆';
  if (s > 0) return '★☆☆☆☆';
  return '☆☆☆☆☆';
};
|
||||||
|
/**
 * Computes a score (capped at 100) for a run record that has no stored score.
 *
 * Points: 10 for a valid spec, 10 for a run that did not fail before testing,
 * up to 60 for the share of passed tests, and 20 minus 10 per fix round
 * (never below 0).
 *
 * @param {object} r - Run record; reads error, specOk, testsTotal,
 *   testsPassed and fixRounds.
 * @returns {number} The score; 0 for an errored run with no tests executed.
 */
function calcScore(r) {
  // Treat absent counters as zero so an errored record that lacks the test
  // fields still scores 0 instead of slipping past the guard below.
  const total = r.testsTotal ?? 0;
  const passed = r.testsPassed ?? 0;
  const ranTests = total > 0;
  if (r.error && !ranTests) return 0;

  let s = 0;
  if (r.specOk) s += 10;
  // Past the guard, the run either had no error or executed tests.
  s += 10;
  if (ranTests) s += Math.round((passed / total) * 60);
  s += Math.max(0, 20 - (r.fixRounds || 0) * 10);
  return Math.min(100, s);
}
|
||||||
|
// Normalise the raw records: fill in score, stars and the prompt-token
// estimate when they are missing. Each record is copied instead of mutated,
// and absent numeric fields default to 0 so later sums and toFixed() calls
// do not produce NaN or throw.
const DATA = RAW.map(r => {
  const row = {
    ...r,
    testsTotal: r.testsTotal ?? 0,
    testsPassed: r.testsPassed ?? 0,
    fixRounds: r.fixRounds ?? 0,
    totalTokens: r.totalTokens ?? 0,
    totalDurationMs: r.totalDurationMs ?? 0,
    avgTokPerSec: r.avgTokPerSec ?? 0,
  };
  row.score = r.score ?? calcScore(row);
  row.stars = r.stars || starsFor(row.score);
  // Rough estimate: about four characters per token.
  row.promptTokensEst = r.promptTokensEst || (r.promptChars ? Math.round(r.promptChars / 4) : 0);
  return row;
});
|
||||||
|
// CSS class for a run: 'pass' when every test passed without an error,
// 'partial' when tests ran and at least one passed, otherwise 'fail'.
const cls = r => {
  const ranTests = r.testsTotal > 0;
  if (!r.error && ranTests && r.testsPassed === r.testsTotal) return 'pass';
  if (ranTests && r.testsPassed > 0) return 'partial';
  return 'fail';
};
|
||||||
|
/**
 * Builds the HTML for a small progress bar followed by a "passed/total" label.
 *
 * @param {number} passed - Number of passed tests.
 * @param {number} total - Number of tests; anything that is not > 0 yields '-'.
 * @param {number} [w=80] - Width of the bar in pixels.
 * @returns {string} HTML snippet, or '-' when there is nothing to show.
 */
const pctBar = (passed, total, w = 80) => {
  // Also covers a missing or NaN total, which would otherwise emit "NaNpx".
  if (!(total > 0)) return '-';
  const done = Number.isFinite(passed) ? passed : 0;
  const pct = done/total*100;
  const c = pct === 100 ? 'bar-fill' : 'bar-partial';
  // Clamp the fill so inconsistent counts cannot overflow the bar background.
  const fillPx = Math.round(Math.min(Math.max(pct, 0), 100)/100*w);
  return `<span class="bar bar-bg" style="width:${w}px"><span class="bar ${c}" style="width:${fillPx}px"></span></span> ${done}/${total}`;
};
|
||||||
|
|
||||||
|
// Meta line: the date at page load (fi-FI format), the number of runs and
// the summed run duration in minutes.
const totalTime = DATA.map(run => run.totalDurationMs).reduce((acc, ms) => acc + ms, 0);
document.getElementById('meta').textContent = [
  new Date().toLocaleDateString('fi-FI'),
  `${DATA.length} ajoa`,
  `${(totalTime/1000/60).toFixed(1)} min`,
].join(' — ');
|
||||||
|
|
||||||
|
// Cards: headline statistics.
// Distinct model and scenario names, in order of first appearance in DATA.
const models = Array.from(new Set(DATA.map(run => run.model)));
const scenarios = Array.from(new Set(DATA.map(run => run.scenario)));

// Mean score over every run, rounded to a whole number (0 when there are no runs).
const avgScore = DATA.length === 0
  ? 0
  : Math.round(DATA.reduce((sum, run) => sum + run.score, 0) / DATA.length);

// Test counters summed over every run, and the resulting pass percentage.
const totalPassed = DATA.reduce((sum, run) => sum + run.testsPassed, 0);
const totalTests = DATA.reduce((sum, run) => sum + run.testsTotal, 0);
const passRate = totalTests ? Math.round(totalPassed/totalTests*100) : 0;

// Model with the highest rounded mean score (undefined when there are no models).
const [bestModel] = models
  .map(name => {
    const runs = DATA.filter(run => run.model === name);
    const scoreTotal = runs.reduce((sum, run) => sum + run.score, 0);
    return { model: name, avg: Math.round(scoreTotal / runs.length) };
  })
  .sort((x, y) => y.avg - x.avg);

// Model with the highest rounded mean tokens-per-second.
const [fastestModel] = models
  .map(name => {
    const runs = DATA.filter(run => run.model === name);
    const speedTotal = runs.reduce((sum, run) => sum + run.avgTokPerSec, 0);
    return { model: name, speed: Math.round(speedTotal / runs.length) };
  })
  .sort((x, y) => y.speed - x.speed);
|
||||||
|
|
||||||
|
// Render the headline cards. Each entry is [label, value, sub-line, optional
// inline style for the value element]; the markup is generated once instead
// of being repeated per card.
document.getElementById('cards').innerHTML = [
  ['Keskiarvo', starsFor(avgScore), `${avgScore} pistettä`],
  ['Testien läpäisy', `${passRate}%`, `${totalPassed}/${totalTests} testiä`],
  ['Paras malli', bestModel?.model || '-', `${bestModel?.avg || 0}p`, 'font-size:1.2rem'],
  ['Nopein', fastestModel?.model || '-', `${fastestModel?.speed || 0} tok/s`, 'font-size:1.2rem'],
  ['Malleja', models.length, `${scenarios.length} skenaariota`],
  ['Kokonaisaika', (totalTime/1000/60).toFixed(1), 'minuuttia'],
].map(([label, value, sub, valueStyle]) => {
  const styleAttr = valueStyle ? ` style="${valueStyle}"` : '';
  return `<div class="card"><div class="label">${label}</div><div class="value"${styleAttr}>${value}</div><div class="sub">${sub}</div></div>`;
}).join('\n');
|
||||||
|
|
||||||
|
// Summary table: one row per model, ordered by average score (best first).
const sumHead = document.querySelector('#summary-table thead');
const sumBody = document.querySelector('#summary-table tbody');
sumHead.innerHTML = '<tr><th>Malli</th>' + scenarios.map(s => `<th>${s}</th>`).join('') + '<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>';

const modelRows = models.map(m => {
  const mrs = DATA.filter(r => r.model === m);
  // Sums one numeric field over a list of runs.
  const total = (rows, key) => rows.reduce((s, r) => s + r[key], 0);
  const tp = total(mrs, 'testsPassed');
  const tt = total(mrs, 'testsTotal');
  const tok = total(mrs, 'totalTokens');
  const time = total(mrs, 'totalDurationMs');
  const speed = Math.round(total(mrs, 'avgTokPerSec') / mrs.length);
  const avg = Math.round(total(mrs, 'score') / mrs.length);
  const scenCols = scenarios.map(s => {
    // A model can have several rounds of the same scenario. Aggregate all of
    // them so the cell agrees with the row totals instead of showing only
    // the first round.
    const runs = mrs.filter(r => r.scenario === s);
    if (runs.length === 0) return '<td>-</td>';
    const agg = {
      error: runs.find(r => r.error)?.error ?? null,
      testsPassed: total(runs, 'testsPassed'),
      testsTotal: total(runs, 'testsTotal'),
    };
    const secs = (total(runs, 'totalDurationMs')/1000).toFixed(0);
    return `<td class="${cls(agg)}">${pctBar(agg.testsPassed, agg.testsTotal, 60)} <span style="color:var(--dim)">${secs}s</span></td>`;
  }).join('');
  return { avg, html: `<tr><td class="model-name">${m}</td>${scenCols}<td>${pctBar(tp, tt)}</td><td>${(tok/1000).toFixed(1)}K</td><td>${(time/1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>` };
}).sort((a,b) => b.avg - a.avg);
sumBody.innerHTML = modelRows.map(r => r.html).join('');
|
||||||
|
|
||||||
|
// Results table: each header cell carries its column index in data-col.
const resHead = document.querySelector('#results-table thead');
const resBody = document.querySelector('#results-table tbody');
const resCols = ['Malli','Skenaario','Speksi','Testit','Korjaus','Ctx','Out tok','Aika','tok/s','Pisteet'];
resHead.innerHTML = `<tr>${resCols.map((title, idx) => `<th data-col="${idx}">${title}</th>`).join('')}</tr>`;

// Current sort state: column index 9 is 'Pisteet'; start in descending order.
let sortCol = 9;
let sortAsc = false;
|
||||||
|
/**
 * Re-renders the body of #results-table from DATA, ordered by the current
 * sortCol / sortAsc state, and tags the active header cell with
 * 'sorted-asc' or 'sorted-desc'.
 */
function renderResults() {
  // Escape text before it is spliced into innerHTML.
  const esc = value => String(value).replace(/[&<>"']/g, ch => (
    { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }[ch]
  ));
  // One sort-key accessor per column, in header order. Picking the accessor
  // once avoids building every column's value pair on each comparison.
  const sortKeys = [
    r => r.model,
    r => r.scenario,
    r => r.specEntities,
    r => r.testsPassed/Math.max(r.testsTotal,1),
    r => r.fixRounds,
    r => r.promptTokensEst,
    r => r.totalTokens,
    r => r.totalDurationMs,
    r => r.avgTokPerSec,
    r => r.score,
  ];
  const keyOf = sortKeys[sortCol];
  const sorted = [...DATA].sort((a, b) => {
    const x = keyOf(a);
    const y = keyOf(b);
    const cmp = typeof x === 'string' ? x.localeCompare(y) : x - y;
    return sortAsc ? cmp : -cmp;
  });
  resBody.innerHTML = sorted.map(r => {
    const c = cls(r);
    // A record without a numeric speed renders as '-' instead of throwing.
    const speed = Number.isFinite(r.avgTokPerSec) ? r.avgTokPerSec.toFixed(0) : '-';
    return `<tr>
<td class="model-name">${esc(r.model)}</td>
<td>${esc(r.scenario)}</td>
<td>${r.specOk ? `✓ ${esc(r.specEntities)}e` : '<span class="fail">✗</span>'}</td>
<td class="${c}">${pctBar(r.testsPassed, r.testsTotal)}</td>
<td>${r.fixRounds > 0 ? r.fixRounds + '×' : '-'}</td>
<td>${r.promptTokensEst > 0 ? '~'+(r.promptTokensEst/1000).toFixed(1)+'K' : '-'}</td>
<td>${r.totalTokens > 0 ? (r.totalTokens/1000).toFixed(1)+'K' : '-'}</td>
<td>${(r.totalDurationMs/1000).toFixed(0)}s</td>
<td>${speed}</td>
<td><span class="stars">${esc(r.stars)}</span> ${esc(r.score)}p</td>
</tr>`;
  }).join('');
  document.querySelectorAll('#results-table th').forEach((th, i) => {
    th.className = i === sortCol ? (sortAsc ? 'sorted-asc' : 'sorted-desc') : '';
  });
}
|
||||||
|
// Clicking a header cell sorts by that column; clicking the already active
// column flips the direction. A newly chosen column starts in descending order.
document.querySelector('#results-table thead').addEventListener('click', e => {
  // Resolve the header cell even if the click landed on a descendant element.
  const th = e.target.closest('th');
  const col = Number.parseInt(th?.dataset.col, 10);
  if (Number.isNaN(col)) return;
  if (sortCol === col) sortAsc = !sortAsc;
  else { sortCol = col; sortAsc = false; }
  renderResults();
});

// Initial render with the default sort state.
renderResults();
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
117
kipina-codebench/results/2026-04-14T18-21-28.json
Normal file
117
kipina-codebench/results/2026-04-14T18-21-28.json
Normal file
@@ -0,0 +1,117 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 8,
|
||||||
|
"testsPassed": 8,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 58681,
|
||||||
|
"totalTokens": 5655,
|
||||||
|
"avgTokPerSec": 118.77562779075755,
|
||||||
|
"promptChars": 17612,
|
||||||
|
"promptTokensEst": 4403,
|
||||||
|
"score": 100,
|
||||||
|
"stars": "★★★★★",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 2,
|
||||||
|
"testsTotal": 8,
|
||||||
|
"testsPassed": 8,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 169807,
|
||||||
|
"totalTokens": 16883,
|
||||||
|
"avgTokPerSec": 115.96768040993769,
|
||||||
|
"promptChars": 17644,
|
||||||
|
"promptTokensEst": 4411,
|
||||||
|
"score": 80,
|
||||||
|
"stars": "★★★★☆",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 1,
|
||||||
|
"testsTotal": 8,
|
||||||
|
"testsPassed": 8,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 117405,
|
||||||
|
"totalTokens": 11271,
|
||||||
|
"avgTokPerSec": 116.36291292849495,
|
||||||
|
"promptChars": 17615,
|
||||||
|
"promptTokensEst": 4404,
|
||||||
|
"score": 90,
|
||||||
|
"stars": "★★★★★",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 1,
|
||||||
|
"testsTotal": 8,
|
||||||
|
"testsPassed": 8,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 114898,
|
||||||
|
"totalTokens": 10974,
|
||||||
|
"avgTokPerSec": 116.03234661922525,
|
||||||
|
"promptChars": 17614,
|
||||||
|
"promptTokensEst": 4404,
|
||||||
|
"score": 90,
|
||||||
|
"stars": "★★★★★",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 4
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 1,
|
||||||
|
"testsTotal": 8,
|
||||||
|
"testsPassed": 8,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 121791,
|
||||||
|
"totalTokens": 12040,
|
||||||
|
"avgTokPerSec": 116.40843044337468,
|
||||||
|
"promptChars": 17798,
|
||||||
|
"promptTokensEst": 4450,
|
||||||
|
"score": 90,
|
||||||
|
"stars": "★★★★★",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 5
|
||||||
|
}
|
||||||
|
]
|
||||||
183
kipina-codebench/results/2026-04-14T18-21-44.html
Normal file
183
kipina-codebench/results/2026-04-14T18-21-44.html
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="fi">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>Kipina Model Benchmark</title>
|
||||||
|
<style>
|
||||||
|
:root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
|
||||||
|
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||||
|
body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
|
||||||
|
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
|
||||||
|
.meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
|
||||||
|
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
|
||||||
|
.card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
|
||||||
|
.card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||||
|
.card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
|
||||||
|
.card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
|
||||||
|
table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
|
||||||
|
th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
|
||||||
|
th:hover { color: var(--text); }
|
||||||
|
th.sorted-asc::after { content: ' ▲'; }
|
||||||
|
th.sorted-desc::after { content: ' ▼'; }
|
||||||
|
td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
|
||||||
|
tr:hover td { background: #1c2128; }
|
||||||
|
.pass { color: var(--green); }
|
||||||
|
.partial { color: var(--yellow); }
|
||||||
|
.fail { color: var(--red); }
|
||||||
|
.stars { letter-spacing: 1px; }
|
||||||
|
.bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
|
||||||
|
.bar-bg { background: var(--border); }
|
||||||
|
.bar-fill { background: var(--green); }
|
||||||
|
.bar-partial { background: var(--yellow); }
|
||||||
|
.model-name { font-weight: 600; }
|
||||||
|
h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
|
||||||
|
.summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
|
||||||
|
<h1>Kipina Model Benchmark</h1>
|
||||||
|
<div class="meta" id="meta"></div>
|
||||||
|
|
||||||
|
<div class="cards" id="cards"></div>
|
||||||
|
|
||||||
|
<h2>Mallikohtainen yhteenveto</h2>
|
||||||
|
<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
|
||||||
|
|
||||||
|
<h2>Kaikki tulokset</h2>
|
||||||
|
<table id="results-table"><thead></thead><tbody></tbody></table>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
const RAW = [{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":8,"testsPassed":8,"testsFailed":0,"totalDurationMs":38613,"totalTokens":5749,"avgTokPerSec":186.0062849599216,"promptChars":17610,"promptTokensEst":4403,"score":100,"stars":"★★★★★","error":null,"profile":"large","promptName":"code-go","round":1},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":8,"testsPassed":8,"testsFailed":0,"totalDurationMs":44003,"totalTokens":6531,"avgTokPerSec":180.46364119019972,"promptChars":17654,"promptTokensEst":4414,"score":100,"stars":"★★★★★","error":null,"profile":"large","promptName":"code-go","round":2},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":8,"testsPassed":8,"testsFailed":0,"totalDurationMs":43321,"totalTokens":6433,"avgTokPerSec":183.23569602310963,"promptChars":17661,"promptTokensEst":4415,"score":100,"stars":"★★★★★","error":null,"profile":"large","promptName":"code-go","round":3},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":8,"testsPassed":8,"testsFailed":0,"totalDurationMs":43423,"totalTokens":6366,"avgTokPerSec":183.20013867919306,"promptChars":17622,"promptTokensEst":4406,"score":100,"stars":"★★★★★","error":null,"profile":"large","promptName":"code-go","round":4},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":7,"testsPassed":7,"testsFailed":0,"totalDurationMs":36998,"totalTokens":5388,"avgTokPerSec":180.07304484079054,"promptChars":17641,"promptTokensEst":4410,"score":100,"stars":"★★★★★","error":null,"profile":"large","promptName":"code-go","round":5}];
|
||||||
|
|
||||||
|
// Maps a numeric score to a five-character star rating string.
const starsFor = s => {
  if (s >= 90) return '★★★★★';
  if (s >= 70) return '★★★★☆';
  if (s >= 50) return '★★★☆☆';
  if (s >= 25) return '★★☆☆☆';
  if (s > 0) return '★☆☆☆☆';
  return '☆☆☆☆☆';
};
|
||||||
|
/**
 * Computes a score (capped at 100) for a run record that has no stored score.
 *
 * Points: 10 for a valid spec, 10 for a run that did not fail before testing,
 * up to 60 for the share of passed tests, and 20 minus 10 per fix round
 * (never below 0).
 *
 * @param {object} r - Run record; reads error, specOk, testsTotal,
 *   testsPassed and fixRounds.
 * @returns {number} The score; 0 for an errored run with no tests executed.
 */
function calcScore(r) {
  // Treat absent counters as zero so an errored record that lacks the test
  // fields still scores 0 instead of slipping past the guard below.
  const total = r.testsTotal ?? 0;
  const passed = r.testsPassed ?? 0;
  const ranTests = total > 0;
  if (r.error && !ranTests) return 0;

  let s = 0;
  if (r.specOk) s += 10;
  // Past the guard, the run either had no error or executed tests.
  s += 10;
  if (ranTests) s += Math.round((passed / total) * 60);
  s += Math.max(0, 20 - (r.fixRounds || 0) * 10);
  return Math.min(100, s);
}
|
||||||
|
// Normalise the raw records: fill in score, stars and the prompt-token
// estimate when they are missing. Each record is copied instead of mutated,
// and absent numeric fields default to 0 so later sums and toFixed() calls
// do not produce NaN or throw.
const DATA = RAW.map(r => {
  const row = {
    ...r,
    testsTotal: r.testsTotal ?? 0,
    testsPassed: r.testsPassed ?? 0,
    fixRounds: r.fixRounds ?? 0,
    totalTokens: r.totalTokens ?? 0,
    totalDurationMs: r.totalDurationMs ?? 0,
    avgTokPerSec: r.avgTokPerSec ?? 0,
  };
  row.score = r.score ?? calcScore(row);
  row.stars = r.stars || starsFor(row.score);
  // Rough estimate: about four characters per token.
  row.promptTokensEst = r.promptTokensEst || (r.promptChars ? Math.round(r.promptChars / 4) : 0);
  return row;
});
|
||||||
|
// CSS class for a run: 'pass' when every test passed without an error,
// 'partial' when tests ran and at least one passed, otherwise 'fail'.
const cls = r => {
  const ranTests = r.testsTotal > 0;
  if (!r.error && ranTests && r.testsPassed === r.testsTotal) return 'pass';
  if (ranTests && r.testsPassed > 0) return 'partial';
  return 'fail';
};
|
||||||
|
/**
 * Builds the HTML for a small progress bar followed by a "passed/total" label.
 *
 * @param {number} passed - Number of passed tests.
 * @param {number} total - Number of tests; anything that is not > 0 yields '-'.
 * @param {number} [w=80] - Width of the bar in pixels.
 * @returns {string} HTML snippet, or '-' when there is nothing to show.
 */
const pctBar = (passed, total, w = 80) => {
  // Also covers a missing or NaN total, which would otherwise emit "NaNpx".
  if (!(total > 0)) return '-';
  const done = Number.isFinite(passed) ? passed : 0;
  const pct = done/total*100;
  const c = pct === 100 ? 'bar-fill' : 'bar-partial';
  // Clamp the fill so inconsistent counts cannot overflow the bar background.
  const fillPx = Math.round(Math.min(Math.max(pct, 0), 100)/100*w);
  return `<span class="bar bar-bg" style="width:${w}px"><span class="bar ${c}" style="width:${fillPx}px"></span></span> ${done}/${total}`;
};
|
||||||
|
|
||||||
|
// Meta line: the date at page load (fi-FI format), the number of runs and
// the summed run duration in minutes.
const totalTime = DATA.map(run => run.totalDurationMs).reduce((acc, ms) => acc + ms, 0);
document.getElementById('meta').textContent = [
  new Date().toLocaleDateString('fi-FI'),
  `${DATA.length} ajoa`,
  `${(totalTime/1000/60).toFixed(1)} min`,
].join(' — ');
|
||||||
|
|
||||||
|
// Cards: aggregate figures over all runs (distinct models/scenarios, averages, totals)
const models = Array.from(new Set(DATA.map((run) => run.model)));
const scenarios = Array.from(new Set(DATA.map((run) => run.scenario)));

// Mean score over every run; 0 when there are no runs.
const avgScore = DATA.length
  ? Math.round(DATA.map((run) => run.score).reduce((sum, v) => sum + v, 0) / DATA.length)
  : 0;

const totalPassed = DATA.map((run) => run.testsPassed).reduce((sum, v) => sum + v, 0);
const totalTests = DATA.map((run) => run.testsTotal).reduce((sum, v) => sum + v, 0);
const passRate = !totalTests ? 0 : Math.round((totalPassed / totalTests) * 100);

// Model with the highest rounded mean score (undefined when there are no models).
const bestModel = models
  .map((model) => {
    const runs = DATA.filter((run) => run.model === model);
    let scoreSum = 0;
    for (const run of runs) scoreSum += run.score;
    return { model, avg: Math.round(scoreSum / runs.length) };
  })
  .sort((x, y) => y.avg - x.avg)[0];

// Model with the highest rounded mean tokens-per-second figure.
const fastestModel = models
  .map((model) => {
    const runs = DATA.filter((run) => run.model === model);
    let speedSum = 0;
    for (const run of runs) speedSum += run.avgTokPerSec;
    return { model, speed: Math.round(speedSum / runs.length) };
  })
  .sort((x, y) => y.speed - x.speed)[0];
|
||||||
|
|
||||||
|
// Render the summary cards: average rating, test pass rate, best and fastest model,
// model/scenario counts and the total run time in minutes.
document.getElementById('cards').innerHTML = `
  <div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistettä</div></div>
  <div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
  <div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${bestModel?.model || '-'}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
  <div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${fastestModel?.model || '-'}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
  <div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
  <div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime/1000/60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
`;
|
||||||
|
|
||||||
|
// Summary table: one row per model, one column per scenario, rows sorted by average score.
const sumHead = document.querySelector('#summary-table thead');
const sumBody = document.querySelector('#summary-table tbody');
sumHead.innerHTML = '<tr><th>Malli</th>' + scenarios.map(s => `<th>${s}</th>`).join('') + '<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>';

const modelRows = models.map((m) => {
  const sumOf = (rows, key) => rows.reduce((s, r) => s + r[key], 0);
  const mrs = DATA.filter((r) => r.model === m);
  const tp = sumOf(mrs, 'testsPassed');
  const tt = sumOf(mrs, 'testsTotal');
  const tok = sumOf(mrs, 'totalTokens');
  const time = sumOf(mrs, 'totalDurationMs');
  const speed = Math.round(sumOf(mrs, 'avgTokPerSec') / mrs.length);
  const avg = Math.round(sumOf(mrs, 'score') / mrs.length);

  const scenCols = scenarios.map((s) => {
    // A model can have several runs (rounds) of the same scenario. Combine all of
    // them so the cell agrees with the row totals instead of showing only the first run.
    const runs = mrs.filter((r) => r.scenario === s);
    if (runs.length === 0) return '<td>-</td>';
    const combined = {
      error: runs.find((r) => r.error)?.error ?? null,
      testsPassed: sumOf(runs, 'testsPassed'),
      testsTotal: sumOf(runs, 'testsTotal'),
    };
    // Average duration per run; equals the run's own duration when there is one run.
    const avgSeconds = sumOf(runs, 'totalDurationMs') / runs.length / 1000;
    return `<td class="${cls(combined)}">${pctBar(combined.testsPassed, combined.testsTotal, 60)} <span style="color:var(--dim)">${avgSeconds.toFixed(0)}s</span></td>`;
  }).join('');

  return { avg, html: `<tr><td class="model-name">${m}</td>${scenCols}<td>${pctBar(tp, tt)}</td><td>${(tok/1000).toFixed(1)}K</td><td>${(time/1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>` };
}).sort((a, b) => b.avg - a.avg);

sumBody.innerHTML = modelRows.map((r) => r.html).join('');
|
||||||
|
|
||||||
|
// Results table: each header cell carries its column index in data-col
const resHead = document.querySelector('#results-table thead');
const resBody = document.querySelector('#results-table tbody');
const resCols = ['Malli','Skenaario','Speksi','Testit','Korjaus','Ctx','Out tok','Aika','tok/s','Pisteet'];
resHead.innerHTML = `<tr>${resCols.map((title, idx) => `<th data-col="${idx}">${title}</th>`).join('')}</tr>`;

// Current sort state: starts on column 9 ('Pisteet'), descending
let sortCol = 9;
let sortAsc = false;
|
||||||
|
/**
 * Re-renders the body of the results table ordered by the current sort
 * column (`sortCol`) and direction (`sortAsc`), then updates the header
 * cell classes so the sorted column is marked.
 */
function renderResults() {
  // One accessor per column, in the same order as the header cells.
  const columnValue = [
    (run) => run.model,
    (run) => run.scenario,
    (run) => run.specEntities,
    (run) => run.testsPassed / Math.max(run.testsTotal, 1),
    (run) => run.fixRounds,
    (run) => run.promptTokensEst,
    (run) => run.totalTokens,
    (run) => run.totalDurationMs,
    (run) => run.avgTokPerSec,
    (run) => run.score,
  ];

  const byColumn = (a, b) => {
    const left = columnValue[sortCol](a);
    const right = columnValue[sortCol](b);
    // Strings compare by locale, everything else numerically.
    const cmp = typeof left === 'string' ? left.localeCompare(right) : left - right;
    return sortAsc ? cmp : -cmp;
  };

  const toRow = (r) => {
    const c = cls(r);
    const cells = [
      `<td class="model-name">${r.model}</td>`,
      `<td>${r.scenario}</td>`,
      `<td>${r.specOk ? `✓ ${r.specEntities}e` : '<span class="fail">✗</span>'}</td>`,
      `<td class="${c}">${pctBar(r.testsPassed, r.testsTotal)}</td>`,
      `<td>${r.fixRounds > 0 ? `${r.fixRounds}×` : '-'}</td>`,
      `<td>${r.promptTokensEst > 0 ? `~${(r.promptTokensEst/1000).toFixed(1)}K` : '-'}</td>`,
      `<td>${r.totalTokens > 0 ? `${(r.totalTokens/1000).toFixed(1)}K` : '-'}</td>`,
      `<td>${(r.totalDurationMs/1000).toFixed(0)}s</td>`,
      `<td>${r.avgTokPerSec.toFixed(0)}</td>`,
      `<td><span class="stars">${r.stars}</span> ${r.score}p</td>`,
    ];
    return `<tr>\n${cells.join('\n')}\n</tr>`;
  };

  // Sort a copy so DATA keeps its original order.
  resBody.innerHTML = DATA.slice().sort(byColumn).map(toRow).join('');

  document.querySelectorAll('#results-table th').forEach((th, i) => {
    if (i !== sortCol) {
      th.className = '';
    } else {
      th.className = sortAsc ? 'sorted-asc' : 'sorted-desc';
    }
  });
}
|
||||||
|
// Clicking a header cell sorts by that column; clicking the active column flips the direction.
document.querySelector('#results-table thead').addEventListener('click', (e) => {
  // Resolve the header cell even when the click lands on an element nested inside it.
  const th = e.target.closest('th[data-col]');
  if (!th) return;
  const col = Number.parseInt(th.dataset.col, 10);
  if (Number.isNaN(col)) return;
  if (sortCol === col) {
    sortAsc = !sortAsc;
  } else {
    // A newly selected column starts in descending order.
    sortCol = col;
    sortAsc = false;
  }
  renderResults();
});

// Initial render with the default sort state.
renderResults();
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
117
kipina-codebench/results/2026-04-14T18-21-44.json
Normal file
117
kipina-codebench/results/2026-04-14T18-21-44.json
Normal file
@@ -0,0 +1,117 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 8,
|
||||||
|
"testsPassed": 8,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 38613,
|
||||||
|
"totalTokens": 5749,
|
||||||
|
"avgTokPerSec": 186.0062849599216,
|
||||||
|
"promptChars": 17610,
|
||||||
|
"promptTokensEst": 4403,
|
||||||
|
"score": 100,
|
||||||
|
"stars": "★★★★★",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 8,
|
||||||
|
"testsPassed": 8,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 44003,
|
||||||
|
"totalTokens": 6531,
|
||||||
|
"avgTokPerSec": 180.46364119019972,
|
||||||
|
"promptChars": 17654,
|
||||||
|
"promptTokensEst": 4414,
|
||||||
|
"score": 100,
|
||||||
|
"stars": "★★★★★",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 8,
|
||||||
|
"testsPassed": 8,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 43321,
|
||||||
|
"totalTokens": 6433,
|
||||||
|
"avgTokPerSec": 183.23569602310963,
|
||||||
|
"promptChars": 17661,
|
||||||
|
"promptTokensEst": 4415,
|
||||||
|
"score": 100,
|
||||||
|
"stars": "★★★★★",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 8,
|
||||||
|
"testsPassed": 8,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 43423,
|
||||||
|
"totalTokens": 6366,
|
||||||
|
"avgTokPerSec": 183.20013867919306,
|
||||||
|
"promptChars": 17622,
|
||||||
|
"promptTokensEst": 4406,
|
||||||
|
"score": 100,
|
||||||
|
"stars": "★★★★★",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 4
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "qwen3-coder:30b",
|
||||||
|
"scenario": "blog",
|
||||||
|
"reqOk": true,
|
||||||
|
"specOk": true,
|
||||||
|
"specEntities": 2,
|
||||||
|
"validationIssues": 0,
|
||||||
|
"fixRounds": 0,
|
||||||
|
"testsTotal": 7,
|
||||||
|
"testsPassed": 7,
|
||||||
|
"testsFailed": 0,
|
||||||
|
"totalDurationMs": 36998,
|
||||||
|
"totalTokens": 5388,
|
||||||
|
"avgTokPerSec": 180.07304484079054,
|
||||||
|
"promptChars": 17641,
|
||||||
|
"promptTokensEst": 4410,
|
||||||
|
"score": 100,
|
||||||
|
"stars": "★★★★★",
|
||||||
|
"error": null,
|
||||||
|
"profile": "large",
|
||||||
|
"promptName": "code-go",
|
||||||
|
"round": 5
|
||||||
|
}
|
||||||
|
]
|
||||||
Reference in New Issue
Block a user