diff --git a/kipina-codebench/benchmark.mjs b/kipina-codebench/benchmark.mjs
index f49da1e..ef451dc 100644
--- a/kipina-codebench/benchmark.mjs
+++ b/kipina-codebench/benchmark.mjs
@@ -46,9 +46,24 @@ function loadPrompt(name) {
 }
 const CLIENT_SYSTEM = loadPrompt('client');
 const SPEC_SYSTEM = loadPrompt('spec');
-const CODE_SYSTEM = loadPrompt(LANG === 'rust' ? 'code-rs' : 'code');
 const FIX_SYSTEM = loadPrompt('fix');
 
+// === Per-model profiles: parsed once at module load from profiles.json under __dirname ===
+const PROFILES = JSON.parse(readFileSync(join(__dirname, 'profiles.json'), 'utf-8'));
+// Resolve the code-generation system prompt for `model` via PROFILES; returns { system, promptName, profile }, where promptName is the prompt file actually used.
+function getCodePromptForModel(model) {
+    const modelConf = PROFILES.models[model]; // undefined for models not listed in profiles.json
+    const profile = modelConf?.profile || PROFILES.default_profile;
+    const promptName = modelConf?.prompt || PROFILES.profiles[profile]?.prompt || 'code';
+    const suffix = LANG === 'rust' ? '-rs' : '';
+    const candidates = [`${promptName}${suffix}`, `code${suffix}`, promptName, 'code']; // language match first: generic code-rs beats a prompt written for another language
+    for (const name of candidates) {
+        const path = join(__dirname, 'prompts', `${name}.md`);
+        if (existsSync(path)) return { system: readFileSync(path, 'utf-8').trim(), promptName: name, profile };
+    }
+    return { system: loadPrompt('code'), promptName: 'code', profile }; // no candidate file exists: defer to loadPrompt, keep the resolved profile
+}
+
 // === Kultaisten esimerkkien lataus (kielen mukaan) ===
 const GOLDEN_DIR = join(__dirname, 'golden-examples');
 const LANG_CONFIG = {
@@ -281,6 +296,7 @@ async function runPipeline(model, scenario) {
         error: null,
     };
     const timings = [];
+    const { system: CODE_SYSTEM, promptName, profile } = getCodePromptForModel(model);
     const dir = `${OUTPUT_DIR}/${model.replace(/[/:]/g, '_')}__${scenario.id}`;
     mkdirSync(dir, { recursive: true });
 
@@ -444,6 +460,8 @@ async function runPipeline(model, scenario) {
     result.avgTokPerSec = timings.length > 0 ? timings.reduce((s, t) => s + t.tokPerSec, 0) / timings.length : 0;
     result.score = scoreResult(result);
     result.stars = starsForScore(result.score);
+    result.profile = profile;
+    result.promptName = promptName;
 
     return result;
 }
diff --git a/kipina-codebench/profiles.json b/kipina-codebench/profiles.json
new file mode 100644
index 0000000..6ed57dd
--- /dev/null
+++ b/kipina-codebench/profiles.json
@@ -0,0 +1,47 @@
+{
+  "models": {
+    "qwen3-coder:30b": {
+      "profile": "large",
+      "prompt": "code",
+      "golden": "todo.md",
+      "notes": "Pääkooderi. 97p, 188 tok/s. Noudattaa pitkiä sääntölistoja."
+    },
+    "qwen3:8b": {
+      "profile": "small",
+      "prompt": "code-small",
+      "golden": "todo.md",
+      "notes": "Kevyt vaihtoehto. Todo/users 100p, blog heikko. Lyhyt prompti toimii paremmin."
+    },
+    "qwen3:14b": {
+      "profile": "large",
+      "prompt": "code",
+      "golden": "todo.md",
+      "notes": "Poistettu käytöstä. Ei lisäarvoa 30b:hen verrattuna."
+    },
+    "codestral:22b": {
+      "profile": "large",
+      "prompt": "code",
+      "golden": "todo.md",
+      "notes": "Mistral-varamalli. 88p, 44 tok/s."
+    },
+    "qwen3:4b": {
+      "profile": "small",
+      "prompt": "code-small",
+      "golden": "todo.md",
+      "notes": "Minimaali. Vain todo toimii."
+    }
+  },
+  "profiles": {
+    "large": {
+      "prompt": "code",
+      "golden": "todo.md",
+      "description": "Täysi prompti + säännöt. Malleille >=14B."
+    },
+    "small": {
+      "prompt": "code-small",
+      "golden": "todo.md",
+      "description": "Tiivistetty prompti. Malleille <=8B."
+    }
+  },
+  "default_profile": "large"
+}
diff --git a/kipina-codebench/prompts/code-small.md b/kipina-codebench/prompts/code-small.md
new file mode 100644
index 0000000..9a17ecc
--- /dev/null
+++ b/kipina-codebench/prompts/code-small.md
@@ -0,0 +1,22 @@
+Generate a FastAPI project with SQLAlchemy and SQLite. Follow the REFERENCE IMPLEMENTATION exactly.
+
+Generate these 4 files with === markers:
+
+=== models.py ===
+=== schemas.py ===
+=== main.py ===
+=== test_main.py ===
+
+Key patterns (copy from reference):
+- class Base(DeclarativeBase): pass
+- Mapped[str] = mapped_column(String(255))
+- Mapped[str | None] = mapped_column(Text, default=None)
+- model_config = ConfigDict(from_attributes=True)
+- model_dump() not dict()
+- POST 201, GET list, GET by id 404, PUT, DELETE 204
+
+CRITICAL:
+- Use ONLY fields from the JSON spec — no created_at or extra fields
+- Generate EXACTLY 6 tests per entity: create, list, get_by_id, not_found, update, delete
+- No search, filter, or other extra tests
+- No markdown fences in output
diff --git a/kipina-codebench/results/2026-04-14T09-43.html b/kipina-codebench/results/2026-04-14T09-43.html
new file mode 100644
index 0000000..e77d625
--- /dev/null
+++ b/kipina-codebench/results/2026-04-14T09-43.html
@@ -0,0 +1,183 @@
+<!DOCTYPE html>
+<html lang="fi">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1.0">
+<title>Kipina Model Benchmark</title>
+<style>
+  :root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
+  * { box-sizing: border-box; margin: 0; padding: 0; }
+  body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
+  h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
+  .meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
+  .cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
+  .card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
+  .card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
+  .card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
+  .card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
+  table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
+  th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
+  th:hover { color: var(--text); }
+  th.sorted-asc::after { content: ' ▲'; }
+  th.sorted-desc::after { content: ' ▼'; }
+  td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
+  tr:hover td { background: #1c2128; }
+  .pass { color: var(--green); }
+  .partial { color: var(--yellow); }
+  .fail { color: var(--red); }
+  .stars { letter-spacing: 1px; }
+  .bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
+  .bar-bg { background: var(--border); }
+  .bar-fill { background: var(--green); }
+  .bar-partial { background: var(--yellow); }
+  .model-name { font-weight: 600; }
+  h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
+  .summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
+</style>
+</head>
+<body>
+
+<h1>Kipina Model Benchmark</h1>
+<div class="meta" id="meta"></div>
+
+<div class="cards" id="cards"></div>
+
+<h2>Mallikohtainen yhteenveto</h2>
+<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
+
+<h2>Kaikki tulokset</h2>
+<table id="results-table"><thead></thead><tbody></tbody></table>
+
+<script>
+const RAW = [{"model":"qwen3-coder:30b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":21688,"totalTokens":2243,"avgTokPerSec":121.7719614197307,"promptChars":11588,"promptTokensEst":2897,"score":100,"stars":"★★★★★","error":null}];
+
+// Star rating for a score: thresholds 90/70/50/25/>0 give 5..1 filled stars, anything else gives none.
+const starsFor = s => { const filled = s >= 90 ? 5 : s >= 70 ? 4 : s >= 50 ? 3 : s >= 25 ? 2 : s > 0 ? 1 : 0; return '★'.repeat(filled) + '☆'.repeat(5 - filled); };
+// Recompute a run's score (capped at 100): 10 for a valid spec, 10 when there is no error or tests ran, up to 60 for the pass ratio, 20 minus 10 per fix round.
+function calcScore(r) {
+  if (r.error && r.testsTotal === 0) return 0;
+  const specPts = r.specOk ? 10 : 0;
+  const ranPts = (!r.error || r.testsTotal > 0) ? 10 : 0;
+  const testPts = r.testsTotal > 0 ? Math.round((r.testsPassed / r.testsTotal) * 60) : 0;
+  return Math.min(100, specPts + ranPts + testPts + Math.max(0, 20 - (r.fixRounds || 0) * 10));
+}
+// Backfill derived fields on records that lack them: score, stars, and a prompt-token estimate (promptChars / 4)
+const DATA = RAW.map(r => {
+  if (r.score == null) r.score = calcScore(r);
+  if (!r.stars) r.stars = starsFor(r.score); // also replaces an empty "" stars string
+  if (!r.promptTokensEst) r.promptTokensEst = r.promptChars ? Math.round(r.promptChars / 4) : 0;
+  return r; // records are mutated in place, so DATA holds the same objects as RAW
+});
+const cls = r => { const ran = r.testsTotal > 0; return (!r.error && ran && r.testsPassed === r.testsTotal) ? 'pass' : (ran && r.testsPassed > 0) ? 'partial' : 'fail'; }; // CSS class for a run
+// Progress-bar markup followed by "passed/total"; '-' when total is 0. w is the bar width in px.
+const pctBar = (passed, total, w=80) => {
+  if (total === 0) return '-';
+  const pct = passed/total*100;
+  return `<span class="bar bar-bg" style="width:${w}px"><span class="bar ${pct === 100 ? 'bar-fill' : 'bar-partial'}" style="width:${Math.round(pct/100*w)}px"></span></span> ${passed}/${total}`;
+};
+
+// Header line: date, run count, total minutes. NOTE(review): new Date() is evaluated when the page is opened, so this is the viewing date, not the run date.
+const totalTime = DATA.reduce((s,r) => s + r.totalDurationMs, 0);
+document.getElementById('meta').textContent = `${new Date().toLocaleDateString('fi-FI')} — ${DATA.length} ajoa — ${(totalTime/1000/60).toFixed(1)} min`;
+
+// Summary cards: aggregates over all runs in DATA, then the card markup. Fixes the "pistetta" typo in the average card.
+const models = [...new Set(DATA.map(r => r.model))];
+const scenarios = [...new Set(DATA.map(r => r.scenario))];
+const avgScore = DATA.length ? Math.round(DATA.reduce((s,r) => s + r.score, 0) / DATA.length) : 0;
+const totalPassed = DATA.reduce((s,r) => s + r.testsPassed, 0);
+const totalTests = DATA.reduce((s,r) => s + r.testsTotal, 0);
+const passRate = totalTests ? Math.round(totalPassed/totalTests*100) : 0;
+const bestModel = models.map(m => {
+  const mrs = DATA.filter(r => r.model === m);
+  return { model: m, avg: Math.round(mrs.reduce((s,r) => s + r.score, 0) / mrs.length) };
+}).sort((a,b) => b.avg - a.avg)[0]; // highest mean score; undefined when DATA is empty
+const fastestModel = models.map(m => {
+  const mrs = DATA.filter(r => r.model === m);
+  return { model: m, speed: Math.round(mrs.reduce((s,r) => s + r.avgTokPerSec, 0) / mrs.length) }; // NOTE(review): runs recorded with 0 tok/s are included in the mean
+}).sort((a,b) => b.speed - a.speed)[0]; // highest mean tok/s; undefined when DATA is empty
+
+document.getElementById('cards').innerHTML = `
+  <div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistettä</div></div>
+  <div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
+  <div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${bestModel?.model || '-'}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
+  <div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${fastestModel?.model || '-'}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
+  <div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
+  <div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime/1000/60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
+`;
+
+// Per-model summary table: one column per scenario, then totals, sorted by mean score
+const sumHead = document.querySelector('#summary-table thead');
+const sumBody = document.querySelector('#summary-table tbody');
+sumHead.innerHTML = '<tr><th>Malli</th>' + scenarios.map(s => `<th>${s}</th>`).join('') + '<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>';
+
+const modelRows = models.map(m => {
+  const mrs = DATA.filter(r => r.model === m); // all runs of model m
+  const tp = mrs.reduce((s,r) => s + r.testsPassed, 0);
+  const tt = mrs.reduce((s,r) => s + r.testsTotal, 0);
+  const tok = mrs.reduce((s,r) => s + r.totalTokens, 0);
+  const time = mrs.reduce((s,r) => s + r.totalDurationMs, 0);
+  const speed = Math.round(mrs.reduce((s,r) => s + r.avgTokPerSec, 0) / mrs.length); // mean over every run, including runs recorded with 0 tok/s
+  const avg = Math.round(mrs.reduce((s,r) => s + r.score, 0) / mrs.length);
+  const scenCols = scenarios.map(s => {
+    const r = mrs.find(r => r.scenario === s); // first run of this model for scenario s
+    if (!r) return '<td>-</td>';
+    return `<td class="${cls(r)}">${pctBar(r.testsPassed, r.testsTotal, 60)} <span style="color:var(--dim)">${(r.totalDurationMs/1000).toFixed(0)}s</span></td>`;
+  }).join('');
+  return { avg, html: `<tr><td class="model-name">${m}</td>${scenCols}<td>${pctBar(tp, tt)}</td><td>${(tok/1000).toFixed(1)}K</td><td>${(time/1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>` };
+}).sort((a,b) => b.avg - a.avg); // best mean score first
+sumBody.innerHTML = modelRows.map(r => r.html).join('');
+
+// Results table: one row per run; each header carries data-col so a click on a <th> can be mapped to a sort column
+const resHead = document.querySelector('#results-table thead');
+const resBody = document.querySelector('#results-table tbody');
+const resCols = ['Malli','Skenaario','Speksi','Testit','Korjaus','Ctx','Out tok','Aika','tok/s','Pisteet'];
+resHead.innerHTML = '<tr>' + resCols.map((c,i) => `<th data-col="${i}">${c}</th>`).join('') + '</tr>';
+
+let sortCol = 9, sortAsc = false; // initial order: column 9 (Pisteet), descending
+function renderResults() { // Re-render the results tbody sorted by sortCol/sortAsc and mark the active header
+  const sorted = [...DATA].sort((a,b) => { // sort a copy so DATA keeps its order
+    const vals = [ // one [a, b] pair per column, in resCols order
+      [a.model, b.model],
+      [a.scenario, b.scenario],
+      [a.specEntities, b.specEntities],
+      [a.testsPassed/Math.max(a.testsTotal,1), b.testsPassed/Math.max(b.testsTotal,1)], // pass ratio; Math.max(..., 1) avoids dividing by 0
+      [a.fixRounds, b.fixRounds],
+      [a.promptTokensEst, b.promptTokensEst],
+      [a.totalTokens, b.totalTokens],
+      [a.totalDurationMs, b.totalDurationMs],
+      [a.avgTokPerSec, b.avgTokPerSec],
+      [a.score, b.score],
+    ][sortCol];
+    const cmp = typeof vals[0] === 'string' ? vals[0].localeCompare(vals[1]) : vals[0] - vals[1]; // strings by locale, numbers by difference
+    return sortAsc ? cmp : -cmp;
+  });
+  resBody.innerHTML = sorted.map(r => {
+    const c = cls(r);
+    return `<tr>
+      <td class="model-name">${r.model}</td>
+      <td>${r.scenario}</td>
+      <td>${r.specOk ? `✓ ${r.specEntities}e` : '<span class="fail">✗</span>'}</td>
+      <td class="${c}">${pctBar(r.testsPassed, r.testsTotal)}</td>
+      <td>${r.fixRounds > 0 ? r.fixRounds + '×' : '-'}</td>
+      <td>${r.promptTokensEst > 0 ? '~'+(r.promptTokensEst/1000).toFixed(1)+'K' : '-'}</td>
+      <td>${r.totalTokens > 0 ? (r.totalTokens/1000).toFixed(1)+'K' : '-'}</td>
+      <td>${(r.totalDurationMs/1000).toFixed(0)}s</td>
+      <td>${r.avgTokPerSec.toFixed(0)}</td>
+      <td><span class="stars">${r.stars}</span> ${r.score}p</td>
+    </tr>`;
+  }).join('');
+  document.querySelectorAll('#results-table th').forEach((th,i) => {
+    th.className = i === sortCol ? (sortAsc ? 'sorted-asc' : 'sorted-desc') : ''; // the stylesheet's ::after rule for these classes draws the arrow
+  });
+}
+document.querySelector('#results-table thead').addEventListener('click', e => { // header click: change the sort column or flip its direction
+  const col = Number.parseInt(e.target.dataset.col, 10); // explicit radix; NaN when the clicked element has no data-col
+  if (Number.isNaN(col)) return;
+  if (sortCol === col) sortAsc = !sortAsc;
+  else { sortCol = col; sortAsc = false; } // a newly selected column starts descending
+  renderResults();
+});
+renderResults(); // initial render with the default sort
+</script>
+</body>
+</html>
diff --git a/kipina-codebench/results/2026-04-14T09-43.json b/kipina-codebench/results/2026-04-14T09-43.json
new file mode 100644
index 0000000..c54701c
--- /dev/null
+++ b/kipina-codebench/results/2026-04-14T09-43.json
@@ -0,0 +1,22 @@
+[
+  {
+    "model": "qwen3-coder:30b",
+    "scenario": "todo",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 6,
+    "testsPassed": 6,
+    "testsFailed": 0,
+    "totalDurationMs": 21688,
+    "totalTokens": 2243,
+    "avgTokPerSec": 121.7719614197307,
+    "promptChars": 11588,
+    "promptTokensEst": 2897,
+    "score": 100,
+    "stars": "★★★★★",
+    "error": null
+  }
+]
\ No newline at end of file
diff --git a/kipina-codebench/results/2026-04-14T09-44.html b/kipina-codebench/results/2026-04-14T09-44.html
new file mode 100644
index 0000000..b8ea684
--- /dev/null
+++ b/kipina-codebench/results/2026-04-14T09-44.html
@@ -0,0 +1,183 @@
+<!DOCTYPE html>
+<html lang="fi">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1.0">
+<title>Kipina Model Benchmark</title>
+<style>
+  :root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
+  * { box-sizing: border-box; margin: 0; padding: 0; }
+  body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
+  h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
+  .meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
+  .cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
+  .card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
+  .card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
+  .card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
+  .card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
+  table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
+  th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
+  th:hover { color: var(--text); }
+  th.sorted-asc::after { content: ' ▲'; }
+  th.sorted-desc::after { content: ' ▼'; }
+  td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
+  tr:hover td { background: #1c2128; }
+  .pass { color: var(--green); }
+  .partial { color: var(--yellow); }
+  .fail { color: var(--red); }
+  .stars { letter-spacing: 1px; }
+  .bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
+  .bar-bg { background: var(--border); }
+  .bar-fill { background: var(--green); }
+  .bar-partial { background: var(--yellow); }
+  .model-name { font-weight: 600; }
+  h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
+  .summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
+</style>
+</head>
+<body>
+
+<h1>Kipina Model Benchmark</h1>
+<div class="meta" id="meta"></div>
+
+<div class="cards" id="cards"></div>
+
+<h2>Mallikohtainen yhteenveto</h2>
+<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
+
+<h2>Kaikki tulokset</h2>
+<table id="results-table"><thead></thead><tbody></tbody></table>
+
+<script>
+const RAW = [{"model":"qwen3:8b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":23521,"totalTokens":2090,"avgTokPerSec":100.94324085271073,"promptChars":10962,"promptTokensEst":2741,"score":100,"stars":"★★★★★","error":null},{"model":"qwen3:8b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":1,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":33680,"totalTokens":3003,"avgTokPerSec":100.52754588753601,"promptChars":10171,"promptTokensEst":2543,"score":90,"stars":"★★★★★","error":null},{"model":"qwen3:8b","scenario":"blog","reqOk":true,"specOk":false,"specEntities":0,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":0,"totalTokens":0,"avgTokPerSec":0,"promptChars":0,"promptTokensEst":0,"score":0,"stars":"","error":"JSON-speksi epäonnistui"}];
+
+// Star rating for a score: thresholds 90/70/50/25/>0 give 5..1 filled stars, anything else gives none.
+const starsFor = s => { const filled = s >= 90 ? 5 : s >= 70 ? 4 : s >= 50 ? 3 : s >= 25 ? 2 : s > 0 ? 1 : 0; return '★'.repeat(filled) + '☆'.repeat(5 - filled); };
+// Recompute a run's score (capped at 100): 10 for a valid spec, 10 when there is no error or tests ran, up to 60 for the pass ratio, 20 minus 10 per fix round.
+function calcScore(r) {
+  if (r.error && r.testsTotal === 0) return 0;
+  const specPts = r.specOk ? 10 : 0;
+  const ranPts = (!r.error || r.testsTotal > 0) ? 10 : 0;
+  const testPts = r.testsTotal > 0 ? Math.round((r.testsPassed / r.testsTotal) * 60) : 0;
+  return Math.min(100, specPts + ranPts + testPts + Math.max(0, 20 - (r.fixRounds || 0) * 10));
+}
+// Backfill derived fields on records that lack them: score, stars, and a prompt-token estimate (promptChars / 4)
+const DATA = RAW.map(r => {
+  if (r.score == null) r.score = calcScore(r);
+  if (!r.stars) r.stars = starsFor(r.score); // also replaces an empty "" stars string
+  if (!r.promptTokensEst) r.promptTokensEst = r.promptChars ? Math.round(r.promptChars / 4) : 0;
+  return r; // records are mutated in place, so DATA holds the same objects as RAW
+});
+const cls = r => { const ran = r.testsTotal > 0; return (!r.error && ran && r.testsPassed === r.testsTotal) ? 'pass' : (ran && r.testsPassed > 0) ? 'partial' : 'fail'; }; // CSS class for a run
+// Progress-bar markup followed by "passed/total"; '-' when total is 0. w is the bar width in px.
+const pctBar = (passed, total, w=80) => {
+  if (total === 0) return '-';
+  const pct = passed/total*100;
+  return `<span class="bar bar-bg" style="width:${w}px"><span class="bar ${pct === 100 ? 'bar-fill' : 'bar-partial'}" style="width:${Math.round(pct/100*w)}px"></span></span> ${passed}/${total}`;
+};
+
+// Header line: date, run count, total minutes. NOTE(review): new Date() is evaluated when the page is opened, so this is the viewing date, not the run date.
+const totalTime = DATA.reduce((s,r) => s + r.totalDurationMs, 0);
+document.getElementById('meta').textContent = `${new Date().toLocaleDateString('fi-FI')} — ${DATA.length} ajoa — ${(totalTime/1000/60).toFixed(1)} min`;
+
+// Summary cards: aggregates over all runs in DATA, then the card markup. Fixes the "pistetta" typo in the average card.
+const models = [...new Set(DATA.map(r => r.model))];
+const scenarios = [...new Set(DATA.map(r => r.scenario))];
+const avgScore = DATA.length ? Math.round(DATA.reduce((s,r) => s + r.score, 0) / DATA.length) : 0;
+const totalPassed = DATA.reduce((s,r) => s + r.testsPassed, 0);
+const totalTests = DATA.reduce((s,r) => s + r.testsTotal, 0);
+const passRate = totalTests ? Math.round(totalPassed/totalTests*100) : 0;
+const bestModel = models.map(m => {
+  const mrs = DATA.filter(r => r.model === m);
+  return { model: m, avg: Math.round(mrs.reduce((s,r) => s + r.score, 0) / mrs.length) };
+}).sort((a,b) => b.avg - a.avg)[0]; // highest mean score; undefined when DATA is empty
+const fastestModel = models.map(m => {
+  const mrs = DATA.filter(r => r.model === m);
+  return { model: m, speed: Math.round(mrs.reduce((s,r) => s + r.avgTokPerSec, 0) / mrs.length) }; // NOTE(review): runs recorded with 0 tok/s are included in the mean
+}).sort((a,b) => b.speed - a.speed)[0]; // highest mean tok/s; undefined when DATA is empty
+
+document.getElementById('cards').innerHTML = `
+  <div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistettä</div></div>
+  <div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
+  <div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${bestModel?.model || '-'}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
+  <div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${fastestModel?.model || '-'}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
+  <div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
+  <div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime/1000/60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
+`;
+
+// Per-model summary table: one column per scenario, then totals, sorted by mean score
+const sumHead = document.querySelector('#summary-table thead');
+const sumBody = document.querySelector('#summary-table tbody');
+sumHead.innerHTML = '<tr><th>Malli</th>' + scenarios.map(s => `<th>${s}</th>`).join('') + '<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>';
+
+const modelRows = models.map(m => {
+  const mrs = DATA.filter(r => r.model === m); // all runs of model m
+  const tp = mrs.reduce((s,r) => s + r.testsPassed, 0);
+  const tt = mrs.reduce((s,r) => s + r.testsTotal, 0);
+  const tok = mrs.reduce((s,r) => s + r.totalTokens, 0);
+  const time = mrs.reduce((s,r) => s + r.totalDurationMs, 0);
+  const speed = Math.round(mrs.reduce((s,r) => s + r.avgTokPerSec, 0) / mrs.length); // mean over every run, including runs recorded with 0 tok/s
+  const avg = Math.round(mrs.reduce((s,r) => s + r.score, 0) / mrs.length);
+  const scenCols = scenarios.map(s => {
+    const r = mrs.find(r => r.scenario === s); // first run of this model for scenario s
+    if (!r) return '<td>-</td>';
+    return `<td class="${cls(r)}">${pctBar(r.testsPassed, r.testsTotal, 60)} <span style="color:var(--dim)">${(r.totalDurationMs/1000).toFixed(0)}s</span></td>`;
+  }).join('');
+  return { avg, html: `<tr><td class="model-name">${m}</td>${scenCols}<td>${pctBar(tp, tt)}</td><td>${(tok/1000).toFixed(1)}K</td><td>${(time/1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>` };
+}).sort((a,b) => b.avg - a.avg); // best mean score first
+sumBody.innerHTML = modelRows.map(r => r.html).join('');
+
+// Results table: one row per run; each header carries data-col so a click on a <th> can be mapped to a sort column
+const resHead = document.querySelector('#results-table thead');
+const resBody = document.querySelector('#results-table tbody');
+const resCols = ['Malli','Skenaario','Speksi','Testit','Korjaus','Ctx','Out tok','Aika','tok/s','Pisteet'];
+resHead.innerHTML = '<tr>' + resCols.map((c,i) => `<th data-col="${i}">${c}</th>`).join('') + '</tr>';
+
+let sortCol = 9, sortAsc = false; // initial order: column 9 (Pisteet), descending
+function renderResults() { // Re-render the results tbody sorted by sortCol/sortAsc and mark the active header
+  const sorted = [...DATA].sort((a,b) => { // sort a copy so DATA keeps its order
+    const vals = [ // one [a, b] pair per column, in resCols order
+      [a.model, b.model],
+      [a.scenario, b.scenario],
+      [a.specEntities, b.specEntities],
+      [a.testsPassed/Math.max(a.testsTotal,1), b.testsPassed/Math.max(b.testsTotal,1)], // pass ratio; Math.max(..., 1) avoids dividing by 0
+      [a.fixRounds, b.fixRounds],
+      [a.promptTokensEst, b.promptTokensEst],
+      [a.totalTokens, b.totalTokens],
+      [a.totalDurationMs, b.totalDurationMs],
+      [a.avgTokPerSec, b.avgTokPerSec],
+      [a.score, b.score],
+    ][sortCol];
+    const cmp = typeof vals[0] === 'string' ? vals[0].localeCompare(vals[1]) : vals[0] - vals[1]; // strings by locale, numbers by difference
+    return sortAsc ? cmp : -cmp;
+  });
+  resBody.innerHTML = sorted.map(r => {
+    const c = cls(r);
+    return `<tr>
+      <td class="model-name">${r.model}</td>
+      <td>${r.scenario}</td>
+      <td>${r.specOk ? `✓ ${r.specEntities}e` : '<span class="fail">✗</span>'}</td>
+      <td class="${c}">${pctBar(r.testsPassed, r.testsTotal)}</td>
+      <td>${r.fixRounds > 0 ? r.fixRounds + '×' : '-'}</td>
+      <td>${r.promptTokensEst > 0 ? '~'+(r.promptTokensEst/1000).toFixed(1)+'K' : '-'}</td>
+      <td>${r.totalTokens > 0 ? (r.totalTokens/1000).toFixed(1)+'K' : '-'}</td>
+      <td>${(r.totalDurationMs/1000).toFixed(0)}s</td>
+      <td>${r.avgTokPerSec.toFixed(0)}</td>
+      <td><span class="stars">${r.stars}</span> ${r.score}p</td>
+    </tr>`;
+  }).join('');
+  document.querySelectorAll('#results-table th').forEach((th,i) => {
+    th.className = i === sortCol ? (sortAsc ? 'sorted-asc' : 'sorted-desc') : ''; // the stylesheet's ::after rule for these classes draws the arrow
+  });
+}
+document.querySelector('#results-table thead').addEventListener('click', e => { // header click: change the sort column or flip its direction
+  const col = Number.parseInt(e.target.dataset.col, 10); // explicit radix; NaN when the clicked element has no data-col
+  if (Number.isNaN(col)) return;
+  if (sortCol === col) sortAsc = !sortAsc;
+  else { sortCol = col; sortAsc = false; } // a newly selected column starts descending
+  renderResults();
+});
+renderResults(); // initial render with the default sort
+</script>
+</body>
+</html>
diff --git a/kipina-codebench/results/2026-04-14T09-44.json b/kipina-codebench/results/2026-04-14T09-44.json
new file mode 100644
index 0000000..b011487
--- /dev/null
+++ b/kipina-codebench/results/2026-04-14T09-44.json
@@ -0,0 +1,62 @@
+[
+  {
+    "model": "qwen3:8b",
+    "scenario": "todo",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 6,
+    "testsPassed": 6,
+    "testsFailed": 0,
+    "totalDurationMs": 23521,
+    "totalTokens": 2090,
+    "avgTokPerSec": 100.94324085271073,
+    "promptChars": 10962,
+    "promptTokensEst": 2741,
+    "score": 100,
+    "stars": "★★★★★",
+    "error": null
+  },
+  {
+    "model": "qwen3:8b",
+    "scenario": "users",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 1,
+    "testsTotal": 6,
+    "testsPassed": 6,
+    "testsFailed": 0,
+    "totalDurationMs": 33680,
+    "totalTokens": 3003,
+    "avgTokPerSec": 100.52754588753601,
+    "promptChars": 10171,
+    "promptTokensEst": 2543,
+    "score": 90,
+    "stars": "★★★★★",
+    "error": null
+  },
+  {
+    "model": "qwen3:8b",
+    "scenario": "blog",
+    "reqOk": true,
+    "specOk": false,
+    "specEntities": 0,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 0,
+    "testsPassed": 0,
+    "testsFailed": 0,
+    "totalDurationMs": 0,
+    "totalTokens": 0,
+    "avgTokPerSec": 0,
+    "promptChars": 0,
+    "promptTokensEst": 0,
+    "score": 0,
+    "stars": "",
+    "error": "JSON-speksi epäonnistui"
+  }
+]
\ No newline at end of file
diff --git a/kipina-codebench/results/2026-04-14T09-47.html b/kipina-codebench/results/2026-04-14T09-47.html
new file mode 100644
index 0000000..448c02e
--- /dev/null
+++ b/kipina-codebench/results/2026-04-14T09-47.html
@@ -0,0 +1,183 @@
+<!DOCTYPE html>
+<html lang="fi">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1.0">
+<title>Kipina Model Benchmark</title>
+<style>
+  :root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
+  * { box-sizing: border-box; margin: 0; padding: 0; }
+  body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
+  h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
+  .meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
+  .cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
+  .card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
+  .card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
+  .card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
+  .card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
+  table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
+  th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
+  th:hover { color: var(--text); }
+  th.sorted-asc::after { content: ' ▲'; }
+  th.sorted-desc::after { content: ' ▼'; }
+  td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
+  tr:hover td { background: #1c2128; }
+  .pass { color: var(--green); }
+  .partial { color: var(--yellow); }
+  .fail { color: var(--red); }
+  .stars { letter-spacing: 1px; }
+  .bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
+  .bar-bg { background: var(--border); }
+  .bar-fill { background: var(--green); }
+  .bar-partial { background: var(--yellow); }
+  .model-name { font-weight: 600; }
+  h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
+  .summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
+</style>
+</head>
+<body>
+
+<h1>Kipina Model Benchmark</h1>
+<div class="meta" id="meta"></div>
+
+<div class="cards" id="cards"></div>
+
+<h2>Mallikohtainen yhteenveto</h2>
+<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
+
+<h2>Kaikki tulokset</h2>
+<table id="results-table"><thead></thead><tbody></tbody></table>
+
+<script>
+const RAW = [{"model":"qwen3:8b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":3,"testsTotal":8,"testsPassed":6,"testsFailed":2,"totalDurationMs":97470,"totalTokens":8786,"avgTokPerSec":97.96636139685832,"promptChars":11290,"promptTokensEst":2823,"score":65,"stars":"★★★☆☆","error":null},{"model":"qwen3:8b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":18951,"totalTokens":1666,"avgTokPerSec":101.807593927545,"promptChars":10293,"promptTokensEst":2573,"score":100,"stars":"★★★★★","error":null},{"model":"qwen3:8b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":3,"testsTotal":1,"testsPassed":0,"testsFailed":1,"totalDurationMs":126005,"totalTokens":11056,"avgTokPerSec":96.6373549161171,"promptChars":11878,"promptTokensEst":2970,"score":20,"stars":"★☆☆☆☆","error":"Syntaksivirhe"}];
+
+const starsFor = s => s >= 90 ? '★★★★★' : s >= 70 ? '★★★★☆' : s >= 50 ? '★★★☆☆' : s >= 25 ? '★★☆☆☆' : s > 0 ? '★☆☆☆☆' : '☆☆☆☆☆';
+function calcScore(r) {
+  if (r.error && r.testsTotal === 0) return 0;
+  let s = 0;
+  if (r.specOk) s += 10;
+  if (!r.error || r.testsTotal > 0) s += 10;
+  if (r.testsTotal > 0) s += Math.round((r.testsPassed / r.testsTotal) * 60);
+  s += Math.max(0, 20 - (r.fixRounds || 0) * 10);
+  return Math.min(100, s);
+}
+// Laske pisteet jos puuttuvat
+const DATA = RAW.map(r => {
+  if (r.score == null) r.score = calcScore(r);
+  if (!r.stars) r.stars = starsFor(r.score);
+  if (!r.promptTokensEst) r.promptTokensEst = r.promptChars ? Math.round(r.promptChars / 4) : 0;
+  return r;
+});
+const cls = r => (!r.error && r.testsPassed === r.testsTotal && r.testsTotal > 0) ? 'pass' : (r.testsTotal > 0 && r.testsPassed > 0) ? 'partial' : 'fail';
+const pctBar = (passed, total, w=80) => {
+  if (total === 0) return '-';
+  const pct = passed/total*100;
+  const c = pct === 100 ? 'bar-fill' : 'bar-partial';
+  return `<span class="bar bar-bg" style="width:${w}px"><span class="bar ${c}" style="width:${Math.round(pct/100*w)}px"></span></span> ${passed}/${total}`;
+};
+
+// Meta
+const totalTime = DATA.reduce((s,r) => s + r.totalDurationMs, 0);
+document.getElementById('meta').textContent = `${new Date().toLocaleDateString('fi-FI')} — ${DATA.length} ajoa — ${(totalTime/1000/60).toFixed(1)} min`;
+
+// Cards
+const models = [...new Set(DATA.map(r => r.model))];
+const scenarios = [...new Set(DATA.map(r => r.scenario))];
+const avgScore = DATA.length ? Math.round(DATA.reduce((s,r) => s + r.score, 0) / DATA.length) : 0;
+const totalPassed = DATA.reduce((s,r) => s + r.testsPassed, 0);
+const totalTests = DATA.reduce((s,r) => s + r.testsTotal, 0);
+const passRate = totalTests ? Math.round(totalPassed/totalTests*100) : 0;
+const bestModel = models.map(m => {
+  const mrs = DATA.filter(r => r.model === m);
+  return { model: m, avg: Math.round(mrs.reduce((s,r) => s + r.score, 0) / mrs.length) };
+}).sort((a,b) => b.avg - a.avg)[0];
+const fastestModel = models.map(m => {
+  const mrs = DATA.filter(r => r.model === m);
+  return { model: m, speed: Math.round(mrs.reduce((s,r) => s + r.avgTokPerSec, 0) / mrs.length) };
+}).sort((a,b) => b.speed - a.speed)[0];
+
+document.getElementById('cards').innerHTML = `
+  <div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistetta</div></div>
+  <div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
+  <div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${bestModel?.model || '-'}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
+  <div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${fastestModel?.model || '-'}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
+  <div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
+  <div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime/1000/60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
+`;
+
+// Summary table
+const sumHead = document.querySelector('#summary-table thead');
+const sumBody = document.querySelector('#summary-table tbody');
+sumHead.innerHTML = '<tr><th>Malli</th>' + scenarios.map(s => `<th>${s}</th>`).join('') + '<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>';
+
+const modelRows = models.map(m => {
+  const mrs = DATA.filter(r => r.model === m);
+  const tp = mrs.reduce((s,r) => s + r.testsPassed, 0);
+  const tt = mrs.reduce((s,r) => s + r.testsTotal, 0);
+  const tok = mrs.reduce((s,r) => s + r.totalTokens, 0);
+  const time = mrs.reduce((s,r) => s + r.totalDurationMs, 0);
+  const speed = Math.round(mrs.reduce((s,r) => s + r.avgTokPerSec, 0) / mrs.length);
+  const avg = Math.round(mrs.reduce((s,r) => s + r.score, 0) / mrs.length);
+  const scenCols = scenarios.map(s => {
+    const r = mrs.find(r => r.scenario === s);
+    if (!r) return '<td>-</td>';
+    return `<td class="${cls(r)}">${pctBar(r.testsPassed, r.testsTotal, 60)} <span style="color:var(--dim)">${(r.totalDurationMs/1000).toFixed(0)}s</span></td>`;
+  }).join('');
+  return { avg, html: `<tr><td class="model-name">${m}</td>${scenCols}<td>${pctBar(tp, tt)}</td><td>${(tok/1000).toFixed(1)}K</td><td>${(time/1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>` };
+}).sort((a,b) => b.avg - a.avg);
+sumBody.innerHTML = modelRows.map(r => r.html).join('');
+
+// Results table
+const resHead = document.querySelector('#results-table thead');
+const resBody = document.querySelector('#results-table tbody');
+const resCols = ['Malli','Skenaario','Speksi','Testit','Korjaus','Ctx','Out tok','Aika','tok/s','Pisteet'];
+resHead.innerHTML = '<tr>' + resCols.map((c,i) => `<th data-col="${i}">${c}</th>`).join('') + '</tr>';
+
+let sortCol = 9, sortAsc = false;
+function renderResults() {
+  const sorted = [...DATA].sort((a,b) => {
+    const vals = [
+      [a.model, b.model],
+      [a.scenario, b.scenario],
+      [a.specEntities, b.specEntities],
+      [a.testsPassed/Math.max(a.testsTotal,1), b.testsPassed/Math.max(b.testsTotal,1)],
+      [a.fixRounds, b.fixRounds],
+      [a.promptTokensEst, b.promptTokensEst],
+      [a.totalTokens, b.totalTokens],
+      [a.totalDurationMs, b.totalDurationMs],
+      [a.avgTokPerSec, b.avgTokPerSec],
+      [a.score, b.score],
+    ][sortCol];
+    const cmp = typeof vals[0] === 'string' ? vals[0].localeCompare(vals[1]) : vals[0] - vals[1];
+    return sortAsc ? cmp : -cmp;
+  });
+  resBody.innerHTML = sorted.map(r => {
+    const c = cls(r);
+    return `<tr>
+      <td class="model-name">${r.model}</td>
+      <td>${r.scenario}</td>
+      <td>${r.specOk ? `✓ ${r.specEntities}e` : '<span class="fail">✗</span>'}</td>
+      <td class="${c}">${pctBar(r.testsPassed, r.testsTotal)}</td>
+      <td>${r.fixRounds > 0 ? r.fixRounds + '×' : '-'}</td>
+      <td>${r.promptTokensEst > 0 ? '~'+(r.promptTokensEst/1000).toFixed(1)+'K' : '-'}</td>
+      <td>${r.totalTokens > 0 ? (r.totalTokens/1000).toFixed(1)+'K' : '-'}</td>
+      <td>${(r.totalDurationMs/1000).toFixed(0)}s</td>
+      <td>${r.avgTokPerSec.toFixed(0)}</td>
+      <td><span class="stars">${r.stars}</span> ${r.score}p</td>
+    </tr>`;
+  }).join('');
+  document.querySelectorAll('#results-table th').forEach((th,i) => {
+    th.className = i === sortCol ? (sortAsc ? 'sorted-asc' : 'sorted-desc') : '';
+  });
+}
+document.querySelector('#results-table thead').addEventListener('click', e => {
+  const col = parseInt(e.target.dataset.col);
+  if (isNaN(col)) return;
+  if (sortCol === col) sortAsc = !sortAsc;
+  else { sortCol = col; sortAsc = false; }
+  renderResults();
+});
+renderResults();
+</script>
+</body>
+</html>
diff --git a/kipina-codebench/results/2026-04-14T09-47.json b/kipina-codebench/results/2026-04-14T09-47.json
new file mode 100644
index 0000000..0275dd6
--- /dev/null
+++ b/kipina-codebench/results/2026-04-14T09-47.json
@@ -0,0 +1,62 @@
+[
+  {
+    "model": "qwen3:8b",
+    "scenario": "todo",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 3,
+    "testsTotal": 8,
+    "testsPassed": 6,
+    "testsFailed": 2,
+    "totalDurationMs": 97470,
+    "totalTokens": 8786,
+    "avgTokPerSec": 97.96636139685832,
+    "promptChars": 11290,
+    "promptTokensEst": 2823,
+    "score": 65,
+    "stars": "★★★☆☆",
+    "error": null
+  },
+  {
+    "model": "qwen3:8b",
+    "scenario": "users",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 6,
+    "testsPassed": 6,
+    "testsFailed": 0,
+    "totalDurationMs": 18951,
+    "totalTokens": 1666,
+    "avgTokPerSec": 101.807593927545,
+    "promptChars": 10293,
+    "promptTokensEst": 2573,
+    "score": 100,
+    "stars": "★★★★★",
+    "error": null
+  },
+  {
+    "model": "qwen3:8b",
+    "scenario": "blog",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 2,
+    "validationIssues": 0,
+    "fixRounds": 3,
+    "testsTotal": 1,
+    "testsPassed": 0,
+    "testsFailed": 1,
+    "totalDurationMs": 126005,
+    "totalTokens": 11056,
+    "avgTokPerSec": 96.6373549161171,
+    "promptChars": 11878,
+    "promptTokensEst": 2970,
+    "score": 20,
+    "stars": "★☆☆☆☆",
+    "error": "Syntaksivirhe"
+  }
+]
\ No newline at end of file
diff --git a/kipina-codebench/results/2026-04-14T09-52.html b/kipina-codebench/results/2026-04-14T09-52.html
new file mode 100644
index 0000000..562f3ec
--- /dev/null
+++ b/kipina-codebench/results/2026-04-14T09-52.html
@@ -0,0 +1,183 @@
+<!DOCTYPE html>
+<html lang="fi">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1.0">
+<title>Kipina Model Benchmark</title>
+<style>
+  :root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
+  * { box-sizing: border-box; margin: 0; padding: 0; }
+  body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
+  h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
+  .meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
+  .cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
+  .card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
+  .card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
+  .card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
+  .card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
+  table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
+  th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
+  th:hover { color: var(--text); }
+  th.sorted-asc::after { content: ' ▲'; }
+  th.sorted-desc::after { content: ' ▼'; }
+  td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
+  tr:hover td { background: #1c2128; }
+  .pass { color: var(--green); }
+  .partial { color: var(--yellow); }
+  .fail { color: var(--red); }
+  .stars { letter-spacing: 1px; }
+  .bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
+  .bar-bg { background: var(--border); }
+  .bar-fill { background: var(--green); }
+  .bar-partial { background: var(--yellow); }
+  .model-name { font-weight: 600; }
+  h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
+  .summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
+</style>
+</head>
+<body>
+
+<h1>Kipina Model Benchmark</h1>
+<div class="meta" id="meta"></div>
+
+<div class="cards" id="cards"></div>
+
+<h2>Mallikohtainen yhteenveto</h2>
+<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
+
+<h2>Kaikki tulokset</h2>
+<table id="results-table"><thead></thead><tbody></tbody></table>
+
+<script>
+const RAW = [{"model":"qwen3-coder:30b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":25444,"totalTokens":2661,"avgTokPerSec":122.06801173056196,"promptChars":11849,"promptTokensEst":2962,"score":100,"stars":"★★★★★","error":null,"round":1},{"model":"qwen3-coder:30b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":24447,"totalTokens":2537,"avgTokPerSec":121.11837170891442,"promptChars":11045,"promptTokensEst":2761,"score":100,"stars":"★★★★★","error":null,"round":1},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":11,"testsPassed":11,"testsFailed":0,"totalDurationMs":38071,"totalTokens":3965,"avgTokPerSec":120.37309655579647,"promptChars":12702,"promptTokensEst":3176,"score":100,"stars":"★★★★★","error":null,"round":1},{"model":"qwen3:14b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":38459,"totalTokens":2106,"avgTokPerSec":60.889088461567745,"promptChars":10951,"promptTokensEst":2738,"score":100,"stars":"★★★★★","error":null,"round":1},{"model":"qwen3:14b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":35959,"totalTokens":1966,"avgTokPerSec":60.9684885562545,"promptChars":10698,"promptTokensEst":2675,"score":100,"stars":"★★★★★","error":null,"round":1},{"model":"qwen3:14b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":3,"testsTotal":13,"testsPassed":2,"testsFailed":11,"totalDurationMs":269370,"totalTokens":14361,"avgTokPerSec":57.79069860126629,"promptChars":11838,"promptTokensEst
":2960,"score":29,"stars":"★★☆☆☆","error":null,"round":1},{"model":"qwen3:8b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":23199,"totalTokens":2054,"avgTokPerSec":101.09280595816365,"promptChars":10854,"promptTokensEst":2714,"score":100,"stars":"★★★★★","error":null,"round":1},{"model":"qwen3:8b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":3,"testsTotal":1,"testsPassed":0,"testsFailed":1,"totalDurationMs":72665,"totalTokens":6586,"avgTokPerSec":99.40636298490288,"promptChars":10157,"promptTokensEst":2539,"score":20,"stars":"★☆☆☆☆","error":"Syntaksivirhe","round":1},{"model":"qwen3:8b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":3,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":136309,"totalTokens":12036,"avgTokPerSec":97.02525169408467,"promptChars":10823,"promptTokensEst":2706,"score":0,"stars":"☆☆☆☆☆","error":"Testit 
kaatuivat","round":1},{"model":"qwen3-coder:30b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":28177,"totalTokens":2946,"avgTokPerSec":121.23541038097,"promptChars":11836,"promptTokensEst":2959,"score":100,"stars":"★★★★★","error":null,"round":2},{"model":"qwen3-coder:30b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":8,"testsPassed":8,"testsFailed":0,"totalDurationMs":22631,"totalTokens":2352,"avgTokPerSec":121.93930190168658,"promptChars":10440,"promptTokensEst":2610,"score":100,"stars":"★★★★★","error":null,"round":2},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":12,"testsPassed":12,"testsFailed":0,"totalDurationMs":40394,"totalTokens":4225,"avgTokPerSec":120.84107397324551,"promptChars":12362,"promptTokensEst":3091,"score":100,"stars":"★★★★★","error":null,"round":2},{"model":"qwen3:14b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":7,"testsPassed":7,"testsFailed":0,"totalDurationMs":46081,"totalTokens":2542,"avgTokPerSec":60.93046828700026,"promptChars":11412,"promptTokensEst":2853,"score":100,"stars":"★★★★★","error":null,"round":2},{"model":"qwen3:14b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":7,"testsPassed":7,"testsFailed":0,"totalDurationMs":41323,"totalTokens":2272,"avgTokPerSec":60.99406174164295,"promptChars":10884,"promptTokensEst":2721,"score":100,"stars":"★★★★★","error":null,"round":2},{"model":"qwen3:14b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":3,"testsTotal":14,"testsPassed":2,"testsFailed":12,"totalDurationMs":262591,"totalTokens":14129,"avgTokPerSec":57.91340837830759,"promptChars":12143,"promptToke
nsEst":3036,"score":29,"stars":"★★☆☆☆","error":null,"round":2},{"model":"qwen3:8b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":24007,"totalTokens":2137,"avgTokPerSec":101.05982103292858,"promptChars":10756,"promptTokensEst":2689,"score":100,"stars":"★★★★★","error":null,"round":2},{"model":"qwen3:8b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":3,"testsTotal":7,"testsPassed":6,"testsFailed":1,"totalDurationMs":68739,"totalTokens":6199,"avgTokPerSec":98.9825675198183,"promptChars":10313,"promptTokensEst":2578,"score":71,"stars":"★★★★☆","error":null,"round":2},{"model":"qwen3:8b","scenario":"blog","reqOk":true,"specOk":false,"specEntities":0,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":0,"totalTokens":0,"avgTokPerSec":0,"promptChars":0,"promptTokensEst":0,"score":0,"stars":"","error":"JSON-speksi 
epäonnistui","round":2},{"model":"qwen3-coder:30b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":23472,"totalTokens":2427,"avgTokPerSec":120.85293828875076,"promptChars":11663,"promptTokensEst":2916,"score":100,"stars":"★★★★★","error":null,"round":3},{"model":"qwen3-coder:30b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":8,"testsPassed":8,"testsFailed":0,"totalDurationMs":25864,"totalTokens":2671,"avgTokPerSec":120.6883137195962,"promptChars":11148,"promptTokensEst":2787,"score":100,"stars":"★★★★★","error":null,"round":3},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":12,"testsPassed":12,"testsFailed":0,"totalDurationMs":41074,"totalTokens":4275,"avgTokPerSec":120.33351485161673,"promptChars":12664,"promptTokensEst":3166,"score":100,"stars":"★★★★★","error":null,"round":3},{"model":"qwen3:14b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":7,"testsPassed":7,"testsFailed":0,"totalDurationMs":40457,"totalTokens":2229,"avgTokPerSec":61.093615619948345,"promptChars":10905,"promptTokensEst":2726,"score":100,"stars":"★★★★★","error":null,"round":3},{"model":"qwen3:14b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":1,"testsTotal":7,"testsPassed":7,"testsFailed":0,"totalDurationMs":77506,"totalTokens":4268,"avgTokPerSec":60.19655522627278,"promptChars":11135,"promptTokensEst":2784,"score":90,"stars":"★★★★★","error":null,"round":3},{"model":"qwen3:14b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":12,"testsPassed":12,"testsFailed":0,"totalDurationMs":74791,"totalTokens":3590,"avgTokPerSec":60.549298891176214,"promptChars":11653,"promptT
okensEst":2913,"score":100,"stars":"★★★★★","error":null,"round":3},{"model":"qwen3:8b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":26402,"totalTokens":2358,"avgTokPerSec":100.76936895480246,"promptChars":11243,"promptTokensEst":2811,"score":100,"stars":"★★★★★","error":null,"round":3},{"model":"qwen3:8b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":20751,"totalTokens":1837,"avgTokPerSec":101.05480893032836,"promptChars":10553,"promptTokensEst":2638,"score":100,"stars":"★★★★★","error":null,"round":3},{"model":"qwen3:8b","scenario":"blog","reqOk":true,"specOk":false,"specEntities":0,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":0,"totalTokens":0,"avgTokPerSec":0,"promptChars":0,"promptTokensEst":0,"score":0,"stars":"","error":"JSON-speksi 
epäonnistui","round":3},{"model":"qwen3-coder:30b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":22098,"totalTokens":2283,"avgTokPerSec":121.81254413612446,"promptChars":11503,"promptTokensEst":2876,"score":100,"stars":"★★★★★","error":null,"round":4},{"model":"qwen3-coder:30b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":2,"testsTotal":8,"testsPassed":8,"testsFailed":0,"totalDurationMs":65403,"totalTokens":6779,"avgTokPerSec":118.13288294758586,"promptChars":10939,"promptTokensEst":2735,"score":80,"stars":"★★★★☆","error":null,"round":4},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":10,"testsPassed":10,"testsFailed":0,"totalDurationMs":36044,"totalTokens":3748,"avgTokPerSec":120.14822967005487,"promptChars":12639,"promptTokensEst":3160,"score":100,"stars":"★★★★★","error":null,"round":4},{"model":"qwen3:14b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":38501,"totalTokens":2113,"avgTokPerSec":61.01814139430428,"promptChars":10929,"promptTokensEst":2732,"score":100,"stars":"★★★★★","error":null,"round":4},{"model":"qwen3:14b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":3,"testsTotal":8,"testsPassed":1,"testsFailed":7,"totalDurationMs":147057,"totalTokens":7799,"avgTokPerSec":56.209406465865904,"promptChars":11207,"promptTokensEst":2802,"score":28,"stars":"★★☆☆☆","error":null,"round":4},{"model":"qwen3:14b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":3,"testsTotal":12,"testsPassed":12,"testsFailed":0,"totalDurationMs":227508,"totalTokens":12026,"avgTokPerSec":58.52888492610325,"promptChars":11809,"promp
tTokensEst":2952,"score":80,"stars":"★★★★☆","error":null,"round":4},{"model":"qwen3:8b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":3,"testsTotal":11,"testsPassed":11,"testsFailed":0,"totalDurationMs":131964,"totalTokens":11403,"avgTokPerSec":97.10963264920952,"promptChars":11786,"promptTokensEst":2947,"score":80,"stars":"★★★★☆","error":null,"round":4},{"model":"qwen3:8b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":38820,"totalTokens":1826,"avgTokPerSec":101.07773707712924,"promptChars":10568,"promptTokensEst":2642,"score":100,"stars":"★★★★★","error":null,"round":4},{"model":"qwen3:8b","scenario":"blog","reqOk":true,"specOk":false,"specEntities":0,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":0,"totalTokens":0,"avgTokPerSec":0,"promptChars":0,"promptTokensEst":0,"score":0,"stars":"","error":"JSON-speksi 
epäonnistui","round":4},{"model":"qwen3-coder:30b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":1,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":39797,"totalTokens":3776,"avgTokPerSec":120.91801837211113,"promptChars":11435,"promptTokensEst":2859,"score":90,"stars":"★★★★★","error":null,"round":5},{"model":"qwen3-coder:30b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":3,"testsTotal":9,"testsPassed":8,"testsFailed":1,"totalDurationMs":87836,"totalTokens":9343,"avgTokPerSec":119.28783662683314,"promptChars":10718,"promptTokensEst":2680,"score":73,"stars":"★★★★☆","error":null,"round":5},{"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":10,"testsPassed":10,"testsFailed":0,"totalDurationMs":36644,"totalTokens":3897,"avgTokPerSec":122.28607796191666,"promptChars":12598,"promptTokensEst":3150,"score":100,"stars":"★★★★★","error":null,"round":5},{"model":"qwen3:14b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":1,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":127532,"totalTokens":3919,"avgTokPerSec":34.13133325491828,"promptChars":11352,"promptTokensEst":2838,"score":90,"stars":"★★★★★","error":null,"round":5},{"model":"qwen3:14b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":3,"testsTotal":8,"testsPassed":6,"testsFailed":2,"totalDurationMs":217365,"totalTokens":7764,"avgTokPerSec":38.67613170588518,"promptChars":10834,"promptTokensEst":2709,"score":65,"stars":"★★★☆☆","error":null,"round":5},{"model":"qwen3:14b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":3,"testsTotal":14,"testsPassed":7,"testsFailed":7,"totalDurationMs":248311,"totalTokens":13443,"avgTokPerSec":58.05680015263308,"promptChars":12219,"promptTo
kensEst":3055,"score":50,"stars":"★★★☆☆","error":null,"round":5},{"model":"qwen3:8b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":38326,"totalTokens":2079,"avgTokPerSec":100.89778087504016,"promptChars":10908,"promptTokensEst":2727,"score":100,"stars":"★★★★★","error":null,"round":5},{"model":"qwen3:8b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":60823,"totalTokens":1772,"avgTokPerSec":96.76383996716295,"promptChars":10378,"promptTokensEst":2595,"score":100,"stars":"★★★★★","error":null,"round":5},{"model":"qwen3:8b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":11,"testsPassed":11,"testsFailed":0,"totalDurationMs":81654,"totalTokens":3458,"avgTokPerSec":95.65675360193613,"promptChars":11914,"promptTokensEst":2979,"score":100,"stars":"★★★★★","error":null,"round":5}];
+
+const starsFor = s => { const tier = [90, 70, 50, 25].findIndex(min => s >= min); const filled = tier >= 0 ? 5 - tier : (s > 0 ? 1 : 0); return '★'.repeat(filled) + '☆'.repeat(5 - filled); }; // score -> five-character star rating
+function calcScore(r) { // 0-100 score for one run record: spec + ran + test pass ratio + few fix rounds
+  const { error, specOk, testsTotal, testsPassed, fixRounds } = r;
+  if (error && testsTotal === 0) return 0; // failed before any test was counted
+  const specPoints = specOk ? 10 : 0;
+  const ranPoints = (!error || testsTotal > 0) ? 10 : 0;
+  const testPoints = testsTotal > 0 ? Math.round((testsPassed / testsTotal) * 60) : 0;
+  const fixPoints = Math.max(0, 20 - (fixRounds || 0) * 10); // 20 with no fix rounds, 10 with one, 0 from two on
+  return Math.min(100, specPoints + ranPoints + testPoints + fixPoints);
+}
+// Fill in score, stars and the prompt-token estimate where a raw record lacks them; RAW itself is left untouched.
+const DATA = RAW.map(raw => {
+  const score = raw.score ?? calcScore(raw);
+  const stars = raw.stars || starsFor(score); // also replaces an empty-string stars value
+  const promptTokensEst = raw.promptTokensEst || (raw.promptChars ? Math.round(raw.promptChars / 4) : 0); // estimate: 4 chars per token
+  return { ...raw, score, stars, promptTokensEst };
+});
+const cls = ({ error, testsPassed, testsTotal }) => { if (!error && testsPassed === testsTotal && testsTotal > 0) return 'pass'; return testsTotal > 0 && testsPassed > 0 ? 'partial' : 'fail'; }; // CSS class for a run's test outcome
+const pctBar = (passed, total, w=80) => { // HTML for a w-pixel pass-ratio bar followed by "passed/total"
+  if (total === 0) return '-'; // no ratio to draw
+  const percent = passed / total * 100;
+  const inner = `<span class="bar ${percent === 100 ? 'bar-fill' : 'bar-partial'}" style="width:${Math.round(percent / 100 * w)}px"></span>`;
+  return `<span class="bar bar-bg" style="width:${w}px">${inner}</span> ${passed}/${total}`;
+};
+
+// Meta line: the date the page is opened (fi-FI format), the number of runs, and the summed run time in minutes
+const totalTime = DATA.map(run => run.totalDurationMs).reduce((sum, ms) => sum + ms, 0);
+document.getElementById('meta').textContent = [new Date().toLocaleDateString('fi-FI'), `${DATA.length} ajoa`, `${(totalTime/1000/60).toFixed(1)} min`].join(' — ');
+
+// Cards: headline figures computed over every run in DATA
+const models = Array.from(new Set(DATA.map(run => run.model)));
+const scenarios = Array.from(new Set(DATA.map(run => run.scenario)));
+const sumOf = (runs, field) => runs.reduce((acc, run) => acc + run[field], 0);
+const avgScore = DATA.length ? Math.round(sumOf(DATA, 'score') / DATA.length) : 0;
+const totalPassed = sumOf(DATA, 'testsPassed');
+const totalTests = sumOf(DATA, 'testsTotal');
+const passRate = totalTests ? Math.round(totalPassed/totalTests*100) : 0;
+// Rounded per-model mean of one numeric field, listed highest first
+const rankModelsBy = field => models.map(model => {
+  const runs = DATA.filter(run => run.model === model);
+  return { model, mean: Math.round(sumOf(runs, field) / runs.length) };
+}).sort((a, b) => b.mean - a.mean);
+const bestModel = rankModelsBy('score').map(({ model, mean }) => ({ model, avg: mean }))[0];
+const fastestModel = rankModelsBy('avgTokPerSec').map(({ model, mean }) => ({ model, speed: mean }))[0];
+
+document.getElementById('cards').innerHTML = `
+  <div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistettä</div></div>
+  <div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
+  <div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${bestModel?.model || '-'}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
+  <div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${fastestModel?.model || '-'}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
+  <div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
+  <div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime/1000/60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
+`; // six headline cards: average score, test pass rate, best model, fastest model, model/scenario counts, total time
+
+// Summary table: one row per model (highest average score first), one cell per scenario, then the model's totals.
+const sumHead = document.querySelector('#summary-table thead');
+const sumBody = document.querySelector('#summary-table tbody');
+sumHead.innerHTML = '<tr><th>Malli</th>' + scenarios.map(s => `<th>${s}</th>`).join('') + '<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>';
+// RAW holds several rounds of the same model/scenario pair, so a scenario cell pools every matching run instead of showing only the first.
+const modelRows = models.map(m => {
+  const mrs = DATA.filter(r => r.model === m);
+  const sum = (rows, key) => rows.reduce((s,r) => s + r[key], 0); // total of one numeric field over a list of runs
+  const tp = sum(mrs, 'testsPassed');
+  const tt = sum(mrs, 'testsTotal');
+  const speed = Math.round(sum(mrs, 'avgTokPerSec') / mrs.length);
+  const avg = Math.round(sum(mrs, 'score') / mrs.length);
+  const scenCols = scenarios.map(s => {
+    const rs = mrs.filter(r => r.scenario === s);
+    if (rs.length === 0) return '<td>-</td>'; // this model has no run of this scenario
+    const pooled = { error: rs.find(r => r.error)?.error ?? null, testsPassed: sum(rs, 'testsPassed'), testsTotal: sum(rs, 'testsTotal') }; // the fields cls() reads
+    return `<td class="${cls(pooled)}">${pctBar(pooled.testsPassed, pooled.testsTotal, 60)} <span style="color:var(--dim)">${(sum(rs, 'totalDurationMs')/1000).toFixed(0)}s</span></td>`;
+  }).join('');
+  return { avg, html: `<tr><td class="model-name">${m}</td>${scenCols}<td>${pctBar(tp, tt)}</td><td>${(sum(mrs, 'totalTokens')/1000).toFixed(1)}K</td><td>${(sum(mrs, 'totalDurationMs')/1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>` };
+}).sort((a,b) => b.avg - a.avg);
+sumBody.innerHTML = modelRows.map(r => r.html).join('');
+
+// Results table: each header cell carries data-col, which the header click handler reads to pick the sort column
+const resHead = document.querySelector('#results-table thead');
+const resBody = document.querySelector('#results-table tbody');
+const resCols = ['Malli','Skenaario','Speksi','Testit','Korjaus','Ctx','Out tok','Aika','tok/s','Pisteet'];
+resHead.innerHTML = `<tr>${resCols.map((label, idx) => `<th data-col="${idx}">${label}</th>`).join('')}</tr>`;
+let sortCol = 9; // index into resCols of the active sort column; 9 is 'Pisteet'
+let sortAsc = false; // start in descending order
+function renderResults() { // redraw the results table body in the current sort order and mark the sorted header
+  const keyOf = [ // one value extractor per column, in header order
+    r => r.model,
+    r => r.scenario,
+    r => r.specEntities,
+    r => r.testsPassed/Math.max(r.testsTotal,1),
+    r => r.fixRounds,
+    r => r.promptTokensEst,
+    r => r.totalTokens,
+    r => r.totalDurationMs,
+    r => r.avgTokPerSec,
+    r => r.score,
+  ][sortCol];
+  const sorted = [...DATA].sort((a,b) => {
+    const [x, y] = [keyOf(a), keyOf(b)];
+    const cmp = typeof x === 'string' ? x.localeCompare(y) : x - y;
+    return sortAsc ? cmp : -cmp;
+  });
+  resBody.innerHTML = sorted.map(r => {
+    return `<tr>
+      <td class="model-name">${r.model}</td>
+      <td>${r.scenario}</td>
+      <td>${r.specOk ? `✓ ${r.specEntities}e` : '<span class="fail">✗</span>'}</td>
+      <td class="${cls(r)}">${pctBar(r.testsPassed, r.testsTotal)}</td>
+      <td>${r.fixRounds > 0 ? r.fixRounds + '×' : '-'}</td>
+      <td>${r.promptTokensEst > 0 ? '~'+(r.promptTokensEst/1000).toFixed(1)+'K' : '-'}</td>
+      <td>${r.totalTokens > 0 ? (r.totalTokens/1000).toFixed(1)+'K' : '-'}</td>
+      <td>${(r.totalDurationMs/1000).toFixed(0)}s</td>
+      <td>${r.avgTokPerSec.toFixed(0)}</td>
+      <td><span class="stars">${r.stars}</span> ${r.score}p</td>
+    </tr>`;
+  }).join('');
+  for (const [i, th] of document.querySelectorAll('#results-table th').entries()) {
+    th.className = i !== sortCol ? '' : (sortAsc ? 'sorted-asc' : 'sorted-desc');
+  }
+}
+document.querySelector('#results-table thead').addEventListener('click', e => { // header click: re-sort by the clicked column
+  const col = Number.parseInt(e.target.dataset.col, 10); // explicit radix; NaN when the target has no data-col
+  if (Number.isNaN(col)) return;
+  if (sortCol === col) sortAsc = !sortAsc; // same column again: flip direction
+  else { sortCol = col; sortAsc = false; } // new column: start descending
+  renderResults();
+});
+renderResults(); // initial render with the default sort
+</script>
+</body>
+</html>
diff --git a/kipina-codebench/results/2026-04-14T09-52.json b/kipina-codebench/results/2026-04-14T09-52.json
new file mode 100644
index 0000000..4e366ef
--- /dev/null
+++ b/kipina-codebench/results/2026-04-14T09-52.json
@@ -0,0 +1,947 @@
+[
+  {
+    "model": "qwen3-coder:30b",
+    "scenario": "todo",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 6,
+    "testsPassed": 6,
+    "testsFailed": 0,
+    "totalDurationMs": 25444,
+    "totalTokens": 2661,
+    "avgTokPerSec": 122.06801173056196,
+    "promptChars": 11849,
+    "promptTokensEst": 2962,
+    "score": 100,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 1
+  },
+  {
+    "model": "qwen3-coder:30b",
+    "scenario": "users",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 6,
+    "testsPassed": 6,
+    "testsFailed": 0,
+    "totalDurationMs": 24447,
+    "totalTokens": 2537,
+    "avgTokPerSec": 121.11837170891442,
+    "promptChars": 11045,
+    "promptTokensEst": 2761,
+    "score": 100,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 1
+  },
+  {
+    "model": "qwen3-coder:30b",
+    "scenario": "blog",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 2,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 11,
+    "testsPassed": 11,
+    "testsFailed": 0,
+    "totalDurationMs": 38071,
+    "totalTokens": 3965,
+    "avgTokPerSec": 120.37309655579647,
+    "promptChars": 12702,
+    "promptTokensEst": 3176,
+    "score": 100,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 1
+  },
+  {
+    "model": "qwen3:14b",
+    "scenario": "todo",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 6,
+    "testsPassed": 6,
+    "testsFailed": 0,
+    "totalDurationMs": 38459,
+    "totalTokens": 2106,
+    "avgTokPerSec": 60.889088461567745,
+    "promptChars": 10951,
+    "promptTokensEst": 2738,
+    "score": 100,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 1
+  },
+  {
+    "model": "qwen3:14b",
+    "scenario": "users",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 6,
+    "testsPassed": 6,
+    "testsFailed": 0,
+    "totalDurationMs": 35959,
+    "totalTokens": 1966,
+    "avgTokPerSec": 60.9684885562545,
+    "promptChars": 10698,
+    "promptTokensEst": 2675,
+    "score": 100,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 1
+  },
+  {
+    "model": "qwen3:14b",
+    "scenario": "blog",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 2,
+    "validationIssues": 0,
+    "fixRounds": 3,
+    "testsTotal": 13,
+    "testsPassed": 2,
+    "testsFailed": 11,
+    "totalDurationMs": 269370,
+    "totalTokens": 14361,
+    "avgTokPerSec": 57.79069860126629,
+    "promptChars": 11838,
+    "promptTokensEst": 2960,
+    "score": 29,
+    "stars": "★★☆☆☆",
+    "error": null,
+    "round": 1
+  },
+  {
+    "model": "qwen3:8b",
+    "scenario": "todo",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 6,
+    "testsPassed": 6,
+    "testsFailed": 0,
+    "totalDurationMs": 23199,
+    "totalTokens": 2054,
+    "avgTokPerSec": 101.09280595816365,
+    "promptChars": 10854,
+    "promptTokensEst": 2714,
+    "score": 100,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 1
+  },
+  {
+    "model": "qwen3:8b",
+    "scenario": "users",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 3,
+    "testsTotal": 1,
+    "testsPassed": 0,
+    "testsFailed": 1,
+    "totalDurationMs": 72665,
+    "totalTokens": 6586,
+    "avgTokPerSec": 99.40636298490288,
+    "promptChars": 10157,
+    "promptTokensEst": 2539,
+    "score": 20,
+    "stars": "★☆☆☆☆",
+    "error": "Syntaksivirhe",
+    "round": 1
+  },
+  {
+    "model": "qwen3:8b",
+    "scenario": "blog",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 3,
+    "testsTotal": 0,
+    "testsPassed": 0,
+    "testsFailed": 0,
+    "totalDurationMs": 136309,
+    "totalTokens": 12036,
+    "avgTokPerSec": 97.02525169408467,
+    "promptChars": 10823,
+    "promptTokensEst": 2706,
+    "score": 0,
+    "stars": "☆☆☆☆☆",
+    "error": "Testit kaatuivat",
+    "round": 1
+  },
+  {
+    "model": "qwen3-coder:30b",
+    "scenario": "todo",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 6,
+    "testsPassed": 6,
+    "testsFailed": 0,
+    "totalDurationMs": 28177,
+    "totalTokens": 2946,
+    "avgTokPerSec": 121.23541038097,
+    "promptChars": 11836,
+    "promptTokensEst": 2959,
+    "score": 100,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 2
+  },
+  {
+    "model": "qwen3-coder:30b",
+    "scenario": "users",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 8,
+    "testsPassed": 8,
+    "testsFailed": 0,
+    "totalDurationMs": 22631,
+    "totalTokens": 2352,
+    "avgTokPerSec": 121.93930190168658,
+    "promptChars": 10440,
+    "promptTokensEst": 2610,
+    "score": 100,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 2
+  },
+  {
+    "model": "qwen3-coder:30b",
+    "scenario": "blog",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 2,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 12,
+    "testsPassed": 12,
+    "testsFailed": 0,
+    "totalDurationMs": 40394,
+    "totalTokens": 4225,
+    "avgTokPerSec": 120.84107397324551,
+    "promptChars": 12362,
+    "promptTokensEst": 3091,
+    "score": 100,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 2
+  },
+  {
+    "model": "qwen3:14b",
+    "scenario": "todo",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 7,
+    "testsPassed": 7,
+    "testsFailed": 0,
+    "totalDurationMs": 46081,
+    "totalTokens": 2542,
+    "avgTokPerSec": 60.93046828700026,
+    "promptChars": 11412,
+    "promptTokensEst": 2853,
+    "score": 100,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 2
+  },
+  {
+    "model": "qwen3:14b",
+    "scenario": "users",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 7,
+    "testsPassed": 7,
+    "testsFailed": 0,
+    "totalDurationMs": 41323,
+    "totalTokens": 2272,
+    "avgTokPerSec": 60.99406174164295,
+    "promptChars": 10884,
+    "promptTokensEst": 2721,
+    "score": 100,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 2
+  },
+  {
+    "model": "qwen3:14b",
+    "scenario": "blog",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 2,
+    "validationIssues": 0,
+    "fixRounds": 3,
+    "testsTotal": 14,
+    "testsPassed": 2,
+    "testsFailed": 12,
+    "totalDurationMs": 262591,
+    "totalTokens": 14129,
+    "avgTokPerSec": 57.91340837830759,
+    "promptChars": 12143,
+    "promptTokensEst": 3036,
+    "score": 29,
+    "stars": "★★☆☆☆",
+    "error": null,
+    "round": 2
+  },
+  {
+    "model": "qwen3:8b",
+    "scenario": "todo",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 6,
+    "testsPassed": 6,
+    "testsFailed": 0,
+    "totalDurationMs": 24007,
+    "totalTokens": 2137,
+    "avgTokPerSec": 101.05982103292858,
+    "promptChars": 10756,
+    "promptTokensEst": 2689,
+    "score": 100,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 2
+  },
+  {
+    "model": "qwen3:8b",
+    "scenario": "users",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 3,
+    "testsTotal": 7,
+    "testsPassed": 6,
+    "testsFailed": 1,
+    "totalDurationMs": 68739,
+    "totalTokens": 6199,
+    "avgTokPerSec": 98.9825675198183,
+    "promptChars": 10313,
+    "promptTokensEst": 2578,
+    "score": 71,
+    "stars": "★★★★☆",
+    "error": null,
+    "round": 2
+  },
+  {
+    "model": "qwen3:8b",
+    "scenario": "blog",
+    "reqOk": true,
+    "specOk": false,
+    "specEntities": 0,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 0,
+    "testsPassed": 0,
+    "testsFailed": 0,
+    "totalDurationMs": 0,
+    "totalTokens": 0,
+    "avgTokPerSec": 0,
+    "promptChars": 0,
+    "promptTokensEst": 0,
+    "score": 0,
+    "stars": "",
+    "error": "JSON-speksi epäonnistui",
+    "round": 2
+  },
+  {
+    "model": "qwen3-coder:30b",
+    "scenario": "todo",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 6,
+    "testsPassed": 6,
+    "testsFailed": 0,
+    "totalDurationMs": 23472,
+    "totalTokens": 2427,
+    "avgTokPerSec": 120.85293828875076,
+    "promptChars": 11663,
+    "promptTokensEst": 2916,
+    "score": 100,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 3
+  },
+  {
+    "model": "qwen3-coder:30b",
+    "scenario": "users",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 8,
+    "testsPassed": 8,
+    "testsFailed": 0,
+    "totalDurationMs": 25864,
+    "totalTokens": 2671,
+    "avgTokPerSec": 120.6883137195962,
+    "promptChars": 11148,
+    "promptTokensEst": 2787,
+    "score": 100,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 3
+  },
+  {
+    "model": "qwen3-coder:30b",
+    "scenario": "blog",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 2,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 12,
+    "testsPassed": 12,
+    "testsFailed": 0,
+    "totalDurationMs": 41074,
+    "totalTokens": 4275,
+    "avgTokPerSec": 120.33351485161673,
+    "promptChars": 12664,
+    "promptTokensEst": 3166,
+    "score": 100,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 3
+  },
+  {
+    "model": "qwen3:14b",
+    "scenario": "todo",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 7,
+    "testsPassed": 7,
+    "testsFailed": 0,
+    "totalDurationMs": 40457,
+    "totalTokens": 2229,
+    "avgTokPerSec": 61.093615619948345,
+    "promptChars": 10905,
+    "promptTokensEst": 2726,
+    "score": 100,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 3
+  },
+  {
+    "model": "qwen3:14b",
+    "scenario": "users",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 1,
+    "testsTotal": 7,
+    "testsPassed": 7,
+    "testsFailed": 0,
+    "totalDurationMs": 77506,
+    "totalTokens": 4268,
+    "avgTokPerSec": 60.19655522627278,
+    "promptChars": 11135,
+    "promptTokensEst": 2784,
+    "score": 90,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 3
+  },
+  {
+    "model": "qwen3:14b",
+    "scenario": "blog",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 2,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 12,
+    "testsPassed": 12,
+    "testsFailed": 0,
+    "totalDurationMs": 74791,
+    "totalTokens": 3590,
+    "avgTokPerSec": 60.549298891176214,
+    "promptChars": 11653,
+    "promptTokensEst": 2913,
+    "score": 100,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 3
+  },
+  {
+    "model": "qwen3:8b",
+    "scenario": "todo",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 6,
+    "testsPassed": 6,
+    "testsFailed": 0,
+    "totalDurationMs": 26402,
+    "totalTokens": 2358,
+    "avgTokPerSec": 100.76936895480246,
+    "promptChars": 11243,
+    "promptTokensEst": 2811,
+    "score": 100,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 3
+  },
+  {
+    "model": "qwen3:8b",
+    "scenario": "users",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 6,
+    "testsPassed": 6,
+    "testsFailed": 0,
+    "totalDurationMs": 20751,
+    "totalTokens": 1837,
+    "avgTokPerSec": 101.05480893032836,
+    "promptChars": 10553,
+    "promptTokensEst": 2638,
+    "score": 100,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 3
+  },
+  {
+    "model": "qwen3:8b",
+    "scenario": "blog",
+    "reqOk": true,
+    "specOk": false,
+    "specEntities": 0,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 0,
+    "testsPassed": 0,
+    "testsFailed": 0,
+    "totalDurationMs": 0,
+    "totalTokens": 0,
+    "avgTokPerSec": 0,
+    "promptChars": 0,
+    "promptTokensEst": 0,
+    "score": 0,
+    "stars": "",
+    "error": "JSON-speksi epäonnistui",
+    "round": 3
+  },
+  {
+    "model": "qwen3-coder:30b",
+    "scenario": "todo",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 6,
+    "testsPassed": 6,
+    "testsFailed": 0,
+    "totalDurationMs": 22098,
+    "totalTokens": 2283,
+    "avgTokPerSec": 121.81254413612446,
+    "promptChars": 11503,
+    "promptTokensEst": 2876,
+    "score": 100,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 4
+  },
+  {
+    "model": "qwen3-coder:30b",
+    "scenario": "users",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 2,
+    "testsTotal": 8,
+    "testsPassed": 8,
+    "testsFailed": 0,
+    "totalDurationMs": 65403,
+    "totalTokens": 6779,
+    "avgTokPerSec": 118.13288294758586,
+    "promptChars": 10939,
+    "promptTokensEst": 2735,
+    "score": 80,
+    "stars": "★★★★☆",
+    "error": null,
+    "round": 4
+  },
+  {
+    "model": "qwen3-coder:30b",
+    "scenario": "blog",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 2,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 10,
+    "testsPassed": 10,
+    "testsFailed": 0,
+    "totalDurationMs": 36044,
+    "totalTokens": 3748,
+    "avgTokPerSec": 120.14822967005487,
+    "promptChars": 12639,
+    "promptTokensEst": 3160,
+    "score": 100,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 4
+  },
+  {
+    "model": "qwen3:14b",
+    "scenario": "todo",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 6,
+    "testsPassed": 6,
+    "testsFailed": 0,
+    "totalDurationMs": 38501,
+    "totalTokens": 2113,
+    "avgTokPerSec": 61.01814139430428,
+    "promptChars": 10929,
+    "promptTokensEst": 2732,
+    "score": 100,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 4
+  },
+  {
+    "model": "qwen3:14b",
+    "scenario": "users",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 3,
+    "testsTotal": 8,
+    "testsPassed": 1,
+    "testsFailed": 7,
+    "totalDurationMs": 147057,
+    "totalTokens": 7799,
+    "avgTokPerSec": 56.209406465865904,
+    "promptChars": 11207,
+    "promptTokensEst": 2802,
+    "score": 28,
+    "stars": "★★☆☆☆",
+    "error": null,
+    "round": 4
+  },
+  {
+    "model": "qwen3:14b",
+    "scenario": "blog",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 2,
+    "validationIssues": 0,
+    "fixRounds": 3,
+    "testsTotal": 12,
+    "testsPassed": 12,
+    "testsFailed": 0,
+    "totalDurationMs": 227508,
+    "totalTokens": 12026,
+    "avgTokPerSec": 58.52888492610325,
+    "promptChars": 11809,
+    "promptTokensEst": 2952,
+    "score": 80,
+    "stars": "★★★★☆",
+    "error": null,
+    "round": 4
+  },
+  {
+    "model": "qwen3:8b",
+    "scenario": "todo",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 2,
+    "validationIssues": 0,
+    "fixRounds": 3,
+    "testsTotal": 11,
+    "testsPassed": 11,
+    "testsFailed": 0,
+    "totalDurationMs": 131964,
+    "totalTokens": 11403,
+    "avgTokPerSec": 97.10963264920952,
+    "promptChars": 11786,
+    "promptTokensEst": 2947,
+    "score": 80,
+    "stars": "★★★★☆",
+    "error": null,
+    "round": 4
+  },
+  {
+    "model": "qwen3:8b",
+    "scenario": "users",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 6,
+    "testsPassed": 6,
+    "testsFailed": 0,
+    "totalDurationMs": 38820,
+    "totalTokens": 1826,
+    "avgTokPerSec": 101.07773707712924,
+    "promptChars": 10568,
+    "promptTokensEst": 2642,
+    "score": 100,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 4
+  },
+  {
+    "model": "qwen3:8b",
+    "scenario": "blog",
+    "reqOk": true,
+    "specOk": false,
+    "specEntities": 0,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 0,
+    "testsPassed": 0,
+    "testsFailed": 0,
+    "totalDurationMs": 0,
+    "totalTokens": 0,
+    "avgTokPerSec": 0,
+    "promptChars": 0,
+    "promptTokensEst": 0,
+    "score": 0,
+    "stars": "",
+    "error": "JSON-speksi epäonnistui",
+    "round": 4
+  },
+  {
+    "model": "qwen3-coder:30b",
+    "scenario": "todo",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 1,
+    "testsTotal": 6,
+    "testsPassed": 6,
+    "testsFailed": 0,
+    "totalDurationMs": 39797,
+    "totalTokens": 3776,
+    "avgTokPerSec": 120.91801837211113,
+    "promptChars": 11435,
+    "promptTokensEst": 2859,
+    "score": 90,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 5
+  },
+  {
+    "model": "qwen3-coder:30b",
+    "scenario": "users",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 3,
+    "testsTotal": 9,
+    "testsPassed": 8,
+    "testsFailed": 1,
+    "totalDurationMs": 87836,
+    "totalTokens": 9343,
+    "avgTokPerSec": 119.28783662683314,
+    "promptChars": 10718,
+    "promptTokensEst": 2680,
+    "score": 73,
+    "stars": "★★★★☆",
+    "error": null,
+    "round": 5
+  },
+  {
+    "model": "qwen3-coder:30b",
+    "scenario": "blog",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 2,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 10,
+    "testsPassed": 10,
+    "testsFailed": 0,
+    "totalDurationMs": 36644,
+    "totalTokens": 3897,
+    "avgTokPerSec": 122.28607796191666,
+    "promptChars": 12598,
+    "promptTokensEst": 3150,
+    "score": 100,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 5
+  },
+  {
+    "model": "qwen3:14b",
+    "scenario": "todo",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 1,
+    "testsTotal": 6,
+    "testsPassed": 6,
+    "testsFailed": 0,
+    "totalDurationMs": 127532,
+    "totalTokens": 3919,
+    "avgTokPerSec": 34.13133325491828,
+    "promptChars": 11352,
+    "promptTokensEst": 2838,
+    "score": 90,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 5
+  },
+  {
+    "model": "qwen3:14b",
+    "scenario": "users",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 3,
+    "testsTotal": 8,
+    "testsPassed": 6,
+    "testsFailed": 2,
+    "totalDurationMs": 217365,
+    "totalTokens": 7764,
+    "avgTokPerSec": 38.67613170588518,
+    "promptChars": 10834,
+    "promptTokensEst": 2709,
+    "score": 65,
+    "stars": "★★★☆☆",
+    "error": null,
+    "round": 5
+  },
+  {
+    "model": "qwen3:14b",
+    "scenario": "blog",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 2,
+    "validationIssues": 0,
+    "fixRounds": 3,
+    "testsTotal": 14,
+    "testsPassed": 7,
+    "testsFailed": 7,
+    "totalDurationMs": 248311,
+    "totalTokens": 13443,
+    "avgTokPerSec": 58.05680015263308,
+    "promptChars": 12219,
+    "promptTokensEst": 3055,
+    "score": 50,
+    "stars": "★★★☆☆",
+    "error": null,
+    "round": 5
+  },
+  {
+    "model": "qwen3:8b",
+    "scenario": "todo",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 6,
+    "testsPassed": 6,
+    "testsFailed": 0,
+    "totalDurationMs": 38326,
+    "totalTokens": 2079,
+    "avgTokPerSec": 100.89778087504016,
+    "promptChars": 10908,
+    "promptTokensEst": 2727,
+    "score": 100,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 5
+  },
+  {
+    "model": "qwen3:8b",
+    "scenario": "users",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 6,
+    "testsPassed": 6,
+    "testsFailed": 0,
+    "totalDurationMs": 60823,
+    "totalTokens": 1772,
+    "avgTokPerSec": 96.76383996716295,
+    "promptChars": 10378,
+    "promptTokensEst": 2595,
+    "score": 100,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 5
+  },
+  {
+    "model": "qwen3:8b",
+    "scenario": "blog",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 2,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 11,
+    "testsPassed": 11,
+    "testsFailed": 0,
+    "totalDurationMs": 81654,
+    "totalTokens": 3458,
+    "avgTokPerSec": 95.65675360193613,
+    "promptChars": 11914,
+    "promptTokensEst": 2979,
+    "score": 100,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 5
+  }
+]
\ No newline at end of file
diff --git a/kipina-codebench/results/2026-04-14T10-03.html b/kipina-codebench/results/2026-04-14T10-03.html
new file mode 100644
index 0000000..b7b16cf
--- /dev/null
+++ b/kipina-codebench/results/2026-04-14T10-03.html
@@ -0,0 +1,183 @@
+<!DOCTYPE html>
+<html lang="fi">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1.0">
+<title>Kipina Model Benchmark</title>
+<style>
+  :root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
+  * { box-sizing: border-box; margin: 0; padding: 0; }
+  body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
+  h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
+  .meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
+  .cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
+  .card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
+  .card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
+  .card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
+  .card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
+  table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
+  th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
+  th:hover { color: var(--text); }
+  th.sorted-asc::after { content: ' ▲'; }
+  th.sorted-desc::after { content: ' ▼'; }
+  td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
+  tr:hover td { background: #1c2128; }
+  .pass { color: var(--green); }
+  .partial { color: var(--yellow); }
+  .fail { color: var(--red); }
+  .stars { letter-spacing: 1px; }
+  .bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
+  .bar-bg { background: var(--border); }
+  .bar-fill { background: var(--green); }
+  .bar-partial { background: var(--yellow); }
+  .model-name { font-weight: 600; }
+  h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
+  .summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
+</style>
+</head>
+<body>
+
+<h1>Kipina Model Benchmark</h1>
+<div class="meta" id="meta"></div>
+
+<div class="cards" id="cards"></div>
+
+<h2>Mallikohtainen yhteenveto</h2>
+<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
+
+<h2>Kaikki tulokset</h2>
+<table id="results-table"><thead></thead><tbody></tbody></table>
+
+<script>
+const RAW = [];
+
+const starsFor = s => s >= 90 ? '★★★★★' : s >= 70 ? '★★★★☆' : s >= 50 ? '★★★☆☆' : s >= 25 ? '★★☆☆☆' : s > 0 ? '★☆☆☆☆' : '☆☆☆☆☆';
+function calcScore(r) {
+  if (r.error && r.testsTotal === 0) return 0;
+  let s = 0;
+  if (r.specOk) s += 10;
+  if (!r.error || r.testsTotal > 0) s += 10;
+  if (r.testsTotal > 0) s += Math.round((r.testsPassed / r.testsTotal) * 60);
+  s += Math.max(0, 20 - (r.fixRounds || 0) * 10);
+  return Math.min(100, s);
+}
+// Laske pisteet jos puuttuvat
+const DATA = RAW.map(r => {
+  if (r.score == null) r.score = calcScore(r);
+  if (!r.stars) r.stars = starsFor(r.score);
+  if (!r.promptTokensEst) r.promptTokensEst = r.promptChars ? Math.round(r.promptChars / 4) : 0;
+  return r;
+});
+const cls = r => (!r.error && r.testsPassed === r.testsTotal && r.testsTotal > 0) ? 'pass' : (r.testsTotal > 0 && r.testsPassed > 0) ? 'partial' : 'fail';
+const pctBar = (passed, total, w=80) => {
+  if (total === 0) return '-';
+  const pct = passed/total*100;
+  const c = pct === 100 ? 'bar-fill' : 'bar-partial';
+  return `<span class="bar bar-bg" style="width:${w}px"><span class="bar ${c}" style="width:${Math.round(pct/100*w)}px"></span></span> ${passed}/${total}`;
+};
+
+// Meta
+const totalTime = DATA.reduce((s,r) => s + r.totalDurationMs, 0);
+document.getElementById('meta').textContent = `${new Date().toLocaleDateString('fi-FI')} — ${DATA.length} ajoa — ${(totalTime/1000/60).toFixed(1)} min`;
+
+// Cards
+const models = [...new Set(DATA.map(r => r.model))];
+const scenarios = [...new Set(DATA.map(r => r.scenario))];
+const avgScore = DATA.length ? Math.round(DATA.reduce((s,r) => s + r.score, 0) / DATA.length) : 0;
+const totalPassed = DATA.reduce((s,r) => s + r.testsPassed, 0);
+const totalTests = DATA.reduce((s,r) => s + r.testsTotal, 0);
+const passRate = totalTests ? Math.round(totalPassed/totalTests*100) : 0;
+const bestModel = models.map(m => {
+  const mrs = DATA.filter(r => r.model === m);
+  return { model: m, avg: Math.round(mrs.reduce((s,r) => s + r.score, 0) / mrs.length) };
+}).sort((a,b) => b.avg - a.avg)[0];
+const fastestModel = models.map(m => {
+  const mrs = DATA.filter(r => r.model === m);
+  return { model: m, speed: Math.round(mrs.reduce((s,r) => s + r.avgTokPerSec, 0) / mrs.length) };
+}).sort((a,b) => b.speed - a.speed)[0];
+
+document.getElementById('cards').innerHTML = `
+  <div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistetta</div></div>
+  <div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
+  <div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${bestModel?.model || '-'}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
+  <div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${fastestModel?.model || '-'}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
+  <div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
+  <div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime/1000/60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
+`;
+
+// Summary table
+const sumHead = document.querySelector('#summary-table thead');
+const sumBody = document.querySelector('#summary-table tbody');
+sumHead.innerHTML = '<tr><th>Malli</th>' + scenarios.map(s => `<th>${s}</th>`).join('') + '<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>';
+
+const modelRows = models.map(m => {
+  const mrs = DATA.filter(r => r.model === m);
+  const tp = mrs.reduce((s,r) => s + r.testsPassed, 0);
+  const tt = mrs.reduce((s,r) => s + r.testsTotal, 0);
+  const tok = mrs.reduce((s,r) => s + r.totalTokens, 0);
+  const time = mrs.reduce((s,r) => s + r.totalDurationMs, 0);
+  const speed = Math.round(mrs.reduce((s,r) => s + r.avgTokPerSec, 0) / mrs.length);
+  const avg = Math.round(mrs.reduce((s,r) => s + r.score, 0) / mrs.length);
+  const scenCols = scenarios.map(s => {
+    const r = mrs.find(r => r.scenario === s);
+    if (!r) return '<td>-</td>';
+    return `<td class="${cls(r)}">${pctBar(r.testsPassed, r.testsTotal, 60)} <span style="color:var(--dim)">${(r.totalDurationMs/1000).toFixed(0)}s</span></td>`;
+  }).join('');
+  return { avg, html: `<tr><td class="model-name">${m}</td>${scenCols}<td>${pctBar(tp, tt)}</td><td>${(tok/1000).toFixed(1)}K</td><td>${(time/1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>` };
+}).sort((a,b) => b.avg - a.avg);
+sumBody.innerHTML = modelRows.map(r => r.html).join('');
+
+// Results table
+const resHead = document.querySelector('#results-table thead');
+const resBody = document.querySelector('#results-table tbody');
+const resCols = ['Malli','Skenaario','Speksi','Testit','Korjaus','Ctx','Out tok','Aika','tok/s','Pisteet'];
+resHead.innerHTML = '<tr>' + resCols.map((c,i) => `<th data-col="${i}">${c}</th>`).join('') + '</tr>';
+
+let sortCol = 9, sortAsc = false;
+function renderResults() {
+  const sorted = [...DATA].sort((a,b) => {
+    const vals = [
+      [a.model, b.model],
+      [a.scenario, b.scenario],
+      [a.specEntities, b.specEntities],
+      [a.testsPassed/Math.max(a.testsTotal,1), b.testsPassed/Math.max(b.testsTotal,1)],
+      [a.fixRounds, b.fixRounds],
+      [a.promptTokensEst, b.promptTokensEst],
+      [a.totalTokens, b.totalTokens],
+      [a.totalDurationMs, b.totalDurationMs],
+      [a.avgTokPerSec, b.avgTokPerSec],
+      [a.score, b.score],
+    ][sortCol];
+    const cmp = typeof vals[0] === 'string' ? vals[0].localeCompare(vals[1]) : vals[0] - vals[1];
+    return sortAsc ? cmp : -cmp;
+  });
+  resBody.innerHTML = sorted.map(r => {
+    const c = cls(r);
+    return `<tr>
+      <td class="model-name">${r.model}</td>
+      <td>${r.scenario}</td>
+      <td>${r.specOk ? `✓ ${r.specEntities}e` : '<span class="fail">✗</span>'}</td>
+      <td class="${c}">${pctBar(r.testsPassed, r.testsTotal)}</td>
+      <td>${r.fixRounds > 0 ? r.fixRounds + '×' : '-'}</td>
+      <td>${r.promptTokensEst > 0 ? '~'+(r.promptTokensEst/1000).toFixed(1)+'K' : '-'}</td>
+      <td>${r.totalTokens > 0 ? (r.totalTokens/1000).toFixed(1)+'K' : '-'}</td>
+      <td>${(r.totalDurationMs/1000).toFixed(0)}s</td>
+      <td>${r.avgTokPerSec.toFixed(0)}</td>
+      <td><span class="stars">${r.stars}</span> ${r.score}p</td>
+    </tr>`;
+  }).join('');
+  document.querySelectorAll('#results-table th').forEach((th,i) => {
+    th.className = i === sortCol ? (sortAsc ? 'sorted-asc' : 'sorted-desc') : '';
+  });
+}
+document.querySelector('#results-table thead').addEventListener('click', e => {
+  const col = parseInt(e.target.dataset.col);
+  if (isNaN(col)) return;
+  if (sortCol === col) sortAsc = !sortAsc;
+  else { sortCol = col; sortAsc = false; }
+  renderResults();
+});
+renderResults();
+</script>
+</body>
+</html>
diff --git a/kipina-codebench/results/2026-04-14T10-03.json b/kipina-codebench/results/2026-04-14T10-03.json
new file mode 100644
index 0000000..0637a08
--- /dev/null
+++ b/kipina-codebench/results/2026-04-14T10-03.json
@@ -0,0 +1 @@
+[]
\ No newline at end of file
diff --git a/kipina-codebench/results/2026-04-14T10-31.html b/kipina-codebench/results/2026-04-14T10-31.html
new file mode 100644
index 0000000..815a791
--- /dev/null
+++ b/kipina-codebench/results/2026-04-14T10-31.html
@@ -0,0 +1,183 @@
+<!DOCTYPE html>
+<html lang="fi">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1.0">
+<title>Kipina Model Benchmark</title>
+<style>
+  :root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
+  * { box-sizing: border-box; margin: 0; padding: 0; }
+  body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
+  h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
+  .meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
+  .cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
+  .card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
+  .card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
+  .card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
+  .card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
+  table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
+  th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
+  th:hover { color: var(--text); }
+  th.sorted-asc::after { content: ' ▲'; }
+  th.sorted-desc::after { content: ' ▼'; }
+  td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
+  tr:hover td { background: #1c2128; }
+  .pass { color: var(--green); }
+  .partial { color: var(--yellow); }
+  .fail { color: var(--red); }
+  .stars { letter-spacing: 1px; }
+  .bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
+  .bar-bg { background: var(--border); }
+  .bar-fill { background: var(--green); }
+  .bar-partial { background: var(--yellow); }
+  .model-name { font-weight: 600; }
+  h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
+  .summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
+</style>
+</head>
+<body>
+
+<h1>Kipina Model Benchmark</h1>
+<div class="meta" id="meta"></div>
+
+<div class="cards" id="cards"></div>
+
+<h2>Mallikohtainen yhteenveto</h2>
+<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
+
+<h2>Kaikki tulokset</h2>
+<table id="results-table"><thead></thead><tbody></tbody></table>
+
+<script>
+const RAW = [{"model":"qwen3:8b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":97527,"totalTokens":2228,"avgTokPerSec":100.69171830800946,"promptChars":11566,"promptTokensEst":2892,"score":100,"stars":"★★★★★","error":null,"round":1},{"model":"qwen3:8b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":7,"testsPassed":7,"testsFailed":0,"totalDurationMs":39549,"totalTokens":1960,"avgTokPerSec":100.98265593129491,"promptChars":11073,"promptTokensEst":2768,"score":100,"stars":"★★★★★","error":null,"round":1},{"model":"qwen3:8b","scenario":"blog","reqOk":true,"specOk":false,"specEntities":0,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":0,"totalTokens":0,"avgTokPerSec":0,"promptChars":0,"promptTokensEst":0,"score":0,"stars":"","error":"JSON-speksi epäonnistui","round":1},{"model":"qwen3:8b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":3,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":131339,"totalTokens":11518,"avgTokPerSec":96.52358107464266,"promptChars":12388,"promptTokensEst":3097,"score":0,"stars":"☆☆☆☆☆","error":"Testit kaatuivat","round":2},{"model":"qwen3:8b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":20658,"totalTokens":1808,"avgTokPerSec":101.0081173861862,"promptChars":11057,"promptTokensEst":2764,"score":100,"stars":"★★★★★","error":null,"round":2},{"model":"qwen3:8b","scenario":"blog","reqOk":true,"specOk":false,"specEntities":0,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":0,"totalTokens":0,"avgTokPerSec":0,"promptChars":0,"promptTokensEst":0,"score":0,"stars":"","error":"JSON-speksi epäonnistui","round":2},{"model":"qwen3:8b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":1,"fixRounds":5,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":320031,"totalTokens":11985,"avgTokPerSec":54.915025374575386,"promptChars":12517,"promptTokensEst":3129,"score":0,"stars":"☆☆☆☆☆","error":"Testit kaatuivat","round":3},{"model":"qwen3:8b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":7,"testsPassed":7,"testsFailed":0,"totalDurationMs":28654,"totalTokens":1877,"avgTokPerSec":100.70920643946336,"promptChars":10747,"promptTokensEst":2687,"score":100,"stars":"★★★★★","error":null,"round":3},{"model":"qwen3:8b","scenario":"blog","reqOk":true,"specOk":false,"specEntities":0,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":0,"totalTokens":0,"avgTokPerSec":0,"promptChars":0,"promptTokensEst":0,"score":0,"stars":"","error":"JSON-speksi epäonnistui","round":3},{"model":"qwen3:8b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":1,"testsTotal":12,"testsPassed":12,"testsFailed":0,"totalDurationMs":67943,"totalTokens":6002,"avgTokPerSec":98.29436788902672,"promptChars":12389,"promptTokensEst":3097,"score":90,"stars":"★★★★★","error":null,"round":4},{"model":"qwen3:8b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":20203,"totalTokens":1774,"avgTokPerSec":100.9066297884274,"promptChars":10905,"promptTokensEst":2726,"score":100,"stars":"★★★★★","error":null,"round":4},{"model":"qwen3:8b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":3,"testsTotal":13,"testsPassed":12,"testsFailed":1,"totalDurationMs":148491,"totalTokens":12747,"avgTokPerSec":95.18237885727869,"promptChars":12476,"promptTokensEst":3119,"score":75,"stars":"★★★★☆","error":null,"round":4},{"model":"qwen3:8b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":23830,"totalTokens":2102,"avgTokPerSec":100.641489789061,"promptChars":11404,"promptTokensEst":2851,"score":100,"stars":"★★★★★","error":null,"round":5},{"model":"qwen3:8b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":3,"testsTotal":8,"testsPassed":6,"testsFailed":2,"totalDurationMs":122453,"totalTokens":7285,"avgTokPerSec":94.12482830400619,"promptChars":11400,"promptTokensEst":2850,"score":65,"stars":"★★★☆☆","error":null,"round":5},{"model":"qwen3:8b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":3,"testsTotal":11,"testsPassed":10,"testsFailed":1,"totalDurationMs":147125,"totalTokens":9893,"avgTokPerSec":97.37021605085566,"promptChars":12455,"promptTokensEst":3114,"score":75,"stars":"★★★★☆","error":null,"round":5}]; // one record per benchmark run (model, scenario, round); kept on a single line so no string literal spans a line break
+
+const starsFor = s => { const filled = s >= 90 ? 5 : s >= 70 ? 4 : s >= 50 ? 3 : s >= 25 ? 2 : s > 0 ? 1 : 0; return '★'.repeat(filled) + '☆'.repeat(5 - filled); }; // score -> five-character star rating
+function calcScore(r) { // score capped at 100: spec 10 + ran 10 + test pass ratio (up to 60) + fix-round bonus (20 minus 10 per round, floor 0)
+  const { error, testsTotal, testsPassed, specOk, fixRounds } = r;
+  if (error && testsTotal === 0) return 0; // errored without any test result: no points at all
+  const specPts = specOk ? 10 : 0;
+  const ranPts = (!error || testsTotal > 0) ? 10 : 0;
+  const testPts = testsTotal > 0 ? Math.round((testsPassed / testsTotal) * 60) : 0;
+  const fixPts = Math.max(0, 20 - (fixRounds || 0) * 10);
+  return Math.min(100, specPts + ranPts + testPts + fixPts);
+}
+// Backfill derived fields (score, stars, prompt-token estimate) on rows where they are missing; rows are updated in place
+const DATA = RAW.map(row => {
+  row.score = row.score ?? calcScore(row);
+  row.stars = row.stars || starsFor(row.score);
+  row.promptTokensEst = row.promptTokensEst || (row.promptChars ? Math.round(row.promptChars / 4) : 0);
+  return row;
+});
+const cls = r => { if (!r.error && r.testsPassed === r.testsTotal && r.testsTotal > 0) return 'pass'; return (r.testsTotal > 0 && r.testsPassed > 0) ? 'partial' : 'fail'; }; // CSS class for a run's test outcome
+const pctBar = (passed, total, w=80) => { // w-pixel progress bar markup followed by "passed/total"; '-' when total is 0
+  if (total === 0) return '-';
+  const ratio = passed/total*100;
+  const inner = `<span class="bar ${ratio === 100 ? 'bar-fill' : 'bar-partial'}" style="width:${Math.round(ratio/100*w)}px"></span>`;
+  return `<span class="bar bar-bg" style="width:${w}px">${inner}</span> ${passed}/${total}`;
+};
+
+// Meta line: render-time date (fi-FI), number of runs, and summed run duration in minutes
+const totalTime = DATA.reduce((sum, row) => sum + row.totalDurationMs, 0);
+document.getElementById('meta').textContent = [new Date().toLocaleDateString('fi-FI'), `${DATA.length} ajoa`, `${(totalTime/1000/60).toFixed(1)} min`].join(' — ');
+
+// Cards: headline aggregates over every run in DATA
+const models = [...new Set(DATA.map(row => row.model))];
+const scenarios = [...new Set(DATA.map(row => row.scenario))];
+const avgScore = DATA.length ? Math.round(DATA.reduce((sum, row) => sum + row.score, 0) / DATA.length) : 0;
+const totalPassed = DATA.reduce((sum, row) => sum + row.testsPassed, 0);
+const totalTests = DATA.reduce((sum, row) => sum + row.testsTotal, 0);
+const passRate = totalTests ? Math.round(totalPassed/totalTests*100) : 0;
+// topModelBy: rounded per-model mean of one numeric field, sorted highest first;
+// yields the leading { model, [outKey] } entry, or undefined when there are no models
+const topModelBy = (field, outKey) => models.map(name => {
+  const rows = DATA.filter(row => row.model === name);
+  return { model: name, [outKey]: Math.round(rows.reduce((sum, row) => sum + row[field], 0) / rows.length) };
+}).sort((x, y) => y[outKey] - x[outKey])[0];
+const bestModel = topModelBy('score', 'avg');
+const fastestModel = topModelBy('avgTokPerSec', 'speed');
+
+document.getElementById('cards').innerHTML = /* six summary cards: mean score, test pass rate, best model, fastest model, model/scenario counts, total time */ `
+  <div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistettä</div></div>
+  <div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
+  <div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${bestModel?.model || '-'}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
+  <div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${fastestModel?.model || '-'}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
+  <div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
+  <div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime/1000/60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
+`;
+
+// Summary table: one row per model, ordered by rounded mean score (highest first)
+const sumHead = document.querySelector('#summary-table thead');
+const sumBody = document.querySelector('#summary-table tbody');
+sumHead.innerHTML = `<tr><th>Malli</th>${scenarios.map(name => `<th>${name}</th>`).join('')}<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>`;
+
+const modelRows = models.map(modelName => {
+  const rows = DATA.filter(row => row.model === modelName);
+  const sumOf = field => rows.reduce((acc, row) => acc + row[field], 0);
+  const passed = sumOf('testsPassed');
+  const total = sumOf('testsTotal');
+  const speed = Math.round(sumOf('avgTokPerSec') / rows.length);
+  const avg = Math.round(sumOf('score') / rows.length);
+  const cells = [`<td class="model-name">${modelName}</td>`];
+  for (const scen of scenarios) {
+    const hit = rows.find(row => row.scenario === scen); // only the first matching run is shown, even when several rows share the scenario
+    cells.push(hit ? `<td class="${cls(hit)}">${pctBar(hit.testsPassed, hit.testsTotal, 60)} <span style="color:var(--dim)">${(hit.totalDurationMs/1000).toFixed(0)}s</span></td>` : '<td>-</td>');
+  }
+  cells.push(`<td>${pctBar(passed, total)}</td>`, `<td>${(sumOf('totalTokens')/1000).toFixed(1)}K</td>`, `<td>${(sumOf('totalDurationMs')/1000).toFixed(0)}s</td>`, `<td>${speed}</td>`, `<td><span class="stars">${starsFor(avg)}</span> ${avg}p</td>`);
+  return { avg, html: `<tr>${cells.join('')}</tr>` };
+}).sort((x, y) => y.avg - x.avg);
+sumBody.innerHTML = modelRows.map(entry => entry.html).join('');
+
+// Results table: each header cell carries data-col (its index) so a click can be mapped to a sort column
+const resHead = document.querySelector('#results-table thead');
+const resBody = document.querySelector('#results-table tbody');
+const resCols = ['Malli','Skenaario','Speksi','Testit','Korjaus','Ctx','Out tok','Aika','tok/s','Pisteet'];
+resHead.innerHTML = `<tr>${resCols.map((label, idx) => `<th data-col="${idx}">${label}</th>`).join('')}</tr>`;
+
+let sortCol = 9, sortAsc = false; // initial sort: column 9 ('Pisteet'), descending
+function renderResults() {
+  // Rebuild the results tbody ordered by the active column, then mark that column's header
+  const keyOf = [
+    row => row.model,
+    row => row.scenario,
+    row => row.specEntities,
+    row => row.testsPassed/Math.max(row.testsTotal,1),
+    row => row.fixRounds,
+    row => row.promptTokensEst,
+    row => row.totalTokens,
+    row => row.totalDurationMs,
+    row => row.avgTokPerSec,
+    row => row.score,
+  ][sortCol];
+  const compare = (x, y) => (typeof x === 'string' ? x.localeCompare(y) : x - y);
+  const sorted = [...DATA].sort((a, b) => {
+    const cmp = compare(keyOf(a), keyOf(b));
+    return sortAsc ? cmp : -cmp;
+  });
+  const rowHtml = r => `<tr>
+      <td class="model-name">${r.model}</td>
+      <td>${r.scenario}</td>
+      <td>${r.specOk ? `✓ ${r.specEntities}e` : '<span class="fail">✗</span>'}</td>
+      <td class="${cls(r)}">${pctBar(r.testsPassed, r.testsTotal)}</td>
+      <td>${r.fixRounds > 0 ? r.fixRounds + '×' : '-'}</td>
+      <td>${r.promptTokensEst > 0 ? '~'+(r.promptTokensEst/1000).toFixed(1)+'K' : '-'}</td>
+      <td>${r.totalTokens > 0 ? (r.totalTokens/1000).toFixed(1)+'K' : '-'}</td>
+      <td>${(r.totalDurationMs/1000).toFixed(0)}s</td>
+      <td>${r.avgTokPerSec.toFixed(0)}</td>
+      <td><span class="stars">${r.stars}</span> ${r.score}p</td>
+    </tr>`;
+  resBody.innerHTML = sorted.map(rowHtml).join('');
+  document.querySelectorAll('#results-table th').forEach((th, idx) => {
+    th.className = idx !== sortCol ? '' : (sortAsc ? 'sorted-asc' : 'sorted-desc');
+  });
+}
+document.querySelector('#results-table thead').addEventListener('click', e => { // header click: choose the sort column or flip the direction
+  const col = Number.parseInt(e.target.dataset.col, 10); // explicit radix; yields NaN when the clicked node has no data-col
+  if (Number.isNaN(col)) return; // non-coercing check: ignore clicks that did not land on a column header
+  if (sortCol === col) sortAsc = !sortAsc;
+  else { sortCol = col; sortAsc = false; } // a newly selected column starts in descending order
+  renderResults();
+});
+renderResults(); // initial render using the current sortCol / sortAsc values
+</script>
+</body>
+</html>
diff --git a/kipina-codebench/results/2026-04-14T10-31.json b/kipina-codebench/results/2026-04-14T10-31.json
new file mode 100644
index 0000000..3a904cf
--- /dev/null
+++ b/kipina-codebench/results/2026-04-14T10-31.json
@@ -0,0 +1,317 @@
+[
+  {
+    "model": "qwen3:8b",
+    "scenario": "todo",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 6,
+    "testsPassed": 6,
+    "testsFailed": 0,
+    "totalDurationMs": 97527,
+    "totalTokens": 2228,
+    "avgTokPerSec": 100.69171830800946,
+    "promptChars": 11566,
+    "promptTokensEst": 2892,
+    "score": 100,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 1
+  },
+  {
+    "model": "qwen3:8b",
+    "scenario": "users",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 7,
+    "testsPassed": 7,
+    "testsFailed": 0,
+    "totalDurationMs": 39549,
+    "totalTokens": 1960,
+    "avgTokPerSec": 100.98265593129491,
+    "promptChars": 11073,
+    "promptTokensEst": 2768,
+    "score": 100,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 1
+  },
+  {
+    "model": "qwen3:8b",
+    "scenario": "blog",
+    "reqOk": true,
+    "specOk": false,
+    "specEntities": 0,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 0,
+    "testsPassed": 0,
+    "testsFailed": 0,
+    "totalDurationMs": 0,
+    "totalTokens": 0,
+    "avgTokPerSec": 0,
+    "promptChars": 0,
+    "promptTokensEst": 0,
+    "score": 0,
+    "stars": "",
+    "error": "JSON-speksi epäonnistui",
+    "round": 1
+  },
+  {
+    "model": "qwen3:8b",
+    "scenario": "todo",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 2,
+    "validationIssues": 0,
+    "fixRounds": 3,
+    "testsTotal": 0,
+    "testsPassed": 0,
+    "testsFailed": 0,
+    "totalDurationMs": 131339,
+    "totalTokens": 11518,
+    "avgTokPerSec": 96.52358107464266,
+    "promptChars": 12388,
+    "promptTokensEst": 3097,
+    "score": 0,
+    "stars": "☆☆☆☆☆",
+    "error": "Testit kaatuivat",
+    "round": 2
+  },
+  {
+    "model": "qwen3:8b",
+    "scenario": "users",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 6,
+    "testsPassed": 6,
+    "testsFailed": 0,
+    "totalDurationMs": 20658,
+    "totalTokens": 1808,
+    "avgTokPerSec": 101.0081173861862,
+    "promptChars": 11057,
+    "promptTokensEst": 2764,
+    "score": 100,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 2
+  },
+  {
+    "model": "qwen3:8b",
+    "scenario": "blog",
+    "reqOk": true,
+    "specOk": false,
+    "specEntities": 0,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 0,
+    "testsPassed": 0,
+    "testsFailed": 0,
+    "totalDurationMs": 0,
+    "totalTokens": 0,
+    "avgTokPerSec": 0,
+    "promptChars": 0,
+    "promptTokensEst": 0,
+    "score": 0,
+    "stars": "",
+    "error": "JSON-speksi epäonnistui",
+    "round": 2
+  },
+  {
+    "model": "qwen3:8b",
+    "scenario": "todo",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 2,
+    "validationIssues": 1,
+    "fixRounds": 5,
+    "testsTotal": 0,
+    "testsPassed": 0,
+    "testsFailed": 0,
+    "totalDurationMs": 320031,
+    "totalTokens": 11985,
+    "avgTokPerSec": 54.915025374575386,
+    "promptChars": 12517,
+    "promptTokensEst": 3129,
+    "score": 0,
+    "stars": "☆☆☆☆☆",
+    "error": "Testit kaatuivat",
+    "round": 3
+  },
+  {
+    "model": "qwen3:8b",
+    "scenario": "users",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 7,
+    "testsPassed": 7,
+    "testsFailed": 0,
+    "totalDurationMs": 28654,
+    "totalTokens": 1877,
+    "avgTokPerSec": 100.70920643946336,
+    "promptChars": 10747,
+    "promptTokensEst": 2687,
+    "score": 100,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 3
+  },
+  {
+    "model": "qwen3:8b",
+    "scenario": "blog",
+    "reqOk": true,
+    "specOk": false,
+    "specEntities": 0,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 0,
+    "testsPassed": 0,
+    "testsFailed": 0,
+    "totalDurationMs": 0,
+    "totalTokens": 0,
+    "avgTokPerSec": 0,
+    "promptChars": 0,
+    "promptTokensEst": 0,
+    "score": 0,
+    "stars": "",
+    "error": "JSON-speksi epäonnistui",
+    "round": 3
+  },
+  {
+    "model": "qwen3:8b",
+    "scenario": "todo",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 2,
+    "validationIssues": 0,
+    "fixRounds": 1,
+    "testsTotal": 12,
+    "testsPassed": 12,
+    "testsFailed": 0,
+    "totalDurationMs": 67943,
+    "totalTokens": 6002,
+    "avgTokPerSec": 98.29436788902672,
+    "promptChars": 12389,
+    "promptTokensEst": 3097,
+    "score": 90,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 4
+  },
+  {
+    "model": "qwen3:8b",
+    "scenario": "users",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 6,
+    "testsPassed": 6,
+    "testsFailed": 0,
+    "totalDurationMs": 20203,
+    "totalTokens": 1774,
+    "avgTokPerSec": 100.9066297884274,
+    "promptChars": 10905,
+    "promptTokensEst": 2726,
+    "score": 100,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 4
+  },
+  {
+    "model": "qwen3:8b",
+    "scenario": "blog",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 2,
+    "validationIssues": 0,
+    "fixRounds": 3,
+    "testsTotal": 13,
+    "testsPassed": 12,
+    "testsFailed": 1,
+    "totalDurationMs": 148491,
+    "totalTokens": 12747,
+    "avgTokPerSec": 95.18237885727869,
+    "promptChars": 12476,
+    "promptTokensEst": 3119,
+    "score": 75,
+    "stars": "★★★★☆",
+    "error": null,
+    "round": 4
+  },
+  {
+    "model": "qwen3:8b",
+    "scenario": "todo",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 0,
+    "testsTotal": 6,
+    "testsPassed": 6,
+    "testsFailed": 0,
+    "totalDurationMs": 23830,
+    "totalTokens": 2102,
+    "avgTokPerSec": 100.641489789061,
+    "promptChars": 11404,
+    "promptTokensEst": 2851,
+    "score": 100,
+    "stars": "★★★★★",
+    "error": null,
+    "round": 5
+  },
+  {
+    "model": "qwen3:8b",
+    "scenario": "users",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 1,
+    "validationIssues": 0,
+    "fixRounds": 3,
+    "testsTotal": 8,
+    "testsPassed": 6,
+    "testsFailed": 2,
+    "totalDurationMs": 122453,
+    "totalTokens": 7285,
+    "avgTokPerSec": 94.12482830400619,
+    "promptChars": 11400,
+    "promptTokensEst": 2850,
+    "score": 65,
+    "stars": "★★★☆☆",
+    "error": null,
+    "round": 5
+  },
+  {
+    "model": "qwen3:8b",
+    "scenario": "blog",
+    "reqOk": true,
+    "specOk": true,
+    "specEntities": 2,
+    "validationIssues": 0,
+    "fixRounds": 3,
+    "testsTotal": 11,
+    "testsPassed": 10,
+    "testsFailed": 1,
+    "totalDurationMs": 147125,
+    "totalTokens": 9893,
+    "avgTokPerSec": 97.37021605085566,
+    "promptChars": 12455,
+    "promptTokensEst": 3114,
+    "score": 75,
+    "stars": "★★★★☆",
+    "error": null,
+    "round": 5
+  }
+]
\ No newline at end of file