Siirrä kipina-codebench projektin päätasolle

This commit is contained in:
2026-04-14 09:44:14 +03:00
parent b93ae2fd1b
commit 7b27800390
24 changed files with 0 additions and 0 deletions

View File

@@ -0,0 +1,5 @@
# Test-runner image: slim Python 3.14 base with the uv binary copied in from the ghcr.io/astral-sh/uv image.
FROM python:3.14-slim
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
# Tests run inside /work, which is also placed on the Python import path.
WORKDIR /work
ENV PYTHONPATH=/work
# On container start, in order (each step must succeed for the next to run):
#   1. uv init creates a project requiring Python 3.14 or newer (stderr is discarded)
#   2. hello.py / main.py are removed (presumably stubs left by uv init; verify)
#   3. fastapi, uvicorn[standard], sqlalchemy, pytest and httpx are added (stderr is discarded)
#   4. every *.py file from the /src mount is copied into /work
#   5. app.db / test.db are deleted so each run starts without old databases
#   6. pytest runs test_main.py verbosely with short tracebacks, stderr merged into stdout
ENTRYPOINT ["sh", "-c", "uv init --no-readme --python '>=3.14' 2>/dev/null && rm -f hello.py main.py && uv add fastapi 'uvicorn[standard]' sqlalchemy pytest httpx 2>/dev/null && cp /src/*.py . && rm -f app.db test.db && uv run pytest test_main.py -v --tb=short 2>&1"]

View File

@@ -0,0 +1,95 @@
# Kipinä CodeBench
LLM-koodingenerointibenchmark. Testaa Ollama-mallien kykyä generoida toimivia FastAPI+SQLAlchemy-projekteja ja ajaa testit Docker-kontissa.
## Pikastart
```bash
# 1. Rakenna Docker-testikontti
docker build -t kipina-pytest -f Dockerfile.pytest .
# 2. Aja benchmark
node benchmark.mjs --ollama http://localhost:11434 --scenarios all
# 3. Avaa raportti
open /tmp/kipina-benchmark/report.html
```
## Pipeline
```
1. LLM → vaatimusmäärittely (prompts/client.md)
2. LLM → JSON-speksi (prompts/spec.md)
3. LLM → 4 Python-tiedostoa (prompts/code.md + golden-examples/)
4. Staattinen validointi + LLM-korjaus (prompts/fix.md)
5. Docker: uv init + uv add + pytest
```
## CLI-argumentit
| Argumentti | Oletus | Kuvaus |
|-----------|--------|--------|
| `--ollama` | `http://localhost:11434` | Ollama-palvelimen URL |
| `--hub` | - | Hub-reitti (vaihtoehto Ollamalle) |
| `--models` | kaikki | Pilkuilla erotettu mallilista |
| `--scenarios` | `default` (todo) | `all` = todo, users, blog |
| `--output` | `/tmp/kipina-benchmark` | Tuloshakemisto |
## Hakemistorakenne
```
kipina-codebench/
├── benchmark.mjs ← runner
├── Dockerfile.pytest ← Python 3.14 + uv testikontti
├── report-template.html ← HTML-raporttipohja
├── package.json
├── prompts/ ← muokattavat promptit
│ ├── client.md ← vaatimusmäärittely
│ ├── spec.md ← JSON-speksi
│ ├── code.md ← koodigenerointi
│ └── fix.md ← korjaus
├── golden-examples/ ← referenssitoteutukset
│ ├── todo/ ← taso 1: perus-CRUD (6 testiä)
│ ├── blog/ ← taso 2: relaatiot (13 testiä)
│ └── DOCUMENTATION.md ← zensical-dokumentointiohjeet
└── results/ ← tallennetut tulokset
```
## Promptien muokkaus
Promptit ovat `prompts/`-kansiossa Markdown-tiedostoina. Muokkaa suoraan — benchmark lataa ne käynnistyksessä.
Esimerkki: lisää sääntö `prompts/code.md`:hen:
```
- Tests: PUT/update test data MUST include ALL required fields
```
## Kultaiset esimerkit
`golden-examples/todo/` syötetään LLM:lle referenssinä. Malli näkee tarkalleen millaista koodia odotetaan:
- SQLAlchemy 2.0 (DeclarativeBase, Mapped, mapped_column)
- Pydantic v2 (ConfigDict)
- Python 3.14 syntaksi (str | None)
- Uniikki testidata per testi
Lisää uusia esimerkkejä luomalla hakemisto (esim. `golden-examples/shop/`).
## Pisteytys
| Komponentti | Pisteet | Peruste |
|---|---|---|
| Speksi OK | 10p | JSON-speksi onnistui |
| Koodi generoitu | 10p | Kaikki 4 tiedostoa syntyneet |
| Testit | 0–60p | passed/total × 60 |
| Korjaukset | 0–20p | 0 kierrosta = 20p, 1 = 10p, 2+ = 0p |
Tähdet: ★★★★★ (90+), ★★★★☆ (70+), ★★★☆☆ (50+), ★★☆☆☆ (25+), ★☆☆☆☆ (1+)
## Käyttö git-submodulena
```bash
git submodule add <repo-url> tools/codebench
cd tools/codebench
docker build -t kipina-pytest -f Dockerfile.pytest .
node benchmark.mjs --ollama http://localhost:11434 --scenarios all
```

View File

@@ -0,0 +1,490 @@
#!/usr/bin/env node
/**
* Kipinä CodeBench — LLM-koodingenerointibenchmark
*
* Generoi FastAPI-projekteja Ollama-malleilla ja testaa pytest:llä Docker-kontissa.
*
* Käyttö:
* node benchmark.mjs # kaikki mallit, oletusskenaario
* node benchmark.mjs --models qwen3-coder:30b # yksi malli
* node benchmark.mjs --ollama http://host:11434 # eri Ollama
* node benchmark.mjs --scenarios all # kaikki skenaariot
* node benchmark.mjs --output ./results/run-001 # custom output-hakemisto
*/
import { execSync } from 'child_process';
import { writeFileSync, readFileSync, mkdirSync, rmSync, existsSync, readdirSync } from 'fs';
import { dirname, join, resolve } from 'path';
import { fileURLToPath } from 'url';
// ES modules do not provide __dirname; derive this file's directory from import.meta.url.
const __dirname = dirname(fileURLToPath(import.meta.url));
// === CLI-argumentit ===
// Raw command-line tokens after the node binary and the script path.
const args = process.argv.slice(2);

/**
 * Look up the value that follows a `--name` flag on the command line.
 *
 * @param {string} name - Flag name without the leading dashes.
 * @param {string} fallback - Returned when the flag is absent or carries no value.
 * @returns {string} The token after `--name`, or `fallback`.
 */
function arg(name, fallback) {
  const flagIndex = args.indexOf(`--${name}`);
  if (flagIndex < 0) return fallback;
  const value = args[flagIndex + 1];
  // A following token that is itself a flag means this flag was given without
  // a value; do not swallow the next flag as if it were the value.
  if (!value || value.startsWith('--')) return fallback;
  return value;
}
// Base URL of the Ollama server: --ollama flag, then the OLLAMA_URL env var, then localhost.
const OLLAMA_URL = arg('ollama', process.env.OLLAMA_URL || 'http://localhost:11434');
// Optional Hub base URL; when non-empty, requests go through the Hub instead of Ollama.
const HUB_URL = arg('hub', '');
// Comma-separated model name filter; empty means every listed model is benchmarked.
const FILTER_MODELS = arg('models', '');
// Scenario selection: 'all' runs every scenario, the default runs only the first one.
const SCENARIO_FILTER = arg('scenarios', 'default');
// Directory that receives generated files, logs and reports.
const OUTPUT_DIR = arg('output', '/tmp/kipina-benchmark');
// Upper bound on validate-and-fix iterations within one pipeline run.
const MAX_FIX_ROUNDS = 2;
// === Promptien lataus tiedostoista ===
/**
 * Read a prompt from the `prompts/` directory next to this script.
 *
 * @param {string} name - Prompt file name without the `.md` extension.
 * @returns {string} File content with surrounding whitespace trimmed.
 * @throws {Error} When the prompt file does not exist.
 */
function loadPrompt(name) {
  const promptFile = join(__dirname, 'prompts', `${name}.md`);
  if (existsSync(promptFile)) {
    const raw = readFileSync(promptFile, 'utf-8');
    return raw.trim();
  }
  throw new Error(`Prompti puuttuu: ${promptFile}`);
}
const CLIENT_SYSTEM = loadPrompt('client');
const SPEC_SYSTEM = loadPrompt('spec');
const CODE_SYSTEM = loadPrompt('code');
const FIX_SYSTEM = loadPrompt('fix');
// === Kultaisten esimerkkien lataus ===
// Directory holding the reference implementations shipped with the benchmark.
const GOLDEN_DIR = join(__dirname, 'golden-examples');
// Files that make up one reference project, in the order they are shown to the model.
const GOLDEN_PY_FILES = ['models.py', 'schemas.py', 'main.py', 'test_main.py'];

/**
 * Build the reference-implementation section of the code generation prompt
 * from the `todo` golden example.
 *
 * @returns {string} Header plus one `=== file ===` section per existing file,
 *   or an empty string when the `todo` directory is missing.
 */
function loadGoldenExample() {
  const todoDir = join(GOLDEN_DIR, 'todo');
  if (!existsSync(todoDir)) return '';
  const header = '\nREFERENCE IMPLEMENTATION (todo project — follow this exact structure, style, and conventions):\n\n';
  const sections = GOLDEN_PY_FILES
    .map((fileName) => ({ fileName, filePath: join(todoDir, fileName) }))
    .filter(({ filePath }) => existsSync(filePath)) // missing files are silently skipped
    .map(({ fileName, filePath }) => `=== ${fileName} ===\n${readFileSync(filePath, 'utf-8').trim()}\n\n`);
  return header + sections.join('');
}

// Computed once at startup and reused for every code generation prompt.
const GOLDEN_EXAMPLE = loadGoldenExample();
// === Ajattelutagien siivous (gemma4, qwen3/3.5 ym.) ===
/**
 * Remove inline reasoning blocks that some models embed in their answer.
 *
 * @param {string} text - Raw model output.
 * @returns {string} Output without closed reasoning blocks, trimmed.
 */
function stripThinking(text) {
  const reasoningBlocks = [
    /<\|channel>thought[\s\S]*?<channel\|>/g, // gemma4
    /<think>[\s\S]*?<\/think>/g, // qwen3, qwen3.5
  ];
  let cleaned = text;
  for (const pattern of reasoningBlocks) {
    cleaned = cleaned.replace(pattern, '');
  }
  return cleaned.trim();
}
// === Ollama / Hub -client ===
/**
 * Send one prompt to a model and return the generated text with timing statistics.
 *
 * Requests go to the Hub when HUB_URL is set, otherwise directly to Ollama's
 * /api/chat endpoint (non-streaming).
 *
 * @param {string} model - Model name to query.
 * @param {string} prompt - User prompt.
 * @param {string} systemPrompt - System prompt; on the Ollama route it is omitted when falsy.
 * @param {number} [maxTokens=2048] - Generation limit passed to the backend.
 * @returns {Promise<{text: string, tokens: number, durationMs: number, tokPerSec: number}>}
 * @throws {Error} When the backend answers with a non-OK HTTP status.
 */
async function ollamaChat(model, prompt, systemPrompt, maxTokens = 2048) {
  const start = Date.now();
  if (HUB_URL) {
    const taskId = `bench-${Date.now()}-${Math.random().toString(36).slice(2,8)}`;
    const resp = await fetch(`${HUB_URL}/api/v1/chat/completions`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ model, prompt, task_id: taskId, system_prompt: systemPrompt, max_tokens: maxTokens }),
    });
    if (!resp.ok) throw new Error(`Hub HTTP ${resp.status}: ${await resp.text()}`);
    const data = await resp.json();
    const elapsed = Date.now() - start;
    const tokens = data.tokens_generated || 0;
    // Fall back to a wall-clock rate only when the Hub reports none; guard the
    // division so a 0 ms measurement cannot yield NaN or Infinity.
    const measuredRate = elapsed > 0 ? tokens / (elapsed / 1000) : 0;
    return {
      text: stripThinking((data.response || '').trim()),
      tokens,
      durationMs: elapsed,
      tokPerSec: data.tokens_per_sec || measuredRate,
    };
  }
  // Direct Ollama route
  const messages = [];
  if (systemPrompt) messages.push({ role: 'system', content: systemPrompt });
  messages.push({ role: 'user', content: prompt });
  const resp = await fetch(`${OLLAMA_URL}/api/chat`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model,
      messages,
      stream: false,
      think: false,
      options: { num_predict: maxTokens, temperature: 0.7, top_k: 40, repeat_penalty: 1.15 },
    }),
  });
  if (!resp.ok) throw new Error(`Ollama HTTP ${resp.status}: ${await resp.text()}`);
  const data = await resp.json();
  const elapsed = Date.now() - start;
  const rawContent = (data.message?.content || '').trim();
  const thinking = (data.message?.thinking || '').trim();
  // When the answer field is empty, use the reasoning field as the answer instead.
  const text = stripThinking(rawContent || thinking);
  const evalCount = data.eval_count || 0;
  if (!rawContent && thinking) console.log(` ⚠ thinking-malli: ${thinking.length} merkkiä ajattelua, content tyhjä`);
  // eval_duration is divided by 1e9 to get seconds. If it is missing, report 0
  // rather than dividing by a placeholder of 1 ns, which produced absurd rates.
  const evalDurationNs = data.eval_duration || 0;
  const tokPerSec = evalDurationNs > 0 ? evalCount / (evalDurationNs / 1e9) : 0;
  return { text, tokens: evalCount, durationMs: elapsed, tokPerSec };
}
/**
 * List the model names available on the configured backend.
 *
 * @returns {Promise<string[]>} Model names; empty when the response has no `models` field.
 * @throws {Error} When the tags endpoint answers with a non-OK HTTP status.
 */
async function ollamaListModels() {
  let tagsUrl = `${OLLAMA_URL}/api/tags`;
  if (HUB_URL) tagsUrl = `${HUB_URL}/api/v1/ollama/tags`;
  const resp = await fetch(tagsUrl);
  if (!resp.ok) throw new Error(`Tags: HTTP ${resp.status}`);
  const { models } = await resp.json();
  return (models || []).map(({ name }) => name);
}
// === Tiedostoparseri LLM-vastauksesta ===
// A line holding nothing but a markdown code-fence marker, e.g. "```" or "```python".
const FENCE_LINE = /^\s*```[\w+-]*\s*$/;

/**
 * Remove the markdown fences around one generated file body.
 *
 * When the body opens with a fence, only the content up to the matching
 * closing fence is kept, so trailing prose is dropped. Otherwise every
 * fence-only line is removed; this covers a closing fence followed by the
 * opening fence of the next file.
 */
function stripCodeFences(body) {
  const lines = body.trim().split('\n');
  if (FENCE_LINE.test(lines[0])) {
    const closing = lines.findIndex((line, idx) => idx > 0 && FENCE_LINE.test(line));
    return lines.slice(1, closing === -1 ? undefined : closing).join('\n').trim();
  }
  return lines.filter((line) => !FENCE_LINE.test(line)).join('\n').trim();
}

/**
 * Split a model response into files using `=== name.py ===` section headers.
 *
 * @param {string} text - Raw model response.
 * @returns {Object<string, string>} Map of file name to content (newline-terminated).
 *   Sections whose body is empty after fence removal are omitted.
 */
function parseGeneratedFiles(text) {
  const files = {};
  // split() with one capture group yields [preamble, name1, body1, name2, body2, ...].
  const sections = text.split(/===\s*(\S+\.(?:py|toml))\s*===/);
  for (let i = 1; i < sections.length - 1; i += 2) {
    const content = stripCodeFences(sections[i + 1]);
    if (content) files[sections[i]] = `${content}\n`;
  }
  return files;
}
// === Validaattori ===
/**
 * Static sanity checks on the generated Python files.
 *
 * Checks, per `.py` file: relative imports, names imported from the sibling
 * modules models/schemas/main that do not occur in that module's text,
 * missing `datetime` imports in schemas.py, and lowercase `true`/`false`.
 *
 * @param {Object<string, string>} files - Map of file name to source text.
 * @returns {string[]} Issue lines of the form `ISSUE: <file>[:<line>]: <message>`.
 */
function validateProjectCode(files) {
  const issues = [];
  for (const [fname, code] of Object.entries(files)) {
    if (!fname.endsWith('.py')) continue;
    const lines = code.split('\n');
    for (const line of lines) {
      if (/^from\s+\.(\w*)\s+import/.test(line)) issues.push(`ISSUE: ${fname}: relatiivinen import`);
    }
    for (const line of lines) {
      const m = line.match(/^from\s+(models|schemas|main)\s+import\s+(.+)/);
      if (!m) continue;
      const srcCode = files[m[1] + '.py'];
      if (!srcCode) { issues.push(`ISSUE: ${fname}: ${m[1]}.py puuttuu`); continue; }
      // Strip trailing comments, parentheses and line continuations so valid
      // forms like `import (A, B)  # noqa` are not reported as missing names.
      const importList = m[2].replace(/#.*$/, '').replace(/[()\\]/g, '');
      const names = importList.split(',').map(n => n.trim().split(/\s+as\s+/)[0].trim());
      for (const name of names) {
        // Empty entries come from multi-line imports; `*` names nothing specific.
        if (!name || name === '*') continue;
        if (!srcCode.includes(name)) issues.push(`ISSUE: ${fname}: "${name}" puuttuu ${m[1]}.py:stä`);
      }
    }
    if (fname === 'schemas.py') {
      if (/:\s*date\b/.test(code) && !/from datetime import/.test(code))
        issues.push('ISSUE: schemas.py: date-import puuttuu');
      if (/:\s*datetime\b/.test(code) && !/from datetime import/.test(code))
        issues.push('ISSUE: schemas.py: datetime-import puuttuu');
    }
    for (let i = 0; i < lines.length; i++) {
      const line = lines[i];
      // Skip comment-only and blank lines.
      if (/^\s*#/.test(line) || /^\s*$/.test(line)) continue;
      // JSON-style booleans that are not part of an identifier or directly quoted.
      if (/(?<!["\w])false(?![\w"])/.test(line)) issues.push(`ISSUE: ${fname}:${i+1}: "false" → "False"`);
      if (/(?<!["\w])true(?![\w"])/.test(line)) issues.push(`ISSUE: ${fname}:${i+1}: "true" → "True"`);
    }
  }
  return issues;
}
/**
 * Scan `source` for the first balanced `{...}` span that parses as JSON.
 *
 * @param {string} source - Text to scan.
 * @param {boolean} stringAware - When true, braces inside double-quoted
 *   strings (within an object) do not affect the depth count.
 * @returns {*} The parsed value, or null when no span parses.
 */
function scanForJsonObject(source, stringAware) {
  let depth = 0;
  let start = -1;
  let inString = false;
  let escaped = false;
  for (let i = 0; i < source.length; i++) {
    const ch = source[i];
    if (inString) {
      if (escaped) escaped = false;
      else if (ch === '\\') escaped = true;
      else if (ch === '"') inString = false;
      continue;
    }
    if (ch === '"') {
      // Quotes in surrounding prose (depth 0) are ignored on purpose.
      if (stringAware && depth > 0) inString = true;
      continue;
    }
    if (ch === '{') {
      if (depth === 0) start = i;
      depth++;
    } else if (ch === '}' && depth > 0) {
      // `depth > 0` keeps a stray closing brace from driving depth negative.
      depth--;
      if (depth === 0) {
        try {
          return JSON.parse(source.slice(start, i + 1));
        } catch {
          // Not valid JSON; keep scanning for a later candidate.
        }
      }
    }
  }
  return null;
}

/**
 * Extract the first JSON object from a model response.
 *
 * If the text has a fenced block (``` or ```json), only its content is
 * scanned. A string-aware scan runs first; a plain brace-counting scan is the
 * fallback for text where an unbalanced quote would confuse the first pass.
 *
 * @param {string} text - Raw model response.
 * @returns {*} Parsed JSON value, or null when none is found.
 */
function extractJson(text) {
  const fenced = text.match(/```(?:json)?\s*\n([\s\S]*?)```/);
  const source = fenced ? fenced[1].trim() : text;
  return scanForJsonObject(source, true) ?? scanForJsonObject(source, false);
}
// === Testiskenaariot ===
// Benchmark scenarios. `id` names the scenario in output paths and reports;
// `prompt` is the text sent to the model in the requirements step.
// The first entry is the one used when not all scenarios are requested.
const SCENARIOS = [
  { id: 'todo', prompt: 'Todo-sovellus: tehtävien hallinta, deadline, prioriteetti ja status' },
  { id: 'users', prompt: 'REST API käyttäjähallinnalle SQLite-tietokannalla' },
  { id: 'blog', prompt: 'Blogi-API: kirjoittajat ja artikkelit, julkaisupäivämäärä ja status' },
];
// === Pisteytys (0–100) ja tähtiluokitus ===
/**
 * Compute the 0–100 score of one pipeline result.
 *
 * Components: 10 for a usable spec, 10 for reaching code/tests, up to 60 by
 * test pass ratio, and 20 minus 10 per fix round (never below 0). A result
 * that has an error and ran no tests scores 0.
 *
 * @param {object} r - Pipeline result (reads error, specOk, testsTotal, testsPassed, fixRounds).
 * @returns {number} Score capped at 100.
 */
function scoreResult(r) {
  if (r.error && r.testsTotal === 0) return 0;
  const specPoints = r.specOk ? 10 : 0;
  const codePoints = !r.error || r.testsTotal > 0 ? 10 : 0;
  const testPoints = r.testsTotal > 0 ? Math.round((r.testsPassed / r.testsTotal) * 60) : 0;
  const fixPoints = Math.max(0, 20 - r.fixRounds * 10);
  return Math.min(100, specPoints + codePoints + testPoints + fixPoints);
}
/**
 * Map a numeric score to a five-star rating string.
 *
 * @param {number} score - Score, normally 0–100.
 * @returns {string} Five characters of filled and empty stars.
 */
function starsForScore(score) {
  // Thresholds are checked from the highest down; the first match wins.
  const tiers = [
    [90, '★★★★★'],
    [70, '★★★★☆'],
    [50, '★★★☆☆'],
    [25, '★★☆☆☆'],
  ];
  for (const [minScore, stars] of tiers) {
    if (score >= minScore) return stars;
  }
  return score > 0 ? '★☆☆☆☆' : '☆☆☆☆☆';
}
// === Pipeline: yhdelle mallille ja skenaariolle ===
/**
 * Run the five-step benchmark pipeline for one model and one scenario:
 * requirements → JSON spec → code generation → validation with fix rounds →
 * pytest in Docker. Intermediate artefacts are written to a per-run directory
 * under OUTPUT_DIR.
 *
 * The function does not throw for pipeline failures; they are reported through
 * `result.error`. Totals, score and stars are filled in on every exit path.
 *
 * @param {string} model - Model name.
 * @param {{id: string, prompt: string}} scenario - Scenario to generate.
 * @returns {Promise<object>} Result record (flags, test counts, timings, score, stars, error).
 */
async function runPipeline(model, scenario) {
  const result = {
    model, scenario: scenario.id,
    reqOk: false, specOk: false, specEntities: 0,
    validationIssues: 0, fixRounds: 0,
    testsTotal: 0, testsPassed: 0, testsFailed: 0,
    totalDurationMs: 0, totalTokens: 0, avgTokPerSec: 0,
    promptChars: 0, promptTokensEst: 0,
    score: 0, stars: '',
    error: null,
  };
  const timings = [];
  // Resolve to an absolute path: `docker run -v` needs an absolute host path,
  // and --output may be relative (e.g. ./results/run-001).
  const dir = resolve(OUTPUT_DIR, `${model.replace(/[/:]/g, '_')}__${scenario.id}`);
  mkdirSync(dir, { recursive: true });
  try {
    // 1. Requirements
    console.log(` [1/5] Vaatimukset...`);
    const req = await ollamaChat(model, scenario.prompt, CLIENT_SYSTEM, 2048);
    timings.push(req);
    if (!req.text || req.text.length < 50) { result.error = 'Vaatimukset liian lyhyet'; return result; }
    result.reqOk = true;
    writeFileSync(`${dir}/_requirements.txt`, req.text);
    // 2. JSON spec
    console.log(` [2/5] JSON-speksi...`);
    const specResp = await ollamaChat(model, `${req.text}\n\nOutput a JSON spec for this project.`, SPEC_SYSTEM, 4096);
    timings.push(specResp);
    const spec = extractJson(specResp.text);
    if (!spec || !spec.entities || spec.entities.length === 0) {
      result.error = 'JSON-speksi epäonnistui';
      // Keep the raw answer so a failed spec can be inspected afterwards.
      writeFileSync(`${dir}/_spec_raw.txt`, specResp.text);
      return result;
    }
    result.specOk = true;
    result.specEntities = spec.entities.length;
    writeFileSync(`${dir}/_spec.json`, JSON.stringify(spec, null, 2));
    // 3. LLM code generation
    console.log(` [3/5] Koodigenerointi (LLM)...`);
    const codePrompt = `${GOLDEN_EXAMPLE}\n---\n\nPROJECT REQUIREMENTS:\n${req.text}\n\nJSON SPECIFICATION:\n${JSON.stringify(spec, null, 2)}\n\nGenerate the complete project with all 4 files. Follow the reference implementation patterns exactly.`;
    result.promptChars = CODE_SYSTEM.length + codePrompt.length;
    // Rough estimate: about four characters per token.
    result.promptTokensEst = Math.round(result.promptChars / 4);
    const codeResp = await ollamaChat(model, codePrompt, CODE_SYSTEM, 8192);
    timings.push(codeResp);
    writeFileSync(`${dir}/_code_raw.txt`, codeResp.text);
    const files = parseGeneratedFiles(codeResp.text);
    const required = ['models.py', 'schemas.py', 'main.py', 'test_main.py'];
    const missing = required.filter(f => !files[f]);
    if (missing.length > 0) { result.error = `Puuttuvat: ${missing.join(', ')}`; return result; }
    // 4. Validation + fix loop
    let issues = validateProjectCode(files);
    let fixRound = 0;
    while (issues.length > 0 && fixRound < MAX_FIX_ROUNDS) {
      fixRound++;
      console.log(` [4/5] Korjauskierros ${fixRound} (${issues.length} ongelmaa)...`);
      // Group issues by the file named after the "ISSUE:" prefix so each file
      // is sent to the model once per round.
      const issuesByFile = {};
      for (const issue of issues) {
        const m = issue.match(/^ISSUE:\s*(\S+?):/);
        const fname = m ? m[1] : 'unknown';
        if (!issuesByFile[fname]) issuesByFile[fname] = [];
        issuesByFile[fname].push(issue);
      }
      for (const [fname, fIssues] of Object.entries(issuesByFile)) {
        if (!files[fname]) continue;
        const fixPrompt = `Fix the following issues in this Python file. Return ONLY the complete corrected file, no explanations.\n\nISSUES:\n${fIssues.join('\n')}\n\nCURRENT FILE (${fname}):\n\`\`\`python\n${files[fname]}\`\`\``;
        // NOTE(review): 2048 tokens may truncate long files — confirm the limit is sufficient.
        const fixResp = await ollamaChat(model, fixPrompt, FIX_SYSTEM, 2048);
        timings.push(fixResp);
        if (fixResp.text) {
          files[fname] = fixResp.text.replace(/^```(?:python)?\s*\n?/m, '').replace(/\n?```\s*$/m, '').trim() + '\n';
        }
      }
      issues = validateProjectCode(files);
    }
    result.validationIssues = issues.length;
    result.fixRounds = fixRound;
    // Write the generated Python files for the test container.
    for (const [fn, content] of Object.entries(files)) {
      if (fn.endsWith('.py')) writeFileSync(`${dir}/${fn}`, content);
    }
    // 5. Pytest inside Docker (kipina-pytest image)
    console.log(` [5/5] Pytest (Docker)...`);
    try {
      const pytestOut = execSync(
        `docker run --rm -v "${dir}:/src:ro" kipina-pytest 2>&1`,
        { timeout: 120000, encoding: 'utf-8' }
      );
      writeFileSync(`${dir}/_pytest.txt`, pytestOut);
      const passedMatch = pytestOut.match(/(\d+) passed/);
      const failedMatch = pytestOut.match(/(\d+) failed/);
      result.testsPassed = passedMatch ? parseInt(passedMatch[1], 10) : 0;
      result.testsFailed = failedMatch ? parseInt(failedMatch[1], 10) : 0;
      result.testsTotal = result.testsPassed + result.testsFailed;
    } catch (e) {
      // execSync throws on a non-zero exit or timeout; the captured output may
      // still contain a pytest summary, so parse whatever is available.
      const output = e.stdout || e.stderr || e.message || '';
      writeFileSync(`${dir}/_pytest.txt`, output);
      const passedMatch = output.match(/(\d+) passed/);
      const failedMatch = output.match(/(\d+) failed/);
      const errorMatch = output.match(/(\d+) error/);
      result.testsPassed = passedMatch ? parseInt(passedMatch[1], 10) : 0;
      // Collection/setup errors count as failures.
      result.testsFailed = (failedMatch ? parseInt(failedMatch[1], 10) : 0) + (errorMatch ? parseInt(errorMatch[1], 10) : 0);
      result.testsTotal = result.testsPassed + result.testsFailed;
      if (result.testsTotal === 0) result.error = 'Pytest kaatui';
    }
  } catch (e) {
    result.error = e.message;
  } finally {
    // Summary. Runs on every exit path, including the early returns above, so
    // failed runs still report their time, tokens and a star string.
    result.totalDurationMs = timings.reduce((s, t) => s + t.durationMs, 0);
    result.totalTokens = timings.reduce((s, t) => s + t.tokens, 0);
    result.avgTokPerSec = timings.length > 0 ? timings.reduce((s, t) => s + t.tokPerSec, 0) / timings.length : 0;
    result.score = scoreResult(result);
    result.stars = starsForScore(result.score);
  }
  return result;
}
// === Main ===
/**
 * Entry point: list models, run every model × scenario combination, print the
 * result tables and write results.json plus the optional HTML report.
 *
 * Exits the process with code 1 when the model list cannot be fetched.
 * The output directory is deleted and recreated before the run.
 */
async function main() {
  console.log('╔══════════════════════════════════════════════╗');
  console.log('║ Kipinä CodeBench ║');
  console.log('╚══════════════════════════════════════════════╝');
  console.log(`Ollama: ${OLLAMA_URL}`);
  // Fetch the model list
  let models;
  try {
    models = await ollamaListModels();
  } catch (e) {
    // Name the endpoint that was actually contacted (the Hub when it is configured).
    console.error(`Ei yhteyttä Ollamaan (${HUB_URL || OLLAMA_URL}): ${e.message}`);
    process.exit(1);
  }
  if (FILTER_MODELS) {
    // Substring match: a filter entry selects every model whose name contains it.
    const filter = FILTER_MODELS.split(',').map(s => s.trim());
    models = models.filter(m => filter.some(f => m.includes(f)));
  }
  console.log(`Mallit (${models.length}): ${models.join(', ')}`);
  // 'all' runs everything; a comma-separated list of known ids selects those
  // scenarios; anything else (including 'default') runs the first scenario.
  const requestedIds = SCENARIO_FILTER.split(',').map(s => s.trim());
  const selected = SCENARIOS.filter(s => requestedIds.includes(s.id));
  const scenarios = SCENARIO_FILTER === 'all' ? SCENARIOS : selected.length > 0 ? selected : [SCENARIOS[0]];
  console.log(`Skenaariot (${scenarios.length}): ${scenarios.map(s => s.id).join(', ')}`);
  console.log(`Tulokset: ${OUTPUT_DIR}/`);
  console.log('');
  // Clean the output directory
  rmSync(OUTPUT_DIR, { recursive: true, force: true });
  mkdirSync(OUTPUT_DIR, { recursive: true });
  const results = [];
  for (const model of models) {
    for (const scenario of scenarios) {
      console.log(`\n━━━ ${model} × ${scenario.id} ━━━`);
      const r = await runPipeline(model, scenario);
      results.push(r);
      const status = r.error ? `${r.error}` : `${r.testsPassed}/${r.testsTotal}`;
      const ctxInfo = r.promptTokensEst > 0 ? ` | ctx ~${(r.promptTokensEst/1000).toFixed(1)}K` : '';
      console.log(`${status} | ${r.stars} ${r.score}p | ${(r.totalDurationMs/1000).toFixed(1)}s | ${r.totalTokens} tok | ${r.avgTokPerSec.toFixed(1)} tok/s${ctxInfo}`);
    }
  }
  // === Result table ===
  console.log('\n\n╔══════════════════════════════════════════════════════════════════════════════════════════════════╗');
  console.log('║ TULOKSET ║');
  console.log('╠══════════════════════════════════════════════════════════════════════════════════════════════════╣');
  const header = [
    'Malli'.padEnd(40),
    'Skenaario'.padEnd(10),
    'Speksi'.padEnd(8),
    'Testit'.padEnd(10),
    'Korjaus'.padEnd(8),
    'Ctx'.padEnd(7),
    'Aika'.padEnd(8),
    'tok/s'.padEnd(8),
    'Pisteet',
  ].join(' │ ');
  console.log(`${header}`);
  console.log('╠' + '═'.repeat(header.length + 2) + '╣');
  for (const r of results) {
    const specStatus = r.specOk ? `${r.specEntities}e` : '✗';
    const testStatus = r.testsTotal > 0 ? `${r.testsPassed}/${r.testsTotal}` : '-';
    const fixStatus = r.fixRounds > 0 ? `${r.fixRounds}×` : '-';
    const ctx = r.promptTokensEst > 0 ? `~${(r.promptTokensEst/1000).toFixed(1)}K` : '-';
    const time = `${(r.totalDurationMs/1000).toFixed(0)}s`;
    const speed = `${r.avgTokPerSec.toFixed(0)}`;
    const row = [
      r.model.padEnd(40),
      r.scenario.padEnd(10),
      specStatus.padEnd(8),
      testStatus.padEnd(10),
      fixStatus.padEnd(8),
      ctx.padEnd(7),
      time.padEnd(8),
      speed.padEnd(8),
      `${r.stars} ${r.score}`,
    ].join(' │ ');
    console.log(`${row}`);
  }
  console.log('╚' + '═'.repeat(header.length + 2) + '╝');
  // === Per-model summary ===
  const modelNames = [...new Set(results.map(r => r.model))];
  const scenarioIds = scenarios.map(s => s.id);
  console.log('\n');
  const mHeader = [
    'Malli'.padEnd(35),
    ...scenarioIds.map(s => s.padEnd(22)),
    'Yht.'.padEnd(8),
    'Out'.padEnd(7),
    'Aika'.padEnd(8),
    'tok/s'.padEnd(7),
    'Pisteet',
  ].join(' │ ');
  console.log(mHeader);
  console.log('─'.repeat(mHeader.length));
  for (const model of modelNames) {
    const mrs = results.filter(r => r.model === model);
    // One column per scenario: "passed/total time tokens".
    const cols = scenarioIds.map(sid => {
      const r = mrs.find(r => r.scenario === sid);
      if (!r) return '-'.padEnd(22);
      const t = r.testsTotal > 0 ? `${r.testsPassed}/${r.testsTotal}` : '-';
      const s = `${(r.totalDurationMs/1000).toFixed(0)}s`;
      const tok = r.totalTokens > 1000 ? `${(r.totalTokens/1000).toFixed(1)}K` : `${r.totalTokens}`;
      return `${t} ${s} ${tok}`.padEnd(22);
    });
    const totalPassed = mrs.reduce((s, r) => s + r.testsPassed, 0);
    const totalTests = mrs.reduce((s, r) => s + r.testsTotal, 0);
    const totalTokens = mrs.reduce((s, r) => s + r.totalTokens, 0);
    const totalTime = mrs.reduce((s, r) => s + r.totalDurationMs, 0);
    const avgSpeed = mrs.length > 0 ? Math.round(mrs.reduce((s, r) => s + r.avgTokPerSec, 0) / mrs.length) : 0;
    const avgScoreModel = mrs.length > 0 ? Math.round(mrs.reduce((s, r) => s + r.score, 0) / mrs.length) : 0;
    const pct = totalTests > 0 ? Math.round(totalPassed / totalTests * 100) : 0;
    const tokStr = totalTokens > 1000 ? `${(totalTokens/1000).toFixed(1)}K` : `${totalTokens}`;
    const row = [
      model.padEnd(35),
      ...cols,
      `${totalPassed}/${totalTests}`.padEnd(8),
      tokStr.padEnd(7),
      `${(totalTime/1000).toFixed(0)}s`.padEnd(8),
      `${avgSpeed}`.padEnd(7),
      `${starsForScore(avgScoreModel)} ${avgScoreModel}p (${pct}%)`,
    ].join(' │ ');
    console.log(row);
  }
  // Save the JSON results and the HTML report
  writeFileSync(`${OUTPUT_DIR}/results.json`, JSON.stringify(results, null, 2));
  const templatePath = join(__dirname, 'report-template.html');
  if (existsSync(templatePath)) {
    // Escape "<" so text such as "</script>" in error messages cannot end the
    // surrounding HTML script element; \u003c is still valid JSON.
    const reportJson = JSON.stringify(results).replace(/</g, '\\u003c');
    // A function replacer inserts the text verbatim. A replacement *string*
    // would interpret "$&", "$'" and similar sequences found in the data.
    const html = readFileSync(templatePath, 'utf-8').replace(
      '/*DATA_PLACEHOLDER*/[]',
      () => reportJson
    );
    writeFileSync(`${OUTPUT_DIR}/report.html`, html);
    console.log(`\nRaportti: ${OUTPUT_DIR}/report.html`);
  }
  console.log(`JSON: ${OUTPUT_DIR}/results.json`);
  // Overall summary
  const passed = results.filter(r => !r.error && r.testsPassed === r.testsTotal && r.testsTotal > 0);
  const partial = results.filter(r => !r.error && r.testsPassed < r.testsTotal && r.testsTotal > 0);
  const failed = results.filter(r => r.error || r.testsTotal === 0);
  const avgScore = results.length > 0 ? Math.round(results.reduce((s, r) => s + r.score, 0) / results.length) : 0;
  const totalTime = results.reduce((s, r) => s + r.totalDurationMs, 0);
  console.log(`\n${starsForScore(avgScore)} Keskiarvo: ${avgScore}p | ✓ PASS: ${passed.length} | ◐ PARTIAL: ${partial.length} | ✗ FAIL: ${failed.length} | Yhteensä: ${results.length} | Kokonaisaika: ${(totalTime/1000/60).toFixed(1)} min`);
}

// Any unexpected rejection is printed and turned into a non-zero exit code.
main().catch(e => { console.error(e); process.exit(1); });

View File

@@ -0,0 +1,84 @@
# Dokumentointiohjeet — Zensical
Hyvä dokumentointi kertoo **mitä asia ON**, ei mitä se tekee. Se on kuin zen-koan: lyhyt, tarkka, riittävä.
## Periaatteet
1. **Yksi rivi riittää.** Jos tarvitset kappaleen, koodi on liian monimutkainen.
2. **Kerro mitä, älä miten.** `"""Tietokantamallit — SQLAlchemy 2.0, SQLite."""` ei `"""This module creates database models using SQLAlchemy..."""`
3. **Älä toista koodia.** Jos funktio on `create_todo`, docstring ei ole "Creates a todo".
4. **Suomi tai englanti, ei molempia.** Valitse yksi kieli per projekti.
5. **Ei täytesanoja.** "This module provides functionality for" → poista.
## Mitä dokumentoidaan
| Kohde | Dokumentointi | Esimerkki |
|-------|--------------|-----------|
| **Moduuli** (.py) | Aina. Yksi rivi: mitä tiedosto sisältää. | `"""Pydantic v2 -skeemat — Create ja Response."""` |
| **Luokka** | Aina. Mitä entiteetti edustaa. | `"""Tehtävä — otsikko, deadline, prioriteetti."""` |
| **Funktio** | Vain jos nimi ei kerro kaikkea. | `get_db` → `"""Tietokantasessio per pyyntö."""` |
| **CRUD-endpoint** | Ei. Nimi + HTTP-metodi riittää. | `create_todo`, `list_todos` — itsedokumentoivia |
| **Testi** | Ei. Testin nimi on dokumentaatio. | `test_get_todo_not_found` — selvä |
| **Konfiguraatio** | Kommentti vain jos arvo yllättää. | `check_same_thread: False # SQLite + FastAPI` |
## Mitä EI dokumentoida
- Importteja
- Ilmeisiä parametreja (`item_id: int`)
- Tyyppivihjeitä jotka kertovat saman asian
- Geneerisiä "boilerplate"-docstringejä
## Esimerkkejä
### Hyvä (zensical)
```python
"""Tietokantamallit — SQLAlchemy 2.0, Mapped-tyypitys, SQLite."""
class Todo(Base):
"""Tehtävä — otsikko, kuvaus, deadline, prioriteetti ja status."""
...
def get_db():
"""Tietokantasessio per pyyntö."""
...
```
### Huono (verbose)
```python
"""
This module defines the database models for the Todo application.
It uses SQLAlchemy ORM to create the database tables and provides
the session factory for database connections.
"""
class Todo(Base):
"""
Represents a todo item in the database.
Attributes:
id: The unique identifier for the todo item.
title: The title of the todo item.
...
"""
...
```
### Huono (tyhjä)
```python
# Ei docstringejä ollenkaan — lukija ei tiedä mikä tiedoston rooli on
class Todo(Base):
__tablename__ = "todos"
...
```
## Tarkistuslista
Generoitu koodi on hyvin dokumentoitu kun:
- [ ] Jokainen .py-tiedosto alkaa yksirivisellä docstringillä
- [ ] Jokainen luokka kertoo mitä entiteetti edustaa
- [ ] Docstringit ovat samalla kielellä kuin muu koodi
- [ ] CRUD-endpointeilla ei ole turhia docstringejä
- [ ] Kommentteja on vain siellä missä koodi yllättää

View File

@@ -0,0 +1,123 @@
# Golden Examples — referenssitoteutukset
Kultaiset esimerkit ovat **täydellisiä, testattuja** FastAPI-projekteja joita LLM käyttää mallina koodigeneroinnissa. Malli näkee esimerkin ja tuottaa vastaavan rakenteen uudelle projektille.
## Uuden esimerkin luominen
### 1. Luo hakemisto
```bash
mkdir golden-examples/shop
```
Nimeä hakemisto skenaarion mukaan (todo, blog, shop, booking...).
### 2. Luo 4 tiedostoa
| Tiedosto | Sisältö |
|----------|---------|
| `models.py` | SQLAlchemy 2.0 -mallit (DeclarativeBase, Mapped, mapped_column) |
| `schemas.py` | Pydantic v2 -skeemat (ConfigDict, `str \| None` -syntaksi) |
| `main.py` | FastAPI CRUD -endpointit (POST 201, GET, GET/:id 404, PUT, DELETE 204) |
| `test_main.py` | Pytest + TestClient, erillinen test.db, uniikki data per testi |
### 3. Noudata konventioita
**Python-versio:** >=3.14
**SQLAlchemy 2.0** (ei legacy):
```python
# Oikein
class Base(DeclarativeBase):
pass
class Todo(Base):
id: Mapped[int] = mapped_column(primary_key=True, index=True)
title: Mapped[str] = mapped_column(String(255))
status: Mapped[str] = mapped_column(String(20), default="pending")
# Väärin
Base = declarative_base()
id = Column(Integer, primary_key=True)
```
**Pydantic v2** (ei v1):
```python
# Oikein
class TodoResponse(TodoCreate):
id: int
model_config = ConfigDict(from_attributes=True)
# Väärin
class Config:
orm_mode = True
```
**Tyypitys:**
```python
# Oikein
description: Mapped[str | None] = mapped_column(Text, default=None)
# Väärin
description: Mapped[Optional[str]]
```
**Dokumentointi (zensical):**
```python
"""Tietokantamallit — SQLAlchemy 2.0, Mapped-tyypitys, SQLite."""
class Todo(Base):
"""Tehtävä — otsikko, kuvaus, deadline, prioriteetti ja status."""
```
Yksi rivi riittää. Kerro mitä asia ON, älä mitä se tekee. Katso [DOCUMENTATION.md](DOCUMENTATION.md).
**Testidata — uniikki ja kuvaava:**
```python
# Oikein
def test_create_todo():
response = client.post("/todos/", json={"title": "Osta maitoa", "priority": 2})
def test_update_todo():
created = client.post("/todos/", json={"title": "Vanha otsikko"}).json()
# Väärin — geneerinen data
def test_create_todo():
response = client.post("/todos/", json={"title": "test", "priority": 1})
```
### 4. Testaa Docker-kontissa
```bash
rm -rf /tmp/golden-test && mkdir /tmp/golden-test
cp golden-examples/shop/*.py /tmp/golden-test/
docker run --rm -v /tmp/golden-test:/src:ro kipina-pytest
```
**Kaikkien testien pitää mennä läpi.** Ei varoituksia, ei deprecation-viestejä.
### 5. Vaikeustasot
| Taso | Esimerkit | Haaste |
|------|-----------|--------|
| 1 — Perus-CRUD | `todo/`, `users/`, `notes/` | Yksi entiteetti |
| 2 — Relaatiot | `blog/`, `library/`, `school/` | Foreign key, 2–3 entiteettiä |
| 3 — Liiketoimintalogiikka | `shop/`, `booking/` | Custom endpointit, validointi |
Aloita tasosta 1 ja etene. Tason 1 esimerkkien pitää olla yksinkertaisia — ne opettavat mallille perusrakenteen.
## Miten esimerkit vaikuttavat
Benchmark lataa `todo/`-esimerkin ja syöttää sen LLM:lle osana koodingenerointipromptia:
```
REFERENCE IMPLEMENTATION (todo project — follow this exact structure, style, and conventions):
=== models.py ===
<todo/models.py sisältö>
=== schemas.py ===
...
```
Malli näkee tarkan esimerkin ja tuottaa vastaavan rakenteen uudelle projektille. Mitä parempi esimerkki, sitä parempi tulos.

View File

@@ -0,0 +1,110 @@
"""FastAPI CRUD — kaksi endpoint-settiä, Author ja Post."""
from fastapi import FastAPI, Depends, HTTPException
from sqlalchemy.orm import Session
from models import SessionLocal, Author, Post
from schemas import AuthorCreate, AuthorResponse, PostCreate, PostResponse
# Application instance; the route decorators in this module register handlers on it.
app = FastAPI()
def get_db():
    """Database session per request."""
    session = SessionLocal()
    try:
        yield session
    finally:
        # Always release the session, even when the consumer raised.
        session.close()
# --- Author ---
@app.post("/authors/", response_model=AuthorResponse, status_code=201)
def create_author(item: AuthorCreate, db: Session = Depends(get_db)):
    # Store a new author built from the validated payload and return the stored row.
    payload = item.model_dump()
    author = Author(**payload)
    db.add(author)
    db.commit()
    # Reload the row so values assigned by the database are present in the response.
    db.refresh(author)
    return author
@app.get("/authors/", response_model=list[AuthorResponse])
def list_authors(db: Session = Depends(get_db)):
    # Return every author row; no filtering or paging is applied.
    authors = db.query(Author).all()
    return authors
@app.get("/authors/{item_id}", response_model=AuthorResponse)
def get_author(item_id: int, db: Session = Depends(get_db)):
    # Look up one author by primary key; respond 404 when no row matches.
    author = db.query(Author).filter(Author.id == item_id).first()
    if author is None:
        raise HTTPException(status_code=404, detail="Author not found")
    return author
@app.put("/authors/{item_id}", response_model=AuthorResponse)
def update_author(item_id: int, item: AuthorCreate, db: Session = Depends(get_db)):
    # Full replacement: every field of the payload overwrites the stored value.
    author = db.query(Author).filter(Author.id == item_id).first()
    if author is None:
        raise HTTPException(status_code=404, detail="Author not found")
    changes = item.model_dump()
    for field_name in changes:
        setattr(author, field_name, changes[field_name])
    db.commit()
    db.refresh(author)
    return author
@app.delete("/authors/{item_id}", status_code=204)
def delete_author(item_id: int, db: Session = Depends(get_db)):
    # Delete one author by primary key; respond 404 when no row matches.
    author = db.query(Author).filter(Author.id == item_id).first()
    if author is None:
        raise HTTPException(status_code=404, detail="Author not found")
    db.delete(author)
    db.commit()
# --- Post ---
@app.post("/posts/", response_model=PostResponse, status_code=201)
def create_post(item: PostCreate, db: Session = Depends(get_db)):
    # Store a new post built from the validated payload and return the stored row.
    payload = item.model_dump()
    post = Post(**payload)
    db.add(post)
    db.commit()
    # Reload the row so values assigned by the database are present in the response.
    db.refresh(post)
    return post
@app.get("/posts/", response_model=list[PostResponse])
def list_posts(db: Session = Depends(get_db)):
    # Return every post row; no filtering or paging is applied.
    posts = db.query(Post).all()
    return posts
@app.get("/posts/{item_id}", response_model=PostResponse)
def get_post(item_id: int, db: Session = Depends(get_db)):
    # Look up one post by primary key; respond 404 when no row matches.
    post = db.query(Post).filter(Post.id == item_id).first()
    if post is None:
        raise HTTPException(status_code=404, detail="Post not found")
    return post
@app.put("/posts/{item_id}", response_model=PostResponse)
def update_post(item_id: int, item: PostCreate, db: Session = Depends(get_db)):
    # Full replacement: every field of the payload overwrites the stored value.
    post = db.query(Post).filter(Post.id == item_id).first()
    if post is None:
        raise HTTPException(status_code=404, detail="Post not found")
    changes = item.model_dump()
    for field_name in changes:
        setattr(post, field_name, changes[field_name])
    db.commit()
    db.refresh(post)
    return post
@app.delete("/posts/{item_id}", status_code=204)
def delete_post(item_id: int, db: Session = Depends(get_db)):
    # Delete one post by primary key; respond 404 when no row matches.
    post = db.query(Post).filter(Post.id == item_id).first()
    if post is None:
        raise HTTPException(status_code=404, detail="Post not found")
    db.delete(post)
    db.commit()

View File

@@ -0,0 +1,45 @@
"""Tietokantamallit — SQLAlchemy 2.0, Mapped-tyypitys, ForeignKey-relaatiot."""
from datetime import datetime
from sqlalchemy import String, Text, DateTime, ForeignKey, create_engine
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship, sessionmaker
# SQLite database file app.db, addressed relative to the current working directory.
DATABASE_URL = "sqlite:///./app.db"
# check_same_thread=False is passed through to the SQLite driver; presumably needed
# because FastAPI may touch the connection from several threads (verify).
engine = create_engine(DATABASE_URL, connect_args={"check_same_thread": False})
# Session factory bound to the engine, with autocommit and autoflush switched off.
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
class Base(DeclarativeBase):
    # Common declarative base; the ORM models in this module subclass it.
    pass
class Author(Base):
    """Author — name, email and bio."""
    __tablename__ = "authors"
    id: Mapped[int] = mapped_column(primary_key=True, index=True)
    name: Mapped[str] = mapped_column(String(255))
    # Unique: a second author with the same email is rejected by the database.
    email: Mapped[str] = mapped_column(String(255), unique=True)
    bio: Mapped[str | None] = mapped_column(Text, default=None)
    # posts.author_id is NOT NULL, so a post cannot outlive its author. Without a
    # delete cascade the ORM tries to null the foreign key of the author's posts
    # when the author is deleted, and the flush fails with an IntegrityError.
    # Deleting the posts together with the author keeps DELETE /authors/{id} working.
    posts: Mapped[list["Post"]] = relationship(
        back_populates="author", cascade="all, delete-orphan"
    )
class Post(Base):
    """Blog post — title, content, author, publication time and status."""
    __tablename__ = "posts"
    id: Mapped[int] = mapped_column(primary_key=True, index=True)
    title: Mapped[str] = mapped_column(String(255))
    content: Mapped[str] = mapped_column(Text)
    # Required reference to the owning row in "authors".
    author_id: Mapped[int] = mapped_column(ForeignKey("authors.id"))
    # None until a publication time is set.
    published_at: Mapped[datetime | None] = mapped_column(DateTime, default=None)
    # Free-form status string; new posts start as "draft".
    status: Mapped[str] = mapped_column(String(20), default="draft")
    # Many-to-one side of Author.posts.
    author: Mapped["Author"] = relationship(back_populates="posts")
# Runs at import time: creates the tables declared above on the app engine.
Base.metadata.create_all(bind=engine)

View File

@@ -0,0 +1,37 @@
"""Pydantic v2 -skeemat — Create sisääntulolle, Response vastaukselle."""
from datetime import datetime
from pydantic import BaseModel, ConfigDict
class AuthorCreate(BaseModel):
    """Payload for creating an author. Required: name, email."""
    name: str
    email: str
    bio: str | None = None
class AuthorResponse(AuthorCreate):
    """Author as returned by the API — the create fields plus the id."""
    id: int
    # from_attributes=True lets the schema be populated from ORM object attributes.
    model_config = ConfigDict(from_attributes=True)
class PostCreate(BaseModel):
    """Payload for creating a post. Required: title, content, author_id."""
    title: str
    content: str
    author_id: int
    published_at: datetime | None = None
    status: str = "draft"
class PostResponse(PostCreate):
    """Post as returned by the API — the create fields plus the id."""
    id: int
    # from_attributes=True lets the schema be populated from ORM object attributes.
    model_config = ConfigDict(from_attributes=True)

View File

@@ -0,0 +1,164 @@
"""Pytest — TestClient, erillinen test.db, uniikki data per testi."""
from fastapi.testclient import TestClient
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from main import app, get_db
from models import Base
# Dedicated SQLite file for the tests, separate from the application's app.db.
test_engine = create_engine(
    "sqlite:///./test.db", connect_args={"check_same_thread": False}
)
TestSession = sessionmaker(autocommit=False, autoflush=False, bind=test_engine)
# Start every run from empty tables: rows left in test.db by an earlier run
# would collide with the unique author email and turn the creates into errors.
Base.metadata.drop_all(bind=test_engine)
Base.metadata.create_all(bind=test_engine)
def override_get_db():
    """Yield a session on the test database and close it once the caller is done."""
    session = TestSession()
    try:
        yield session
    finally:
        session.close()
# Route the app's get_db dependency to the test-database session.
app.dependency_overrides[get_db] = override_get_db
# Module-level client shared by every test below.
client = TestClient(app)
def _create_author(name="Eino Leino", email=None):
    """Create an author through the API and return the response body as a dict.

    When no email is given, one is derived from the name
    (lower-cased, spaces replaced by dots, @example.com).
    """
    if email is None:
        email = f"{name.lower().replace(' ', '.')}@example.com"
    response = client.post("/authors/", json={"name": name, "email": email})
    # Fail here with the server's answer instead of a KeyError on "id" in the caller.
    assert response.status_code == 201, response.text
    return response.json()
# --- Author-testit ---
def test_create_author():
    """POST /authors/ answers 201 and echoes name and bio together with an id."""
    payload = {"name": "Aleksis Kivi", "email": "aleksis@example.com", "bio": "Suomen kansalliskirjailija"}
    response = client.post("/authors/", json=payload)
    assert response.status_code == 201
    body = response.json()
    assert body["name"] == "Aleksis Kivi"
    assert body["bio"] == "Suomen kansalliskirjailija"
    assert "id" in body
def test_list_authors():
    """GET /authors/ answers 200 with at least the author created here."""
    _create_author("Minna Canth", "minna.canth@example.com")
    listing = client.get("/authors/")
    assert listing.status_code == 200
    authors = listing.json()
    assert len(authors) >= 1
def test_get_author_by_id():
    """GET /authors/{id} answers 200 with the author that was just created."""
    created = _create_author("Väinö Linna", "vaino.linna@example.com")
    author_id = created["id"]
    fetched = client.get(f"/authors/{author_id}")
    assert fetched.status_code == 200
    assert fetched.json()["id"] == author_id
def test_get_author_not_found():
    """GET /authors/99999, an id no test creates, answers 404."""
    missing = client.get("/authors/99999")
    assert missing.status_code == 404
def test_update_author():
    """PUT /authors/{id} answers 200 and returns the new name."""
    created = _create_author("Vanha Nimi", "vanha.nimi@example.com")
    replacement = {"name": "Uusi Nimi", "email": "uusi.nimi@example.com"}
    updated = client.put(f"/authors/{created['id']}", json=replacement)
    assert updated.status_code == 200
    assert updated.json()["name"] == "Uusi Nimi"
def test_delete_author():
    """DELETE /authors/{id} answers 204 and a following GET answers 404."""
    created = _create_author("Poistettava Kirjailija", "poistettava@example.com")
    author_url = f"/authors/{created['id']}"
    deleted = client.delete(author_url)
    assert deleted.status_code == 204
    lookup = client.get(author_url)
    assert lookup.status_code == 404
# --- Post-testit ---
def test_create_post():
    """POST /posts/ answers 201, keeps title and author_id, and defaults status to "draft"."""
    author = _create_author("Tove Jansson", "tove.jansson@example.com")
    payload = {"title": "Muumipeikko ja pyrstötähti", "content": "Eräänä aamuna...", "author_id": author["id"]}
    response = client.post("/posts/", json=payload)
    assert response.status_code == 201
    body = response.json()
    assert body["title"] == "Muumipeikko ja pyrstötähti"
    assert body["author_id"] == author["id"]
    assert body["status"] == "draft"
def test_list_posts():
    """GET /posts/ answers 200 with at least the post created here."""
    author = _create_author("Juhani Aho", "juhani.aho@example.com")
    payload = {"title": "Rautatie", "content": "Junasta kertova novelli.", "author_id": author["id"]}
    client.post("/posts/", json=payload)
    listing = client.get("/posts/")
    assert listing.status_code == 200
    posts = listing.json()
    assert len(posts) >= 1
def test_get_post_by_id():
    """GET /posts/{id} answers 200 with the post that was just created."""
    author = _create_author("Elias Lönnrot", "elias.lonnrot@example.com")
    payload = {"title": "Kalevala", "content": "Vaka vanha Väinämöinen.", "author_id": author["id"]}
    created = client.post("/posts/", json=payload).json()
    post_id = created["id"]
    fetched = client.get(f"/posts/{post_id}")
    assert fetched.status_code == 200
    assert fetched.json()["id"] == post_id
def test_get_post_not_found():
    """GET /posts/99999, an id no test creates, answers 404."""
    missing = client.get("/posts/99999")
    assert missing.status_code == 404
def test_update_post():
    """PUT /posts/{id} answers 200 and returns the new title and status."""
    author = _create_author("Joel Lehtonen", "joel.lehtonen@example.com")
    original = {"title": "Vanha otsikko", "content": "Alkuperäinen teksti.", "author_id": author["id"]}
    created = client.post("/posts/", json=original).json()
    # The update payload repeats every required field, not only the changed ones.
    replacement = {"title": "Päivitetty otsikko", "content": "Muokattu teksti.", "author_id": author["id"], "status": "published"}
    updated = client.put(f"/posts/{created['id']}", json=replacement)
    assert updated.status_code == 200
    body = updated.json()
    assert body["title"] == "Päivitetty otsikko"
    assert body["status"] == "published"
def test_delete_post():
    """DELETE /posts/{id} answers 204 and a following GET answers 404."""
    author = _create_author("Aino Kallas", "aino.kallas@example.com")
    payload = {"title": "Poistettava postaus", "content": "Tämä poistetaan.", "author_id": author["id"]}
    created = client.post("/posts/", json=payload).json()
    post_url = f"/posts/{created['id']}"
    deleted = client.delete(post_url)
    assert deleted.status_code == 204
    lookup = client.get(post_url)
    assert lookup.status_code == 404
def test_post_belongs_to_author():
    """A created post reports the id of the author it was created for."""
    author = _create_author("Sofi Oksanen", "sofi.oksanen@example.com")
    payload = {"title": "Puhdistus", "content": "Romaani Virosta.", "author_id": author["id"]}
    response = client.post("/posts/", json=payload)
    post = response.json()
    assert post["author_id"] == author["id"]

View File

@@ -0,0 +1,61 @@
"""FastAPI CRUD — yksi endpoint-setti per entiteetti."""
from fastapi import FastAPI, Depends, HTTPException
from sqlalchemy.orm import Session
from models import SessionLocal, Todo
from schemas import TodoCreate, TodoResponse
# Application object that the route decorators below register on.
app = FastAPI()
def get_db():
    """Database session per request."""
    # Leaving the with-block closes the session, whether or not the request raised.
    with SessionLocal() as session:
        yield session
@app.post("/todos/", response_model=TodoResponse, status_code=201)
def create_todo(item: TodoCreate, db: Session = Depends(get_db)):
    """Store a new todo built from the request payload and return the saved row."""
    payload = item.model_dump()
    new_todo = Todo(**payload)
    db.add(new_todo)
    db.commit()
    # Reload the row so values assigned on insert are present on the object.
    db.refresh(new_todo)
    return new_todo
@app.get("/todos/", response_model=list[TodoResponse])
def list_todos(db: Session = Depends(get_db)):
    """Return every stored todo."""
    all_todos = db.query(Todo).all()
    return all_todos
@app.get("/todos/{item_id}", response_model=TodoResponse)
def get_todo(item_id: int, db: Session = Depends(get_db)):
    """Return the todo with the given id, or answer 404 when no row matches."""
    matching = db.query(Todo).filter(Todo.id == item_id)
    found = matching.first()
    if found:
        return found
    raise HTTPException(status_code=404, detail="Todo not found")
@app.put("/todos/{item_id}", response_model=TodoResponse)
def update_todo(item_id: int, item: TodoCreate, db: Session = Depends(get_db)):
    """Overwrite every field of an existing todo with the payload; 404 when the id is unknown."""
    existing = db.query(Todo).filter(Todo.id == item_id).first()
    if not existing:
        raise HTTPException(status_code=404, detail="Todo not found")
    # Full replacement: every field of the payload is written, defaults included.
    new_values = item.model_dump()
    for field_name in new_values:
        setattr(existing, field_name, new_values[field_name])
    db.commit()
    db.refresh(existing)
    return existing
@app.delete("/todos/{item_id}", status_code=204)
def delete_todo(item_id: int, db: Session = Depends(get_db)):
    """Delete the todo with the given id; 404 when the id is unknown."""
    doomed = db.query(Todo).filter(Todo.id == item_id).first()
    if not doomed:
        raise HTTPException(status_code=404, detail="Todo not found")
    db.delete(doomed)
    db.commit()

View File

@@ -0,0 +1,30 @@
"""Tietokantamallit — SQLAlchemy 2.0, Mapped-tyypitys, SQLite."""
from datetime import date
from sqlalchemy import String, Text, Date, create_engine
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, sessionmaker
# SQLite database file, resolved relative to the process working directory.
DATABASE_URL = "sqlite:///./app.db"
# check_same_thread=False is handed to the SQLite driver so a connection may be
# used from a thread other than the one that opened it.
engine = create_engine(DATABASE_URL, connect_args={"check_same_thread": False})
# Session factory bound to the engine; autocommit and autoflush are both off.
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
class Base(DeclarativeBase):
    """Declarative base that the ORM model in this module inherits from."""
    pass
class Todo(Base):
    """Todo item — title, description, due date, priority and status."""
    __tablename__ = "todos"
    id: Mapped[int] = mapped_column(primary_key=True, index=True)
    title: Mapped[str] = mapped_column(String(255))
    description: Mapped[str | None] = mapped_column(Text, default=None)
    due_date: Mapped[date | None] = mapped_column(Date, default=None)
    # Integer priority; new rows default to 1.
    priority: Mapped[int] = mapped_column(default=1)
    # Free-form status string; new rows start as "pending".
    status: Mapped[str] = mapped_column(String(20), default="pending")
# Runs at import time: creates the table declared above on the app engine.
Base.metadata.create_all(bind=engine)

View File

@@ -0,0 +1,11 @@
[project]
name = "todo-app"
version = "0.1.0"
requires-python = ">=3.14"
dependencies = [
"fastapi",
"uvicorn[standard]",
"sqlalchemy",
"pytest",
"httpx",
]

View File

@@ -0,0 +1,22 @@
"""Pydantic v2 -skeemat — Create sisääntulolle, Response vastaukselle."""
from datetime import date
from pydantic import BaseModel, ConfigDict
class TodoCreate(BaseModel):
    """Payload for creating a todo. Required: title."""
    title: str
    description: str | None = None
    due_date: date | None = None
    priority: int = 1
    status: str = "pending"
class TodoResponse(TodoCreate):
    """Todo as returned by the API — the create fields plus the id."""
    id: int
    # from_attributes=True lets the schema be populated from ORM object attributes.
    model_config = ConfigDict(from_attributes=True)

View File

@@ -0,0 +1,69 @@
"""Pytest — TestClient, erillinen test.db, uniikki data per testi."""
from fastapi.testclient import TestClient
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from main import app, get_db
from models import Base
# Dedicated SQLite file for the tests, separate from the application's app.db.
test_engine = create_engine(
    "sqlite:///./test.db", connect_args={"check_same_thread": False}
)
# Session factory for the test database; autocommit and autoflush are both off.
TestSession = sessionmaker(autocommit=False, autoflush=False, bind=test_engine)
# Create the model tables in the test database before any test runs.
Base.metadata.create_all(bind=test_engine)
def override_get_db():
    """Yield a session on the test database and close it once the caller is done."""
    session = TestSession()
    try:
        yield session
    finally:
        session.close()
# Route the app's get_db dependency to the test-database session.
app.dependency_overrides[get_db] = override_get_db
# Module-level client shared by every test below.
client = TestClient(app)
def test_create_todo():
    """POST /todos/ answers 201 and echoes the title together with an id."""
    payload = {"title": "Osta maitoa", "priority": 2}
    response = client.post("/todos/", json=payload)
    assert response.status_code == 201
    body = response.json()
    assert body["title"] == "Osta maitoa"
    assert "id" in body
def test_list_todos():
    """GET /todos/ answers 200 with at least the todo created here."""
    client.post("/todos/", json={"title": "Listattava tehtävä"})
    listing = client.get("/todos/")
    assert listing.status_code == 200
    todos = listing.json()
    assert len(todos) >= 1
def test_get_todo_by_id():
    """GET /todos/{id} answers 200 with the todo that was just created."""
    created = client.post("/todos/", json={"title": "Haettava tehtävä"}).json()
    todo_id = created["id"]
    fetched = client.get(f"/todos/{todo_id}")
    assert fetched.status_code == 200
    assert fetched.json()["id"] == todo_id
def test_get_todo_not_found():
    """GET /todos/99999, an id no test creates, answers 404."""
    missing = client.get("/todos/99999")
    assert missing.status_code == 404
def test_update_todo():
    """PUT /todos/{id} answers 200 and returns the new title."""
    created = client.post("/todos/", json={"title": "Vanha otsikko"}).json()
    replacement = {"title": "Uusi otsikko"}
    updated = client.put(f"/todos/{created['id']}", json=replacement)
    assert updated.status_code == 200
    assert updated.json()["title"] == "Uusi otsikko"
def test_delete_todo():
    """DELETE /todos/{id} answers 204 and a following GET answers 404."""
    created = client.post("/todos/", json={"title": "Poistettava"}).json()
    todo_url = f"/todos/{created['id']}"
    deleted = client.delete(todo_url)
    assert deleted.status_code == 204
    lookup = client.get(todo_url)
    assert lookup.status_code == 404

View File

@@ -0,0 +1,13 @@
{
"name": "kipina-codebench",
"version": "0.1.0",
"description": "LLM-koodingenerointibenchmark — testaa Ollama-mallien kykyä generoida toimivia FastAPI-projekteja",
"type": "module",
"bin": {
"codebench": "./benchmark.mjs"
},
"scripts": {
"bench": "node benchmark.mjs --scenarios all",
"docker:build": "docker build -t kipina-pytest -f Dockerfile.pytest ."
}
}

View File

@@ -0,0 +1,15 @@
You are a product owner who turns vague ideas into clear, actionable software requirements.
GIVEN a short project description from the user, produce a structured brief:
1. PROJECT NAME: a short, descriptive name
2. GOAL: one sentence explaining what the software does and who it's for
3. CORE FEATURES: numbered list of 3-8 concrete features (not vague wishes)
4. DATA MODEL: list the main entities and their key fields (include field types)
5. API ENDPOINTS: list the REST endpoints (method + path + purpose)
6. CONSTRAINTS: any technical constraints (e.g. "must use SQLite", "no auth needed")
RULES:
- Be specific: "User can filter todos by status" not "todo management"
- Use plain English, no code
- Maximum 400 words total

View File

@@ -0,0 +1,36 @@
You are a Python backend developer. Generate a FastAPI project with SQLAlchemy and SQLite.
Given the project requirements, JSON specification, and a REFERENCE IMPLEMENTATION, generate these 4 files:
1. models.py — SQLAlchemy 2.0: DeclarativeBase, Mapped, mapped_column (NOT legacy declarative_base)
2. schemas.py — Pydantic v2: ConfigDict(from_attributes=True) (NOT class Config)
3. main.py — FastAPI CRUD endpoints for each entity
4. test_main.py — Pytest with TestClient, separate test.db, unique test data per test
Do NOT generate pyproject.toml — it is created separately with uv.
OUTPUT FORMAT — use these exact markers to separate files:
=== models.py ===
<python code>
=== schemas.py ===
<python code>
=== main.py ===
<python code>
=== test_main.py ===
<python code>
DOCUMENTATION — every file must have a one-line module docstring. Classes get a one-line docstring. Keep it terse: say what it IS, not what it does. No filler.
RULES:
- Follow the REFERENCE IMPLEMENTATION patterns exactly
- SQLAlchemy 2.0: DeclarativeBase + Mapped + mapped_column (not Column())
- Python type unions: str | None (not Optional[str])
- Tests: unique descriptive data per test, NOT generic "test_title" strings
- Tests: PUT/update test data MUST include ALL required (non-nullable) fields, not just the field being updated
- Absolute imports only (from models import ..., from schemas import ...)
- NO markdown fences inside file content — just raw code
- Only test endpoints that exist in main.py — no extra tests

View File

@@ -0,0 +1 @@
You are a Python code fixer. Return ONLY the corrected Python file. No markdown fences, no explanations — just valid Python code.

View File

@@ -0,0 +1,31 @@
You are a software architect who designs database schemas for Python web applications.
THINK STEP BY STEP before outputting JSON:
1. What are the main ENTITIES (nouns) in this project?
2. What FIELDS does each entity need? (name, type, required?)
3. Which entities REFERENCE each other? (e.g. "a Book belongs to an Author" → Book has author_id)
4. Are there Date/DateTime fields? → add extra_imports
Then output ONLY valid JSON (no explanations before or after).
SCHEMA:
{"project_name":"short-name","description":"One sentence","entities":[{"name":"EntityName","table_name":"entity_names","fields":[{"name":"field_name","sa_type":"String(255)","py_type":"str","nullable":false,"default":null}]}],"relationships":[{"from":"ChildEntity","field":"parent_id","to":"ParentEntity","type":"many-to-one"}],"extra_imports":[]}
FIELD RULES:
- sa_type: String(N), Text, Integer, Date, DateTime, Boolean, Float
- py_type: str, int, float, bool, date, datetime — append " | None" if nullable
- Status fields: use String(20) with default value, NEVER Enum
- Every entity gets "id" automatically — do NOT add id or redundant ID fields
- Use snake_case for field names
RELATIONSHIP RULES:
- If entity A "belongs to" entity B → A has b_id field (Integer, nullable=false) + relationship entry
- EVERY _id field MUST have a matching relationship entry
- Parent entities must appear BEFORE children in the entities array
- If no relationships, set "relationships": []
AVOID: redundant ID fields, generic names, more than 7 fields or 3 entities, non-English entity/field names (ALWAYS English even if description is Finnish)
EXAMPLES (adapt, don't copy):
Todo app → Todo: title(str), description(Text|None), due_date(Date|None), status(String20="pending")
Blog → Author: name,email,bio(Text|None) / Post: title, content(Text), author_id→Author, published_at(DateTime|None), status(String20="draft")

View File

@@ -0,0 +1,183 @@
<!DOCTYPE html>
<html lang="fi">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Kipina Model Benchmark</title>
<style>
:root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
* { box-sizing: border-box; margin: 0; padding: 0; }
body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
.meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
.card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
.card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
.card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
.card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
th:hover { color: var(--text); }
th.sorted-asc::after { content: ' ▲'; }
th.sorted-desc::after { content: ' ▼'; }
td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
tr:hover td { background: #1c2128; }
.pass { color: var(--green); }
.partial { color: var(--yellow); }
.fail { color: var(--red); }
.stars { letter-spacing: 1px; }
.bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
.bar-bg { background: var(--border); }
.bar-fill { background: var(--green); }
.bar-partial { background: var(--yellow); }
.model-name { font-weight: 600; }
h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
.summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
</style>
</head>
<body>
<h1>Kipina Model Benchmark</h1>
<div class="meta" id="meta"></div>
<div class="cards" id="cards"></div>
<h2>Mallikohtainen yhteenveto</h2>
<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
<h2>Kaikki tulokset</h2>
<table id="results-table"><thead></thead><tbody></tbody></table>
<script>
// Benchmark result records rendered by this page. The empty array after the
// marker comment is what the page shows when no data has been filled in
// (presumably the benchmark runner substitutes the real records here — confirm).
const RAW = /*DATA_PLACEHOLDER*/[];
/**
 * Map a 0-100 score to a five-character star rating.
 * @param {number} s - Score.
 * @returns {string} Five stars, filled according to the score tier.
 */
const starsFor = (s) => {
  // Tiers from best to worst; the first threshold the score reaches wins.
  const tiers = [
    [90, '★★★★★'],
    [70, '★★★★☆'],
    [50, '★★★☆☆'],
    [25, '★★☆☆☆'],
  ];
  for (const [minimum, rating] of tiers) {
    if (s >= minimum) return rating;
  }
  return s > 0 ? '★☆☆☆☆' : '☆☆☆☆☆';
};
/**
 * Compute the 0-100 score of one benchmark run.
 *
 * 10 points for a valid spec, 10 for getting as far as running tests,
 * up to 60 for the share of passing tests, and 20 minus 10 per fix round.
 * @param {object} r - Result record (specOk, error, testsTotal, testsPassed, fixRounds).
 * @returns {number} Score capped at 100; 0 when the run errored before any test ran.
 */
function calcScore(r) {
  if (r.error && r.testsTotal === 0) return 0;
  const hasTests = r.testsTotal > 0;
  const specPoints = r.specOk ? 10 : 0;
  const ranPoints = !r.error || hasTests ? 10 : 0;
  const testPoints = hasTests ? Math.round((r.testsPassed / r.testsTotal) * 60) : 0;
  const fixPoints = Math.max(0, 20 - (r.fixRounds || 0) * 10);
  return Math.min(100, specPoints + ranPoints + testPoints + fixPoints);
}
// Fill in derived fields that a record is missing (the records are updated in place).
const DATA = RAW.map((row) => {
  row.score ??= calcScore(row);
  row.stars ||= starsFor(row.score);
  // Rough token estimate: four characters per token.
  row.promptTokensEst ||= row.promptChars ? Math.round(row.promptChars / 4) : 0;
  return row;
});
/**
 * CSS class for a run: 'pass' when it has no error and every test passed,
 * 'partial' when at least one test passed, otherwise 'fail'.
 * @param {object} r - Result record (error, testsPassed, testsTotal).
 * @returns {string} 'pass', 'partial' or 'fail'.
 */
const cls = (r) => {
  const allPassed = !r.error && r.testsPassed === r.testsTotal && r.testsTotal > 0;
  if (allPassed) return 'pass';
  if (r.testsTotal > 0 && r.testsPassed > 0) return 'partial';
  return 'fail';
};
/**
 * Render a small progress bar followed by "passed/total".
 * @param {number} passed - Number of passing tests.
 * @param {number} total - Number of tests; 0 renders a dash instead of a bar.
 * @param {number} [w=80] - Bar width in pixels.
 * @returns {string} HTML fragment, or '-' when there are no tests.
 */
const pctBar = (passed, total, w = 80) => {
  if (total === 0) return '-';
  const percent = passed / total * 100;
  // Green only for a full pass; anything less uses the partial colour.
  const fillClass = percent === 100 ? 'bar-fill' : 'bar-partial';
  const fillWidth = Math.round(percent / 100 * w);
  const track = `<span class="bar bar-bg" style="width:${w}px">`;
  const fill = `<span class="bar ${fillClass}" style="width:${fillWidth}px"></span>`;
  return `${track}${fill}</span> ${passed}/${total}`;
};
// Meta: report date, number of runs and total wall time under the heading.
const totalTime = DATA.reduce((s,r) => s + r.totalDurationMs, 0);
// The three parts are joined with the same dash; previously the date ran
// straight into the run count (e.g. "14.4.20269 ajoa").
document.getElementById('meta').textContent = [
  new Date().toLocaleDateString('fi-FI'),
  `${DATA.length} ajoa`,
  `${(totalTime/1000/60).toFixed(1)} min`,
].join(' — ');
// Cards: headline numbers across all runs.
const models = [...new Set(DATA.map(r => r.model))];
const scenarios = [...new Set(DATA.map(r => r.scenario))];
const avgScore = DATA.length ? Math.round(DATA.reduce((s,r) => s + r.score, 0) / DATA.length) : 0;
const totalPassed = DATA.reduce((s,r) => s + r.testsPassed, 0);
const totalTests = DATA.reduce((s,r) => s + r.testsTotal, 0);
const passRate = totalTests ? Math.round(totalPassed/totalTests*100) : 0;
// Model with the highest mean score; undefined when there is no data.
const bestModel = models.map(m => {
  const mrs = DATA.filter(r => r.model === m);
  return { model: m, avg: Math.round(mrs.reduce((s,r) => s + r.score, 0) / mrs.length) };
}).sort((a,b) => b.avg - a.avg)[0];
// Model with the highest mean generation speed. Runs that recorded no speed
// (failed before generating, avgTokPerSec 0) are left out of the mean;
// counting them as 0 tok/s made a model with failed runs look slower than it is.
const fastestModel = models.map(m => {
  const speeds = DATA.filter(r => r.model === m && r.avgTokPerSec > 0).map(r => r.avgTokPerSec);
  const speed = speeds.length ? Math.round(speeds.reduce((s,v) => s + v, 0) / speeds.length) : 0;
  return { model: m, speed };
}).sort((a,b) => b.speed - a.speed)[0];
document.getElementById('cards').innerHTML = `
  <div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistettä</div></div>
  <div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
  <div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${bestModel?.model || '-'}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
  <div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${fastestModel?.model || '-'}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
  <div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
  <div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime/1000/60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
`;
// Summary table: one row per model, one column per scenario, best mean score first.
const sumHead = document.querySelector('#summary-table thead');
const sumBody = document.querySelector('#summary-table tbody');
sumHead.innerHTML = '<tr><th>Malli</th>' + scenarios.map(s => `<th>${s}</th>`).join('') + '<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>';
const modelRows = models.map(m => {
  const mrs = DATA.filter(r => r.model === m);
  const tp = mrs.reduce((s,r) => s + r.testsPassed, 0);
  const tt = mrs.reduce((s,r) => s + r.testsTotal, 0);
  const tok = mrs.reduce((s,r) => s + r.totalTokens, 0);
  const time = mrs.reduce((s,r) => s + r.totalDurationMs, 0);
  // Mean speed over the runs that recorded one; a run that failed before
  // generating has avgTokPerSec 0 and would otherwise drag the mean down.
  const measured = mrs.filter(r => r.avgTokPerSec > 0);
  const speed = measured.length ? Math.round(measured.reduce((s,r) => s + r.avgTokPerSec, 0) / measured.length) : 0;
  const avg = Math.round(mrs.reduce((s,r) => s + r.score, 0) / mrs.length);
  const scenCols = scenarios.map(s => {
    // First run of this model for the scenario; a dash when it was not run.
    const r = mrs.find(r => r.scenario === s);
    if (!r) return '<td>-</td>';
    return `<td class="${cls(r)}">${pctBar(r.testsPassed, r.testsTotal, 60)} <span style="color:var(--dim)">${(r.totalDurationMs/1000).toFixed(0)}s</span></td>`;
  }).join('');
  return { avg, html: `<tr><td class="model-name">${m}</td>${scenCols}<td>${pctBar(tp, tt)}</td><td>${(tok/1000).toFixed(1)}K</td><td>${(time/1000).toFixed(0)}s</td><td>${speed}</td><td><span class="stars">${starsFor(avg)}</span> ${avg}p</td></tr>` };
}).sort((a,b) => b.avg - a.avg);
sumBody.innerHTML = modelRows.map(r => r.html).join('');
// Results table: header cells carry their column index so a click can select the sort column.
const resHead = document.querySelector('#results-table thead');
const resBody = document.querySelector('#results-table tbody');
const resCols = ['Malli','Skenaario','Speksi','Testit','Korjaus','Ctx','Out tok','Aika','tok/s','Pisteet'];
const headerCells = resCols.map((label, idx) => `<th data-col="${idx}">${label}</th>`);
resHead.innerHTML = `<tr>${headerCells.join('')}</tr>`;
// Initial order: score column (index 9), highest first.
let sortCol = 9;
let sortAsc = false;
/**
 * Re-render the results table body in the current sort order and mark the
 * active header cell. Reads the module-level sortCol and sortAsc.
 */
function renderResults() {
  // One value accessor per column, in header order; index 3 is the pass ratio.
  const columnValue = [
    r => r.model,
    r => r.scenario,
    r => r.specEntities,
    r => r.testsPassed / Math.max(r.testsTotal, 1),
    r => r.fixRounds,
    r => r.promptTokensEst,
    r => r.totalTokens,
    r => r.totalDurationMs,
    r => r.avgTokPerSec,
    r => r.score,
  ][sortCol];
  const ordered = [...DATA].sort((a, b) => {
    const left = columnValue(a);
    const right = columnValue(b);
    // Text columns compare by locale, numeric columns by difference.
    const cmp = typeof left === 'string' ? left.localeCompare(right) : left - right;
    return sortAsc ? cmp : -cmp;
  });
  resBody.innerHTML = ordered.map(r => {
    const rowClass = cls(r);
    const cells = [
      `<td class="model-name">${r.model}</td>`,
      `<td>${r.scenario}</td>`,
      `<td>${r.specOk ? `${r.specEntities}e` : '<span class="fail">✗</span>'}</td>`,
      `<td class="${rowClass}">${pctBar(r.testsPassed, r.testsTotal)}</td>`,
      `<td>${r.fixRounds > 0 ? r.fixRounds + '×' : '-'}</td>`,
      `<td>${r.promptTokensEst > 0 ? '~'+(r.promptTokensEst/1000).toFixed(1)+'K' : '-'}</td>`,
      `<td>${r.totalTokens > 0 ? (r.totalTokens/1000).toFixed(1)+'K' : '-'}</td>`,
      `<td>${(r.totalDurationMs/1000).toFixed(0)}s</td>`,
      `<td>${r.avgTokPerSec.toFixed(0)}</td>`,
      `<td><span class="stars">${r.stars}</span> ${r.score}p</td>`,
    ];
    return `<tr>\n${cells.join('\n')}\n</tr>`;
  }).join('');
  // Only the active column carries a sort-direction class.
  const activeClass = sortAsc ? 'sorted-asc' : 'sorted-desc';
  const headers = [...document.querySelectorAll('#results-table th')];
  for (const [idx, th] of headers.entries()) {
    th.className = idx === sortCol ? activeClass : '';
  }
}
// Clicking a header cell sorts by that column; clicking the active column
// again flips the direction. A newly chosen column starts descending.
document.querySelector('#results-table thead').addEventListener('click', (e) => {
  // Resolve the click to its header cell so a click on nested content still counts.
  const th = e.target.closest('th');
  if (!th) return;
  const col = Number.parseInt(th.dataset.col, 10);
  if (Number.isNaN(col)) return;
  if (sortCol === col) sortAsc = !sortAsc;
  else { sortCol = col; sortAsc = false; }
  renderResults();
});
// Initial render with the default sort.
renderResults();
</script>
</body>
</html>

View File

@@ -0,0 +1,183 @@
<!DOCTYPE html>
<html lang="fi">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Kipina Model Benchmark</title>
<style>
:root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
* { box-sizing: border-box; margin: 0; padding: 0; }
body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
.meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
.card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
.card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
.card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
.card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
th:hover { color: var(--text); }
th.sorted-asc::after { content: ' ▲'; }
th.sorted-desc::after { content: ' ▼'; }
td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
tr:hover td { background: #1c2128; }
.pass { color: var(--green); }
.partial { color: var(--yellow); }
.fail { color: var(--red); }
.stars { letter-spacing: 1px; }
.bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
.bar-bg { background: var(--border); }
.bar-fill { background: var(--green); }
.bar-partial { background: var(--yellow); }
.model-name { font-weight: 600; }
h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
.summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
</style>
</head>
<body>
<h1>Kipina Model Benchmark</h1>
<div class="meta" id="meta"></div>
<div class="cards" id="cards"></div>
<h2>Mallikohtainen yhteenveto</h2>
<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
<h2>Kaikki tulokset</h2>
<table id="results-table"><thead></thead><tbody></tbody></table>
<script>
const RAW = [{"model":"codestral:22b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":63028,"totalTokens":2390,"avgTokPerSec":44.09843659433429,"promptChars":9567,"promptTokensEst":2392,"score":100,"stars":"★★★★★","error":null},{"model":"codestral:22b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":4,"testsPassed":4,"testsFailed":0,"totalDurationMs":58359,"totalTokens":2313,"avgTokPerSec":44.04431775388366,"promptChars":9641,"promptTokensEst":2410,"score":100,"stars":"★★★★★","error":null},{"model":"codestral:22b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":1,"testsPassed":0,"testsFailed":1,"totalDurationMs":52020,"totalTokens":2073,"avgTokPerSec":44.03716103774298,"promptChars":10007,"promptTokensEst":2502,"score":40,"stars":"★★☆☆☆","error":null},{"model":"mistral-small3.1:24b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":7,"testsPassed":6,"testsFailed":1,"totalDurationMs":76602,"totalTokens":2820,"avgTokPerSec":41.65340751865168,"promptChars":10816,"promptTokensEst":2704,"score":91,"stars":"★★★★★","error":null},{"model":"mistral-small3.1:24b","scenario":"users","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":0,"totalTokens":0,"avgTokPerSec":0,"promptChars":11004,"promptTokensEst":2751,"score":0,"stars":"","error":"Puuttuvat: test_main.py"},{"model":"mistral-small3.1:24b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":0,"testsPassed":0,"testsFailed":0,"totalDurationMs":0,"totalTokens":0,"avgTokPerSec":0,"promptChars":10573,"promptTokensEst":2643,"score":0,"stars":"","error":"Puuttuvat: 
test_main.py"},{"model":"devstral:24b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":1,"testsPassed":0,"testsFailed":1,"totalDurationMs":54454,"totalTokens":1952,"avgTokPerSec":42.767057828688735,"promptChars":9829,"promptTokensEst":2457,"score":40,"stars":"★★☆☆☆","error":null},{"model":"devstral:24b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":5,"testsPassed":1,"testsFailed":4,"totalDurationMs":50447,"totalTokens":1954,"avgTokPerSec":42.79877112859477,"promptChars":9678,"promptTokensEst":2420,"score":52,"stars":"★★★☆☆","error":null},{"model":"devstral:24b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":1,"testsPassed":0,"testsFailed":1,"totalDurationMs":83061,"totalTokens":3251,"avgTokPerSec":42.647732012717476,"promptChars":10561,"promptTokensEst":2640,"score":40,"stars":"★★☆☆☆","error":null}];
/**
 * Map a 0-100 score to a five-character star rating.
 * @param {number} s - Score.
 * @returns {string} Five stars, filled according to the score tier.
 */
const starsFor = (s) => {
  // Tiers from best to worst; the first threshold the score reaches wins.
  const tiers = [
    [90, '★★★★★'],
    [70, '★★★★☆'],
    [50, '★★★☆☆'],
    [25, '★★☆☆☆'],
  ];
  for (const [minimum, rating] of tiers) {
    if (s >= minimum) return rating;
  }
  return s > 0 ? '★☆☆☆☆' : '☆☆☆☆☆';
};
/**
 * Compute the 0-100 score of one benchmark run.
 *
 * 10 points for a valid spec, 10 for getting as far as running tests,
 * up to 60 for the share of passing tests, and 20 minus 10 per fix round.
 * @param {object} r - Result record (specOk, error, testsTotal, testsPassed, fixRounds).
 * @returns {number} Score capped at 100; 0 when the run errored before any test ran.
 */
function calcScore(r) {
  if (r.error && r.testsTotal === 0) return 0;
  const hasTests = r.testsTotal > 0;
  const specPoints = r.specOk ? 10 : 0;
  const ranPoints = !r.error || hasTests ? 10 : 0;
  const testPoints = hasTests ? Math.round((r.testsPassed / r.testsTotal) * 60) : 0;
  const fixPoints = Math.max(0, 20 - (r.fixRounds || 0) * 10);
  return Math.min(100, specPoints + ranPoints + testPoints + fixPoints);
}
// Normalise the raw records: fill in score, stars and the prompt-token estimate
// when they are missing. Each record is copied so RAW itself is left untouched.
const DATA = RAW.map((raw) => {
  const score = raw.score ?? calcScore(raw);
  // Empty/missing stars are derived from the (possibly just computed) score
  const stars = raw.stars || starsFor(score);
  // Fallback estimate: one token per four prompt characters, 0 when promptChars is absent
  const promptTokensEst = raw.promptTokensEst
    || (raw.promptChars ? Math.round(raw.promptChars / 4) : 0);
  return { ...raw, score, stars, promptTokensEst };
});
/**
 * Picks the CSS status class for a run.
 * 'pass'    – no error, at least one test ran and every test passed;
 * 'partial' – at least one test ran and at least one passed;
 * 'fail'    – everything else.
 * @param {object} r - run record (reads error, testsPassed, testsTotal)
 * @returns {'pass'|'partial'|'fail'}
 */
const cls = (r) => {
  const ranTests = r.testsTotal > 0;
  if (ranTests && !r.error && r.testsPassed === r.testsTotal) return 'pass';
  if (ranTests && r.testsPassed > 0) return 'partial';
  return 'fail';
};
/**
 * Renders a small horizontal progress bar followed by "passed/total".
 * @param {number} passed - number of passed tests
 * @param {number} total - number of executed tests; 0 yields a plain '-'
 * @param {number} [w=80] - width of the bar track in pixels
 * @returns {string} HTML markup for the bar, or '-' when total is 0
 */
const pctBar = (passed, total, w = 80) => {
  if (total === 0) return '-';

  const pct = passed / total * 100;
  // Green fill only for a full pass, the "partial" colour otherwise
  const fillClass = pct === 100 ? 'bar-fill' : 'bar-partial';
  const fillWidth = Math.round(pct / 100 * w);

  const track = `<span class="bar bar-bg" style="width:${w}px">`;
  const fill = `<span class="bar ${fillClass}" style="width:${fillWidth}px"></span>`;
  return `${track}${fill}</span> ${passed}/${total}`;
};
// Meta line: report date, number of runs and total duration in minutes.
const totalTime = DATA.reduce((sum, r) => sum + r.totalDurationMs, 0);
// Join with an explicit separator so the date does not run into the run count
// (previously rendered like "14.4.20269 ajoa").
document.getElementById('meta').textContent = [
  new Date().toLocaleDateString('fi-FI'),
  `${DATA.length} ajoa`,
  `${(totalTime / 1000 / 60).toFixed(1)} min`,
].join(' — ');
// Cards + per-model summary table. Both views are built from the same per-model
// aggregates so the numbers on the cards and in the table always agree.
const models = [...new Set(DATA.map((r) => r.model))];
const scenarios = [...new Set(DATA.map((r) => r.scenario))];

/** Sums one numeric field over a list of run records. */
const sumOf = (rows, field) => rows.reduce((acc, r) => acc + r[field], 0);

/**
 * Aggregates every run of one model.
 * @param {string} model - model name as it appears in DATA
 * @returns {object} totals (passed, total, tokens, durationMs), rounded mean score (avg),
 *   rounded mean speed in tok/s (speed) and the underlying run records (runs)
 */
function summarizeModel(model) {
  const runs = DATA.filter((r) => r.model === model);
  // A record with avgTokPerSec 0 is treated as "no speed measured" and left out of the
  // mean; otherwise failed runs stored with zeroed metrics drag the average down.
  const timed = runs.filter((r) => r.avgTokPerSec > 0);
  return {
    model,
    runs,
    passed: sumOf(runs, 'testsPassed'),
    total: sumOf(runs, 'testsTotal'),
    tokens: sumOf(runs, 'totalTokens'),
    durationMs: sumOf(runs, 'totalDurationMs'),
    speed: timed.length ? Math.round(sumOf(timed, 'avgTokPerSec') / timed.length) : 0,
    avg: Math.round(sumOf(runs, 'score') / runs.length),
  };
}
const modelStats = models.map(summarizeModel);

const avgScore = DATA.length ? Math.round(sumOf(DATA, 'score') / DATA.length) : 0;
const totalPassed = sumOf(DATA, 'testsPassed');
const totalTests = sumOf(DATA, 'testsTotal');
const passRate = totalTests ? Math.round(totalPassed / totalTests * 100) : 0;
// Highest mean score / highest mean speed; undefined when there are no runs at all
const bestModel = [...modelStats].sort((a, b) => b.avg - a.avg)[0];
const fastestModel = [...modelStats].sort((a, b) => b.speed - a.speed)[0];

document.getElementById('cards').innerHTML = `
<div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistettä</div></div>
<div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
<div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${bestModel?.model || '-'}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
<div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${fastestModel?.model || '-'}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
<div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
<div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime / 1000 / 60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
`;

// Summary table: one row per model, one column per scenario, best mean score first
const sumHead = document.querySelector('#summary-table thead');
const sumBody = document.querySelector('#summary-table tbody');
sumHead.innerHTML = `<tr><th>Malli</th>${scenarios.map((s) => `<th>${s}</th>`).join('')}<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>`;
const modelRows = modelStats.map((st) => {
  const scenCols = scenarios.map((s) => {
    const run = st.runs.find((r) => r.scenario === s);
    // The model was not run against this scenario
    if (!run) return '<td>-</td>';
    return `<td class="${cls(run)}">${pctBar(run.testsPassed, run.testsTotal, 60)} <span style="color:var(--dim)">${(run.totalDurationMs / 1000).toFixed(0)}s</span></td>`;
  }).join('');
  return {
    avg: st.avg,
    html: `<tr><td class="model-name">${st.model}</td>${scenCols}<td>${pctBar(st.passed, st.total)}</td><td>${(st.tokens / 1000).toFixed(1)}K</td><td>${(st.durationMs / 1000).toFixed(0)}s</td><td>${st.speed}</td><td><span class="stars">${starsFor(st.avg)}</span> ${st.avg}p</td></tr>`,
  };
}).sort((a, b) => b.avg - a.avg);
sumBody.innerHTML = modelRows.map((row) => row.html).join('');
// Results table: every header cell carries its column index in data-col,
// which the click handler uses to pick the sort column.
const resHead = document.querySelector('#results-table thead');
const resBody = document.querySelector('#results-table tbody');
const resCols = ['Malli', 'Skenaario', 'Speksi', 'Testit', 'Korjaus', 'Ctx', 'Out tok', 'Aika', 'tok/s', 'Pisteet'];
resHead.innerHTML = `<tr>${resCols.map((label, idx) => `<th data-col="${idx}">${label}</th>`).join('')}</tr>`;
// Current sort state: column index into resCols and direction
let sortCol = 9;
let sortAsc = false;
/**
 * Rebuilds the body of the results table from DATA, ordered by the module-level
 * sortCol / sortAsc state, and marks the active header cell with the
 * 'sorted-asc' or 'sorted-desc' class. DATA itself is not reordered.
 */
function renderResults() {
  // One sort-key accessor per column, in the same order as the header cells
  const columnKeys = [
    (r) => r.model,
    (r) => r.scenario,
    (r) => r.specEntities,
    (r) => r.testsPassed / Math.max(r.testsTotal, 1),
    (r) => r.fixRounds,
    (r) => r.promptTokensEst,
    (r) => r.totalTokens,
    (r) => r.totalDurationMs,
    (r) => r.avgTokPerSec,
    (r) => r.score,
  ];
  const keyOf = columnKeys[sortCol];

  const sorted = [...DATA].sort((a, b) => {
    const left = keyOf(a);
    const right = keyOf(b);
    // Strings compare by locale, everything else numerically
    const cmp = typeof left === 'string' ? left.localeCompare(right) : left - right;
    return sortAsc ? cmp : -cmp;
  });

  const rowsHtml = [];
  for (const r of sorted) {
    const specCell = r.specOk ? `${r.specEntities}e` : '<span class="fail">✗</span>';
    const fixCell = r.fixRounds > 0 ? r.fixRounds + '×' : '-';
    const ctxCell = r.promptTokensEst > 0 ? '~' + (r.promptTokensEst / 1000).toFixed(1) + 'K' : '-';
    const tokCell = r.totalTokens > 0 ? (r.totalTokens / 1000).toFixed(1) + 'K' : '-';
    rowsHtml.push([
      '<tr>',
      `<td class="model-name">${r.model}</td>`,
      `<td>${r.scenario}</td>`,
      `<td>${specCell}</td>`,
      `<td class="${cls(r)}">${pctBar(r.testsPassed, r.testsTotal)}</td>`,
      `<td>${fixCell}</td>`,
      `<td>${ctxCell}</td>`,
      `<td>${tokCell}</td>`,
      `<td>${(r.totalDurationMs / 1000).toFixed(0)}s</td>`,
      `<td>${r.avgTokPerSec.toFixed(0)}</td>`,
      `<td><span class="stars">${r.stars}</span> ${r.score}p</td>`,
      '</tr>',
    ].join('\n'));
  }
  resBody.innerHTML = rowsHtml.join('');

  // Only the header of the active sort column gets an indicator class
  document.querySelectorAll('#results-table th').forEach((th, idx) => {
    if (idx !== sortCol) {
      th.className = '';
      return;
    }
    th.className = sortAsc ? 'sorted-asc' : 'sorted-desc';
  });
}
// Header click: sort by the clicked column; clicking the active column flips the
// direction, a newly selected column starts in descending order.
document.querySelector('#results-table thead').addEventListener('click', (e) => {
  // Resolve the header cell even if the click landed on markup nested inside it
  const th = e.target.closest('th[data-col]');
  if (!th) return;
  const col = Number.parseInt(th.dataset.col, 10);
  if (Number.isNaN(col)) return;
  if (sortCol === col) {
    sortAsc = !sortAsc;
  } else {
    sortCol = col;
    sortAsc = false;
  }
  renderResults();
});
// Initial render using the current sortCol / sortAsc values
renderResults();
</script>
</body>
</html>

View File

@@ -0,0 +1,182 @@
[
{
"model": "codestral:22b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 63028,
"totalTokens": 2390,
"avgTokPerSec": 44.09843659433429,
"promptChars": 9567,
"promptTokensEst": 2392,
"score": 100,
"stars": "★★★★★",
"error": null
},
{
"model": "codestral:22b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 4,
"testsPassed": 4,
"testsFailed": 0,
"totalDurationMs": 58359,
"totalTokens": 2313,
"avgTokPerSec": 44.04431775388366,
"promptChars": 9641,
"promptTokensEst": 2410,
"score": 100,
"stars": "★★★★★",
"error": null
},
{
"model": "codestral:22b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 1,
"testsPassed": 0,
"testsFailed": 1,
"totalDurationMs": 52020,
"totalTokens": 2073,
"avgTokPerSec": 44.03716103774298,
"promptChars": 10007,
"promptTokensEst": 2502,
"score": 40,
"stars": "★★☆☆☆",
"error": null
},
{
"model": "mistral-small3.1:24b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 7,
"testsPassed": 6,
"testsFailed": 1,
"totalDurationMs": 76602,
"totalTokens": 2820,
"avgTokPerSec": 41.65340751865168,
"promptChars": 10816,
"promptTokensEst": 2704,
"score": 91,
"stars": "★★★★★",
"error": null
},
{
"model": "mistral-small3.1:24b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 0,
"testsPassed": 0,
"testsFailed": 0,
"totalDurationMs": 0,
"totalTokens": 0,
"avgTokPerSec": 0,
"promptChars": 11004,
"promptTokensEst": 2751,
"score": 0,
"stars": "",
"error": "Puuttuvat: test_main.py"
},
{
"model": "mistral-small3.1:24b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 0,
"testsPassed": 0,
"testsFailed": 0,
"totalDurationMs": 0,
"totalTokens": 0,
"avgTokPerSec": 0,
"promptChars": 10573,
"promptTokensEst": 2643,
"score": 0,
"stars": "",
"error": "Puuttuvat: test_main.py"
},
{
"model": "devstral:24b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 1,
"testsPassed": 0,
"testsFailed": 1,
"totalDurationMs": 54454,
"totalTokens": 1952,
"avgTokPerSec": 42.767057828688735,
"promptChars": 9829,
"promptTokensEst": 2457,
"score": 40,
"stars": "★★☆☆☆",
"error": null
},
{
"model": "devstral:24b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 5,
"testsPassed": 1,
"testsFailed": 4,
"totalDurationMs": 50447,
"totalTokens": 1954,
"avgTokPerSec": 42.79877112859477,
"promptChars": 9678,
"promptTokensEst": 2420,
"score": 52,
"stars": "★★★☆☆",
"error": null
},
{
"model": "devstral:24b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 1,
"testsPassed": 0,
"testsFailed": 1,
"totalDurationMs": 83061,
"totalTokens": 3251,
"avgTokPerSec": 42.647732012717476,
"promptChars": 10561,
"promptTokensEst": 2640,
"score": 40,
"stars": "★★☆☆☆",
"error": null
}
]

View File

@@ -0,0 +1,183 @@
<!DOCTYPE html>
<html lang="fi">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Kipina Model Benchmark</title>
<style>
/* Colour palette, exposed as custom properties and referenced via var(--…) below */
:root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #e6edf3; --dim: #8b949e; --green: #3fb950; --yellow: #d29922; --red: #f85149; --blue: #58a6ff; }
/* Global reset and page frame */
* { box-sizing: border-box; margin: 0; padding: 0; }
body { font-family: -apple-system, 'Segoe UI', Helvetica, Arial, sans-serif; background: var(--bg); color: var(--text); padding: 2rem; max-width: 1400px; margin: 0 auto; }
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
.meta { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
/* Summary cards: auto-fitting grid of label / value / sub-text boxes */
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
.card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
.card .label { color: var(--dim); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; }
.card .value { font-size: 1.8rem; font-weight: 600; margin-top: 0.25rem; }
.card .sub { color: var(--dim); font-size: 0.8rem; margin-top: 0.25rem; }
/* Tables (shared by the summary and the results table) */
table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 8px; overflow: hidden; margin-bottom: 2rem; }
th { background: #1c2128; text-align: left; padding: 0.6rem 0.8rem; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--dim); cursor: pointer; user-select: none; white-space: nowrap; }
th:hover { color: var(--text); }
/* Arrow appended to a header cell that carries the sorted-asc / sorted-desc class */
th.sorted-asc::after { content: ' ▲'; }
th.sorted-desc::after { content: ' ▼'; }
td { padding: 0.5rem 0.8rem; border-top: 1px solid var(--border); font-size: 0.85rem; white-space: nowrap; }
tr:hover td { background: #1c2128; }
/* Status colours for pass / partial / fail */
.pass { color: var(--green); }
.partial { color: var(--yellow); }
.fail { color: var(--red); }
.stars { letter-spacing: 1px; }
/* Inline progress bar: .bar-bg is the track, .bar-fill / .bar-partial colour the filled part */
.bar { display: inline-block; height: 8px; border-radius: 4px; vertical-align: middle; }
.bar-bg { background: var(--border); }
.bar-fill { background: var(--green); }
.bar-partial { background: var(--yellow); }
.model-name { font-weight: 600; }
h2 { font-size: 1.1rem; margin-bottom: 1rem; color: var(--dim); }
/* Keep the first column of the summary table wide enough for model names */
.summary-table th:first-child, .summary-table td:first-child { min-width: 200px; }
</style>
</head>
<body>
<h1>Kipina Model Benchmark</h1>
<div class="meta" id="meta"></div>
<div class="cards" id="cards"></div>
<h2>Mallikohtainen yhteenveto</h2>
<table class="summary-table" id="summary-table"><thead></thead><tbody></tbody></table>
<h2>Kaikki tulokset</h2>
<table id="results-table"><thead></thead><tbody></tbody></table>
<script>
// Benchmark results embedded into the report: one record per (model, scenario) run.
// The literal must not contain raw line breaks inside string values — a newline in the
// middle of a quoted string is a SyntaxError and prevents the whole report from rendering.
const RAW = [
  {"model":"qwen3.5:35b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":1,"testsPassed":0,"testsFailed":1,"totalDurationMs":63592,"totalTokens":4103,"avgTokPerSec":88.29857987765199,"promptChars":11310,"promptTokensEst":2828,"score":40,"stars":"★★☆☆☆","error":null},
  {"model":"qwen3.5:35b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":1,"testsPassed":0,"testsFailed":1,"totalDurationMs":35262,"totalTokens":2733,"avgTokPerSec":88.26749243915684,"promptChars":10165,"promptTokensEst":2541,"score":40,"stars":"★★☆☆☆","error":null},
  {"model":"qwen3.5:35b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":1,"testsPassed":0,"testsFailed":1,"totalDurationMs":60346,"totalTokens":4728,"avgTokPerSec":87.67792775342463,"promptChars":11661,"promptTokensEst":2915,"score":40,"stars":"★★☆☆☆","error":null},
  {"model":"codestral:22b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":5,"testsPassed":4,"testsFailed":1,"totalDurationMs":80515,"totalTokens":3081,"avgTokPerSec":43.828884806830445,"promptChars":10150,"promptTokensEst":2538,"score":88,"stars":"★★★★☆","error":null},
  {"model":"codestral:22b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":5,"testsPassed":3,"testsFailed":2,"totalDurationMs":61598,"totalTokens":2441,"avgTokPerSec":44.017116943523455,"promptChars":9288,"promptTokensEst":2322,"score":76,"stars":"★★★★☆","error":null},
  {"model":"codestral:22b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":81568,"totalTokens":3229,"avgTokPerSec":43.67638078062432,"promptChars":10475,"promptTokensEst":2619,"score":100,"stars":"★★★★★","error":null},
  {"model":"qwen3-coder:30b","scenario":"todo","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":6,"testsPassed":6,"testsFailed":0,"totalDurationMs":30315,"totalTokens":2379,"avgTokPerSec":123.42041099401449,"promptChars":10111,"promptTokensEst":2528,"score":100,"stars":"★★★★★","error":null},
  {"model":"qwen3-coder:30b","scenario":"users","reqOk":true,"specOk":true,"specEntities":1,"validationIssues":0,"fixRounds":0,"testsTotal":7,"testsPassed":6,"testsFailed":1,"totalDurationMs":23071,"totalTokens":2443,"avgTokPerSec":123.11212122029796,"promptChars":9150,"promptTokensEst":2288,"score":91,"stars":"★★★★★","error":null},
  {"model":"qwen3-coder:30b","scenario":"blog","reqOk":true,"specOk":true,"specEntities":2,"validationIssues":0,"fixRounds":0,"testsTotal":11,"testsPassed":11,"testsFailed":0,"totalDurationMs":40933,"totalTokens":4370,"avgTokPerSec":121.8144240305409,"promptChars":10789,"promptTokensEst":2697,"score":100,"stars":"★★★★★","error":null},
];
/**
 * Converts a numeric score into a five-character star rating.
 * Thresholds: >=90 five stars, >=70 four, >=50 three, >=25 two, >0 one;
 * anything else (including 0 and non-numbers) yields five empty stars.
 * @param {number} s - score to rate
 * @returns {string} five characters made of filled and empty stars
 */
const starsFor = (s) => {
  if (s >= 90) return '★★★★★';
  if (s >= 70) return '★★★★☆';
  if (s >= 50) return '★★★☆☆';
  if (s >= 25) return '★★☆☆☆';
  if (s > 0) return '★☆☆☆☆';
  return '☆☆☆☆☆';
};
/**
 * Computes the 0–100 score of one benchmark run.
 *
 * A run that has an error and executed no tests scores 0. Otherwise the score is
 * the sum of: 10 for a valid spec, 10 for a run without error (or with at least one
 * executed test), up to 60 proportional to the share of passed tests, and 20 minus
 * 10 per fix round (never below 0). The total is capped at 100.
 *
 * @param {object} r - run record (reads error, specOk, testsTotal, testsPassed, fixRounds)
 * @returns {number} the score
 */
function calcScore(r) {
  const hasTests = r.testsTotal > 0;
  if (r.error && r.testsTotal === 0) return 0;

  const specPoints = r.specOk ? 10 : 0;
  const ranPoints = (!r.error || hasTests) ? 10 : 0;
  const testPoints = hasTests ? Math.round((r.testsPassed / r.testsTotal) * 60) : 0;
  const fixPoints = Math.max(0, 20 - (r.fixRounds || 0) * 10);

  return Math.min(100, specPoints + ranPoints + testPoints + fixPoints);
}
// Normalise the raw records: fill in score, stars and the prompt-token estimate
// when they are missing. Each record is copied so RAW itself is left untouched.
const DATA = RAW.map((raw) => {
  const score = raw.score ?? calcScore(raw);
  // Empty/missing stars are derived from the (possibly just computed) score
  const stars = raw.stars || starsFor(score);
  // Fallback estimate: one token per four prompt characters, 0 when promptChars is absent
  const promptTokensEst = raw.promptTokensEst
    || (raw.promptChars ? Math.round(raw.promptChars / 4) : 0);
  return { ...raw, score, stars, promptTokensEst };
});
/**
 * Picks the CSS status class for a run.
 * 'pass'    – no error, at least one test ran and every test passed;
 * 'partial' – at least one test ran and at least one passed;
 * 'fail'    – everything else.
 * @param {object} r - run record (reads error, testsPassed, testsTotal)
 * @returns {'pass'|'partial'|'fail'}
 */
const cls = (r) => {
  const ranTests = r.testsTotal > 0;
  if (ranTests && !r.error && r.testsPassed === r.testsTotal) return 'pass';
  if (ranTests && r.testsPassed > 0) return 'partial';
  return 'fail';
};
/**
 * Renders a small horizontal progress bar followed by "passed/total".
 * @param {number} passed - number of passed tests
 * @param {number} total - number of executed tests; 0 yields a plain '-'
 * @param {number} [w=80] - width of the bar track in pixels
 * @returns {string} HTML markup for the bar, or '-' when total is 0
 */
const pctBar = (passed, total, w = 80) => {
  if (total === 0) return '-';

  const pct = passed / total * 100;
  // Green fill only for a full pass, the "partial" colour otherwise
  const fillClass = pct === 100 ? 'bar-fill' : 'bar-partial';
  const fillWidth = Math.round(pct / 100 * w);

  const track = `<span class="bar bar-bg" style="width:${w}px">`;
  const fill = `<span class="bar ${fillClass}" style="width:${fillWidth}px"></span>`;
  return `${track}${fill}</span> ${passed}/${total}`;
};
// Meta line: report date, number of runs and total duration in minutes.
const totalTime = DATA.reduce((sum, r) => sum + r.totalDurationMs, 0);
// Join with an explicit separator so the date does not run into the run count
// (previously rendered like "14.4.20269 ajoa").
document.getElementById('meta').textContent = [
  new Date().toLocaleDateString('fi-FI'),
  `${DATA.length} ajoa`,
  `${(totalTime / 1000 / 60).toFixed(1)} min`,
].join(' — ');
// Cards + per-model summary table. Both views are built from the same per-model
// aggregates so the numbers on the cards and in the table always agree.
const models = [...new Set(DATA.map((r) => r.model))];
const scenarios = [...new Set(DATA.map((r) => r.scenario))];

/** Sums one numeric field over a list of run records. */
const sumOf = (rows, field) => rows.reduce((acc, r) => acc + r[field], 0);

/**
 * Aggregates every run of one model.
 * @param {string} model - model name as it appears in DATA
 * @returns {object} totals (passed, total, tokens, durationMs), rounded mean score (avg),
 *   rounded mean speed in tok/s (speed) and the underlying run records (runs)
 */
function summarizeModel(model) {
  const runs = DATA.filter((r) => r.model === model);
  // A record with avgTokPerSec 0 is treated as "no speed measured" and left out of the
  // mean, so runs without a measurement cannot drag the average down.
  const timed = runs.filter((r) => r.avgTokPerSec > 0);
  return {
    model,
    runs,
    passed: sumOf(runs, 'testsPassed'),
    total: sumOf(runs, 'testsTotal'),
    tokens: sumOf(runs, 'totalTokens'),
    durationMs: sumOf(runs, 'totalDurationMs'),
    speed: timed.length ? Math.round(sumOf(timed, 'avgTokPerSec') / timed.length) : 0,
    avg: Math.round(sumOf(runs, 'score') / runs.length),
  };
}
const modelStats = models.map(summarizeModel);

const avgScore = DATA.length ? Math.round(sumOf(DATA, 'score') / DATA.length) : 0;
const totalPassed = sumOf(DATA, 'testsPassed');
const totalTests = sumOf(DATA, 'testsTotal');
const passRate = totalTests ? Math.round(totalPassed / totalTests * 100) : 0;
// Highest mean score / highest mean speed; undefined when there are no runs at all
const bestModel = [...modelStats].sort((a, b) => b.avg - a.avg)[0];
const fastestModel = [...modelStats].sort((a, b) => b.speed - a.speed)[0];

document.getElementById('cards').innerHTML = `
<div class="card"><div class="label">Keskiarvo</div><div class="value">${starsFor(avgScore)}</div><div class="sub">${avgScore} pistettä</div></div>
<div class="card"><div class="label">Testien läpäisy</div><div class="value">${passRate}%</div><div class="sub">${totalPassed}/${totalTests} testiä</div></div>
<div class="card"><div class="label">Paras malli</div><div class="value" style="font-size:1.2rem">${bestModel?.model || '-'}</div><div class="sub">${bestModel?.avg || 0}p</div></div>
<div class="card"><div class="label">Nopein</div><div class="value" style="font-size:1.2rem">${fastestModel?.model || '-'}</div><div class="sub">${fastestModel?.speed || 0} tok/s</div></div>
<div class="card"><div class="label">Malleja</div><div class="value">${models.length}</div><div class="sub">${scenarios.length} skenaariota</div></div>
<div class="card"><div class="label">Kokonaisaika</div><div class="value">${(totalTime / 1000 / 60).toFixed(1)}</div><div class="sub">minuuttia</div></div>
`;

// Summary table: one row per model, one column per scenario, best mean score first
const sumHead = document.querySelector('#summary-table thead');
const sumBody = document.querySelector('#summary-table tbody');
sumHead.innerHTML = `<tr><th>Malli</th>${scenarios.map((s) => `<th>${s}</th>`).join('')}<th>Yht.</th><th>Out tok</th><th>Aika</th><th>tok/s</th><th>Pisteet</th></tr>`;
const modelRows = modelStats.map((st) => {
  const scenCols = scenarios.map((s) => {
    const run = st.runs.find((r) => r.scenario === s);
    // The model was not run against this scenario
    if (!run) return '<td>-</td>';
    return `<td class="${cls(run)}">${pctBar(run.testsPassed, run.testsTotal, 60)} <span style="color:var(--dim)">${(run.totalDurationMs / 1000).toFixed(0)}s</span></td>`;
  }).join('');
  return {
    avg: st.avg,
    html: `<tr><td class="model-name">${st.model}</td>${scenCols}<td>${pctBar(st.passed, st.total)}</td><td>${(st.tokens / 1000).toFixed(1)}K</td><td>${(st.durationMs / 1000).toFixed(0)}s</td><td>${st.speed}</td><td><span class="stars">${starsFor(st.avg)}</span> ${st.avg}p</td></tr>`,
  };
}).sort((a, b) => b.avg - a.avg);
sumBody.innerHTML = modelRows.map((row) => row.html).join('');
// Results table: every header cell carries its column index in data-col,
// which the click handler uses to pick the sort column.
const resHead = document.querySelector('#results-table thead');
const resBody = document.querySelector('#results-table tbody');
const resCols = ['Malli', 'Skenaario', 'Speksi', 'Testit', 'Korjaus', 'Ctx', 'Out tok', 'Aika', 'tok/s', 'Pisteet'];
resHead.innerHTML = `<tr>${resCols.map((label, idx) => `<th data-col="${idx}">${label}</th>`).join('')}</tr>`;
// Current sort state: column index into resCols and direction
let sortCol = 9;
let sortAsc = false;
/**
 * Rebuilds the body of the results table from DATA, ordered by the module-level
 * sortCol / sortAsc state, and marks the active header cell with the
 * 'sorted-asc' or 'sorted-desc' class. DATA itself is not reordered.
 */
function renderResults() {
  // One sort-key accessor per column, in the same order as the header cells
  const columnKeys = [
    (r) => r.model,
    (r) => r.scenario,
    (r) => r.specEntities,
    (r) => r.testsPassed / Math.max(r.testsTotal, 1),
    (r) => r.fixRounds,
    (r) => r.promptTokensEst,
    (r) => r.totalTokens,
    (r) => r.totalDurationMs,
    (r) => r.avgTokPerSec,
    (r) => r.score,
  ];
  const keyOf = columnKeys[sortCol];

  const sorted = [...DATA].sort((a, b) => {
    const left = keyOf(a);
    const right = keyOf(b);
    // Strings compare by locale, everything else numerically
    const cmp = typeof left === 'string' ? left.localeCompare(right) : left - right;
    return sortAsc ? cmp : -cmp;
  });

  const rowsHtml = [];
  for (const r of sorted) {
    const specCell = r.specOk ? `${r.specEntities}e` : '<span class="fail">✗</span>';
    const fixCell = r.fixRounds > 0 ? r.fixRounds + '×' : '-';
    const ctxCell = r.promptTokensEst > 0 ? '~' + (r.promptTokensEst / 1000).toFixed(1) + 'K' : '-';
    const tokCell = r.totalTokens > 0 ? (r.totalTokens / 1000).toFixed(1) + 'K' : '-';
    rowsHtml.push([
      '<tr>',
      `<td class="model-name">${r.model}</td>`,
      `<td>${r.scenario}</td>`,
      `<td>${specCell}</td>`,
      `<td class="${cls(r)}">${pctBar(r.testsPassed, r.testsTotal)}</td>`,
      `<td>${fixCell}</td>`,
      `<td>${ctxCell}</td>`,
      `<td>${tokCell}</td>`,
      `<td>${(r.totalDurationMs / 1000).toFixed(0)}s</td>`,
      `<td>${r.avgTokPerSec.toFixed(0)}</td>`,
      `<td><span class="stars">${r.stars}</span> ${r.score}p</td>`,
      '</tr>',
    ].join('\n'));
  }
  resBody.innerHTML = rowsHtml.join('');

  // Only the header of the active sort column gets an indicator class
  document.querySelectorAll('#results-table th').forEach((th, idx) => {
    if (idx !== sortCol) {
      th.className = '';
      return;
    }
    th.className = sortAsc ? 'sorted-asc' : 'sorted-desc';
  });
}
// Header click: sort by the clicked column; clicking the active column flips the
// direction, a newly selected column starts in descending order.
document.querySelector('#results-table thead').addEventListener('click', (e) => {
  // Resolve the header cell even if the click landed on markup nested inside it
  const th = e.target.closest('th[data-col]');
  if (!th) return;
  const col = Number.parseInt(th.dataset.col, 10);
  if (Number.isNaN(col)) return;
  if (sortCol === col) {
    sortAsc = !sortAsc;
  } else {
    sortCol = col;
    sortAsc = false;
  }
  renderResults();
});
// Initial render using the current sortCol / sortAsc values
renderResults();
</script>
</body>
</html>

View File

@@ -0,0 +1,182 @@
[
{
"model": "qwen3.5:35b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 1,
"testsPassed": 0,
"testsFailed": 1,
"totalDurationMs": 63592,
"totalTokens": 4103,
"avgTokPerSec": 88.29857987765199,
"promptChars": 11310,
"promptTokensEst": 2828,
"score": 40,
"stars": "★★☆☆☆",
"error": null
},
{
"model": "qwen3.5:35b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 1,
"testsPassed": 0,
"testsFailed": 1,
"totalDurationMs": 35262,
"totalTokens": 2733,
"avgTokPerSec": 88.26749243915684,
"promptChars": 10165,
"promptTokensEst": 2541,
"score": 40,
"stars": "★★☆☆☆",
"error": null
},
{
"model": "qwen3.5:35b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 1,
"testsPassed": 0,
"testsFailed": 1,
"totalDurationMs": 60346,
"totalTokens": 4728,
"avgTokPerSec": 87.67792775342463,
"promptChars": 11661,
"promptTokensEst": 2915,
"score": 40,
"stars": "★★☆☆☆",
"error": null
},
{
"model": "codestral:22b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 5,
"testsPassed": 4,
"testsFailed": 1,
"totalDurationMs": 80515,
"totalTokens": 3081,
"avgTokPerSec": 43.828884806830445,
"promptChars": 10150,
"promptTokensEst": 2538,
"score": 88,
"stars": "★★★★☆",
"error": null
},
{
"model": "codestral:22b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 5,
"testsPassed": 3,
"testsFailed": 2,
"totalDurationMs": 61598,
"totalTokens": 2441,
"avgTokPerSec": 44.017116943523455,
"promptChars": 9288,
"promptTokensEst": 2322,
"score": 76,
"stars": "★★★★☆",
"error": null
},
{
"model": "codestral:22b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 81568,
"totalTokens": 3229,
"avgTokPerSec": 43.67638078062432,
"promptChars": 10475,
"promptTokensEst": 2619,
"score": 100,
"stars": "★★★★★",
"error": null
},
{
"model": "qwen3-coder:30b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 30315,
"totalTokens": 2379,
"avgTokPerSec": 123.42041099401449,
"promptChars": 10111,
"promptTokensEst": 2528,
"score": 100,
"stars": "★★★★★",
"error": null
},
{
"model": "qwen3-coder:30b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 7,
"testsPassed": 6,
"testsFailed": 1,
"totalDurationMs": 23071,
"totalTokens": 2443,
"avgTokPerSec": 123.11212122029796,
"promptChars": 9150,
"promptTokensEst": 2288,
"score": 91,
"stars": "★★★★★",
"error": null
},
{
"model": "qwen3-coder:30b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 11,
"testsPassed": 11,
"testsFailed": 0,
"totalDurationMs": 40933,
"totalTokens": 4370,
"avgTokPerSec": 121.8144240305409,
"promptChars": 10789,
"promptTokensEst": 2697,
"score": 100,
"stars": "★★★★★",
"error": null
}
]