# Test-runner image for generated FastAPI projects: Python 3.14 + uv + pytest.
#
# The generated sources are expected to be bind-mounted read-only at /src;
# the entrypoint copies them into /work and runs test_main.py.
FROM python:3.14-slim
# NOTE(review): uv:latest is unpinned, so image builds are not reproducible — consider pinning a version tag.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
WORKDIR /work
ENV PYTHONPATH=/work
# Create the uv project and install the dependencies once, at build time.
# Doing this in the entrypoint repeated the download for every benchmark run
# and hid installation failures behind `2>/dev/null`; a failure now breaks
# `docker build` with a visible error instead.
RUN uv init --no-readme --python '>=3.14' \
    && rm -f hello.py main.py \
    && uv add fastapi 'uvicorn[standard]' sqlalchemy pytest httpx
# Per run: copy the generated sources, drop stale SQLite files, run the tests.
ENTRYPOINT ["sh", "-c", "cp /src/*.py . && rm -f app.db test.db && uv run pytest test_main.py -v --tb=short 2>&1"]
Docker: uv init + uv add + pytest +``` + +## CLI-argumentit + +| Argumentti | Oletus | Kuvaus | +|-----------|--------|--------| +| `--ollama` | `http://localhost:11434` | Ollama-palvelimen URL | +| `--hub` | - | Hub-reitti (vaihtoehto Ollamalle) | +| `--models` | kaikki | Pilkuilla erotettu mallilista | +| `--scenarios` | `default` (todo) | `all` = todo, users, blog | +| `--output` | `/tmp/kipina-benchmark` | Tuloshakemisto | + +## Hakemistorakenne + +``` +kipina-codebench/ +├── benchmark.mjs ← runner +├── Dockerfile.pytest ← Python 3.14 + uv testikontti +├── report-template.html ← HTML-raporttipohja +├── package.json +├── prompts/ ← muokattavat promptit +│ ├── client.md ← vaatimusmäärittely +│ ├── spec.md ← JSON-speksi +│ ├── code.md ← koodigenerointi +│ └── fix.md ← korjaus +├── golden-examples/ ← referenssitoteutukset +│ ├── todo/ ← taso 1: perus-CRUD (6 testiä) +│ ├── blog/ ← taso 2: relaatiot (13 testiä) +│ └── DOCUMENTATION.md ← zensical-dokumentointiohjeet +└── results/ ← tallennetut tulokset +``` + +## Promptien muokkaus + +Promptit ovat `prompts/`-kansiossa Markdown-tiedostoina. Muokkaa suoraan — benchmark lataa ne käynnistyksessä. + +Esimerkki: lisää sääntö `prompts/code.md`:hen: +``` +- Tests: PUT/update test data MUST include ALL required fields +``` + +## Kultaiset esimerkit + +`golden-examples/todo/` syötetään LLM:lle referenssinä. Malli näkee tarkalleen millaista koodia odotetaan: +- SQLAlchemy 2.0 (DeclarativeBase, Mapped, mapped_column) +- Pydantic v2 (ConfigDict) +- Python 3.14 syntaksi (str | None) +- Uniikki testidata per testi + +Lisää uusia esimerkkejä luomalla hakemisto (esim. `golden-examples/shop/`). 
// === CLI arguments ===
const args = process.argv.slice(2);

/**
 * Look up the value of a `--name` command-line option.
 *
 * Both `--name value` and `--name=value` are accepted; the first occurrence
 * of the option wins.
 *
 * @param {string} name - Option name without the leading dashes.
 * @param {string} fallback - Returned when the option is absent or has no value.
 * @returns {string} The option value, or `fallback`.
 */
function arg(name, fallback) {
  const flag = `--${name}`;
  const inlinePrefix = `${flag}=`;
  for (let i = 0; i < args.length; i += 1) {
    const token = args[i];
    if (token.startsWith(inlinePrefix)) {
      // An empty inline value (`--name=`) is treated like a missing one.
      return token.slice(inlinePrefix.length) || fallback;
    }
    if (token === flag) {
      const next = args[i + 1];
      // A following `--other` token is the next option, not this option's value.
      return next && !next.startsWith('--') ? next : fallback;
    }
  }
  return fallback;
}
/**
 * Remove model "thinking" output from a response and trim the result.
 *
 * @param {string} text - Raw model output.
 * @returns {string} The text with thinking markers removed and outer whitespace trimmed.
 */
function stripThinking(text) {
  // gemma4.
  // NOTE(review): the pattern ends in a lazy quantifier, which always matches
  // the empty string, so only the literal marker is removed — confirm whether
  // a closing delimiter is missing from this pattern.
  const gemmaThought = /<\|channel>thought[\s\S]*?/g;
  // qwen3, qwen3.5.
  // NOTE(review): there is no opening tag in the pattern, so everything up to
  // and including each closing tag is dropped — confirm this is intended.
  const upToThinkClose = /[\s\S]*?<\/think>/g;

  const withoutMarker = text.replace(gemmaThought, '');
  const withoutThinking = withoutMarker.replace(upToThinkClose, '');
  return withoutThinking.trim();
}
/**
 * Split an LLM response into files using `=== name.py ===` section markers.
 *
 * Each section's content is the interior of its first fenced code block when
 * one is present, so prose before or after the fence is not written into the
 * file. Any info string on the fence (`python`, `py`, `toml`, …) is accepted.
 * A section without a complete fence (for example output cut off by the token
 * limit) is used as-is, minus a stray opening or closing fence line.
 *
 * @param {string} text - Raw model output.
 * @returns {Object<string, string>} File name → content ending in a newline;
 *   sections with empty content are omitted.
 */
function parseGeneratedFiles(text) {
  const files = {};
  // The capture group makes split() return [preamble, name1, body1, name2, body2, …].
  const sections = text.split(/===\s*(\S+\.(?:py|toml))\s*===/);
  for (let i = 1; i + 1 < sections.length; i += 2) {
    const name = sections[i];
    const raw = sections[i + 1];
    const fenced = raw.match(/```[^\n`]*\n([\s\S]*?)\n?```/);
    const content = fenced
      ? fenced[1].trim()
      : raw
        .trim()
        .replace(/^```[^\n`]*\n?/, '')
        .replace(/\n?```\s*$/, '')
        .trim();
    if (content) files[name] = `${content}\n`;
  }
  return files;
}
/**
 * Render a score as a five-character star rating.
 *
 * Thresholds: 90+ → 5 stars, 70+ → 4, 50+ → 3, 25+ → 2, any positive score → 1,
 * everything else (including 0, negatives and NaN) → 0.
 *
 * @param {number} score - Benchmark score.
 * @returns {string} Filled stars followed by empty stars, five characters in total.
 */
function starsForScore(score) {
  const thresholds = [90, 70, 50, 25];
  const tier = thresholds.findIndex((limit) => score >= limit);

  let filled;
  if (tier !== -1) {
    filled = 5 - tier;
  } else if (score > 0) {
    filled = 1;
  } else {
    filled = 0;
  }
  return '★'.repeat(filled) + '☆'.repeat(5 - filled);
}
JSON-speksi + console.log(` [2/5] JSON-speksi...`); + const specResp = await ollamaChat(model, `${req.text}\n\nOutput a JSON spec for this project.`, SPEC_SYSTEM, 4096); + timings.push(specResp); + const spec = extractJson(specResp.text); + if (!spec || !spec.entities || spec.entities.length === 0) { result.error = 'JSON-speksi epäonnistui'; writeFileSync(`${dir}/_spec_raw.txt`, specResp.text); return result; } + result.specOk = true; + result.specEntities = spec.entities.length; + writeFileSync(`${dir}/_spec.json`, JSON.stringify(spec, null, 2)); + + // 3. LLM-koodigenerointi + console.log(` [3/5] Koodigenerointi (LLM)...`); + const codePrompt = `${GOLDEN_EXAMPLE}\n---\n\nPROJECT REQUIREMENTS:\n${req.text}\n\nJSON SPECIFICATION:\n${JSON.stringify(spec, null, 2)}\n\nGenerate the complete project with all 4 files. Follow the reference implementation patterns exactly.`; + result.promptChars = CODE_SYSTEM.length + codePrompt.length; + result.promptTokensEst = Math.round(result.promptChars / 4); + const codeResp = await ollamaChat(model, codePrompt, CODE_SYSTEM, 8192); + timings.push(codeResp); + writeFileSync(`${dir}/_code_raw.txt`, codeResp.text); + const files = parseGeneratedFiles(codeResp.text); + const required = ['models.py', 'schemas.py', 'main.py', 'test_main.py']; + const missing = required.filter(f => !files[f]); + if (missing.length > 0) { result.error = `Puuttuvat: ${missing.join(', ')}`; return result; } + + // 4. Validointi + korjaussilmukka + let issues = validateProjectCode(files); + let fixRound = 0; + while (issues.length > 0 && fixRound < MAX_FIX_ROUNDS) { + fixRound++; + console.log(` [4/5] Korjauskierros ${fixRound} (${issues.length} ongelmaa)...`); + const issuesByFile = {}; + for (const issue of issues) { + const m = issue.match(/^ISSUE:\s*(\S+?):/); + const fname = m ? 
m[1] : 'unknown'; + if (!issuesByFile[fname]) issuesByFile[fname] = []; + issuesByFile[fname].push(issue); + } + for (const [fname, fIssues] of Object.entries(issuesByFile)) { + if (!files[fname]) continue; + const fixPrompt = `Fix the following issues in this Python file. Return ONLY the complete corrected file, no explanations.\n\nISSUES:\n${fIssues.join('\n')}\n\nCURRENT FILE (${fname}):\n\`\`\`python\n${files[fname]}\`\`\``; + const fixResp = await ollamaChat(model, fixPrompt, FIX_SYSTEM, 2048); + timings.push(fixResp); + if (fixResp.text) { + files[fname] = fixResp.text.replace(/^```(?:python)?\s*\n?/m, '').replace(/\n?```\s*$/m, '').trim() + '\n'; + } + } + issues = validateProjectCode(files); + } + result.validationIssues = issues.length; + result.fixRounds = fixRound; + + // Kirjoita LLM:n generoimat Python-tiedostot + for (const [fn, content] of Object.entries(files)) { + if (fn.endsWith('.py')) writeFileSync(`${dir}/${fn}`, content); + } + + // 5. Pytest Docker-kontissa (kipina-pytest image) + console.log(` [5/5] Pytest (Docker)...`); + try { + const pytestOut = execSync( + `docker run --rm -v "${dir}:/src:ro" kipina-pytest 2>&1`, + { timeout: 120000, encoding: 'utf-8' } + ); + writeFileSync(`${dir}/_pytest.txt`, pytestOut); + const passedMatch = pytestOut.match(/(\d+) passed/); + const failedMatch = pytestOut.match(/(\d+) failed/); + result.testsPassed = passedMatch ? parseInt(passedMatch[1]) : 0; + result.testsFailed = failedMatch ? parseInt(failedMatch[1]) : 0; + result.testsTotal = result.testsPassed + result.testsFailed; + } catch (e) { + const output = e.stdout || e.stderr || e.message || ''; + writeFileSync(`${dir}/_pytest.txt`, output); + const passedMatch = output.match(/(\d+) passed/); + const failedMatch = output.match(/(\d+) failed/); + const errorMatch = output.match(/(\d+) error/); + result.testsPassed = passedMatch ? parseInt(passedMatch[1]) : 0; + result.testsFailed = (failedMatch ? parseInt(failedMatch[1]) : 0) + (errorMatch ? 
/**
 * Run the benchmark: list the models, run every model × scenario pipeline in
 * sequence, print the result tables and write `results.json` plus, when the
 * template file exists, `report.html` into OUTPUT_DIR.
 *
 * Exits the process with status 1 when the model list cannot be fetched or
 * when no model is left after filtering. The output directory is only wiped
 * once there is something to run.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('╔══════════════════════════════════════════════╗');
  console.log('║ Kipinä CodeBench ║');
  console.log('╚══════════════════════════════════════════════╝');
  console.log(`Ollama: ${OLLAMA_URL}`);

  // Fetch the model list.
  let models;
  try {
    models = await ollamaListModels();
  } catch (e) {
    // The list comes from the Hub when --hub is given; name the endpoint that was actually contacted.
    const target = HUB_URL ? `Hubiin (${HUB_URL})` : `Ollamaan (${OLLAMA_URL})`;
    console.error(`Ei yhteyttä ${target}: ${e.message}`);
    process.exit(1);
  }

  if (FILTER_MODELS) {
    // Drop empty fragments ("a,,b" or a trailing comma): '' is a substring of every name.
    const filter = FILTER_MODELS.split(',').map(s => s.trim()).filter(Boolean);
    models = models.filter(m => filter.some(f => m.includes(f)));
  }

  console.log(`Mallit (${models.length}): ${models.join(', ')}`);

  if (models.length === 0) {
    // Stop before the output directory is wiped: an empty run must not destroy earlier results.
    console.error(`Ei ajettavia malleja${FILTER_MODELS ? ` (suodatin: ${FILTER_MODELS})` : ''}`);
    process.exit(1);
  }

  const scenarios = SCENARIO_FILTER === 'all' ? SCENARIOS : [SCENARIOS[0]];
  console.log(`Skenaariot (${scenarios.length}): ${scenarios.map(s => s.id).join(', ')}`);
  console.log(`Tulokset: ${OUTPUT_DIR}/`);
  console.log('');

  // Clean the output directory.
  rmSync(OUTPUT_DIR, { recursive: true, force: true });
  mkdirSync(OUTPUT_DIR, { recursive: true });

  const results = [];

  // Pipelines run strictly one after another.
  for (const model of models) {
    for (const scenario of scenarios) {
      console.log(`\n━━━ ${model} × ${scenario.id} ━━━`);
      const r = await runPipeline(model, scenario);
      results.push(r);

      const status = r.error ? `✗ ${r.error}` :
        r.testsPassed === r.testsTotal && r.testsTotal > 0 ? `✓ ${r.testsPassed}/${r.testsTotal}` :
        `◐ ${r.testsPassed}/${r.testsTotal}`;
      const ctxInfo = r.promptTokensEst > 0 ? ` | ctx ~${(r.promptTokensEst/1000).toFixed(1)}K` : '';
      console.log(` → ${status} | ${r.stars} ${r.score}p | ${(r.totalDurationMs/1000).toFixed(1)}s | ${r.totalTokens} tok | ${r.avgTokPerSec.toFixed(1)} tok/s${ctxInfo}`);
    }
  }

  // === Result table (one row per model × scenario) ===
  console.log('\n\n╔══════════════════════════════════════════════════════════════════════════════════════════════════╗');
  console.log('║ TULOKSET ║');
  console.log('╠══════════════════════════════════════════════════════════════════════════════════════════════════╣');

  const header = [
    'Malli'.padEnd(40),
    'Skenaario'.padEnd(10),
    'Speksi'.padEnd(8),
    'Testit'.padEnd(10),
    'Korjaus'.padEnd(8),
    'Ctx'.padEnd(7),
    'Aika'.padEnd(8),
    'tok/s'.padEnd(8),
    'Pisteet',
  ].join(' │ ');
  console.log(`║ ${header} ║`);
  console.log('╠' + '═'.repeat(header.length + 2) + '╣');

  for (const r of results) {
    const specStatus = r.specOk ? `✓ ${r.specEntities}e` : '✗';
    const testStatus = r.testsTotal > 0 ? `${r.testsPassed}/${r.testsTotal}` : '-';
    const fixStatus = r.fixRounds > 0 ? `${r.fixRounds}×` : '-';
    const ctx = r.promptTokensEst > 0 ? `~${(r.promptTokensEst/1000).toFixed(1)}K` : '-';
    const time = `${(r.totalDurationMs/1000).toFixed(0)}s`;
    const speed = `${r.avgTokPerSec.toFixed(0)}`;
    const row = [
      r.model.padEnd(40),
      r.scenario.padEnd(10),
      specStatus.padEnd(8),
      testStatus.padEnd(10),
      fixStatus.padEnd(8),
      ctx.padEnd(7),
      time.padEnd(8),
      speed.padEnd(8),
      `${r.stars} ${r.score}`,
    ].join(' │ ');
    console.log(`║ ${row} ║`);
  }
  console.log('╚' + '═'.repeat(header.length + 2) + '╝');

  // === Per-model summary (one row per model, one column per scenario) ===
  const modelNames = [...new Set(results.map(r => r.model))];
  const scenarioIds = scenarios.map(s => s.id);

  console.log('\n');
  const mHeader = [
    'Malli'.padEnd(35),
    ...scenarioIds.map(s => s.padEnd(22)),
    'Yht.'.padEnd(8),
    'Out'.padEnd(7),
    'Aika'.padEnd(8),
    'tok/s'.padEnd(7),
    'Pisteet',
  ].join(' │ ');
  console.log(mHeader);
  console.log('─'.repeat(mHeader.length));

  for (const model of modelNames) {
    const mrs = results.filter(r => r.model === model);
    const cols = scenarioIds.map(sid => {
      const r = mrs.find(r => r.scenario === sid);
      if (!r) return '-'.padEnd(22);
      const t = r.testsTotal > 0 ? `${r.testsPassed}/${r.testsTotal}` : '-';
      const s = `${(r.totalDurationMs/1000).toFixed(0)}s`;
      const tok = r.totalTokens > 1000 ? `${(r.totalTokens/1000).toFixed(1)}K` : `${r.totalTokens}`;
      return `${t} ${s} ${tok}`.padEnd(22);
    });
    const totalPassed = mrs.reduce((s, r) => s + r.testsPassed, 0);
    const totalTests = mrs.reduce((s, r) => s + r.testsTotal, 0);
    const totalTokens = mrs.reduce((s, r) => s + r.totalTokens, 0);
    const totalTime = mrs.reduce((s, r) => s + r.totalDurationMs, 0);
    const avgSpeed = mrs.length > 0 ? Math.round(mrs.reduce((s, r) => s + r.avgTokPerSec, 0) / mrs.length) : 0;
    const avgScoreModel = mrs.length > 0 ? Math.round(mrs.reduce((s, r) => s + r.score, 0) / mrs.length) : 0;
    const pct = totalTests > 0 ? Math.round(totalPassed / totalTests * 100) : 0;
    const tokStr = totalTokens > 1000 ? `${(totalTokens/1000).toFixed(1)}K` : `${totalTokens}`;
    const row = [
      model.padEnd(35),
      ...cols,
      `${totalPassed}/${totalTests}`.padEnd(8),
      tokStr.padEnd(7),
      `${(totalTime/1000).toFixed(0)}s`.padEnd(8),
      `${avgSpeed}`.padEnd(7),
      `${starsForScore(avgScoreModel)} ${avgScoreModel}p (${pct}%)`,
    ].join(' │ ');
    console.log(row);
  }

  // Save the JSON results and the HTML report.
  writeFileSync(`${OUTPUT_DIR}/results.json`, JSON.stringify(results, null, 2));
  const templatePath = join(__dirname, 'report-template.html');
  if (existsSync(templatePath)) {
    // `<` is written as \u003c (still valid JSON) so a "</script>" inside an
    // error message cannot terminate an enclosing script element.
    // NOTE(review): assumes the placeholder sits in a script context — confirm against report-template.html.
    const payload = JSON.stringify(results).replace(/</g, '\\u003c');
    // A replacer function inserts the payload verbatim; a replacement *string*
    // would expand `$&`, `$'`, `$1`… sequences that occur in the data.
    const html = readFileSync(templatePath, 'utf-8').replace(
      '/*DATA_PLACEHOLDER*/[]',
      () => payload
    );
    writeFileSync(`${OUTPUT_DIR}/report.html`, html);
    console.log(`\nRaportti: ${OUTPUT_DIR}/report.html`);
  }
  console.log(`JSON: ${OUTPUT_DIR}/results.json`);

  // Overall summary.
  const passed = results.filter(r => !r.error && r.testsPassed === r.testsTotal && r.testsTotal > 0);
  const partial = results.filter(r => !r.error && r.testsPassed < r.testsTotal && r.testsTotal > 0);
  const failed = results.filter(r => r.error || r.testsTotal === 0);
  const avgScore = results.length > 0 ? Math.round(results.reduce((s, r) => s + r.score, 0) / results.length) : 0;
  const totalTime = results.reduce((s, r) => s + r.totalDurationMs, 0);
  console.log(`\n${starsForScore(avgScore)} Keskiarvo: ${avgScore}p | ✓ PASS: ${passed.length} | ◐ PARTIAL: ${partial.length} | ✗ FAIL: ${failed.length} | Yhteensä: ${results.length} | Kokonaisaika: ${(totalTime/1000/60).toFixed(1)} min`);
}
| `test_get_todo_not_found` — selvä | +| **Konfiguraatio** | Kommentti vain jos arvo yllättää. | `check_same_thread: False # SQLite + FastAPI` | + +## Mitä EI dokumentoida + +- Importteja +- Ilmeisiä parametreja (`item_id: int`) +- Tyyppivihjeitä jotka kertovat saman asian +- Geneerisiä "boilerplate"-docstringejä + +## Esimerkkejä + +### Hyvä (zensical) + +```python +"""Tietokantamallit — SQLAlchemy 2.0, Mapped-tyypitys, SQLite.""" + +class Todo(Base): + """Tehtävä — otsikko, kuvaus, deadline, prioriteetti ja status.""" + ... + +def get_db(): + """Tietokantasessio per pyyntö.""" + ... +``` + +### Huono (verbose) + +```python +""" +This module defines the database models for the Todo application. +It uses SQLAlchemy ORM to create the database tables and provides +the session factory for database connections. +""" + +class Todo(Base): + """ + Represents a todo item in the database. + + Attributes: + id: The unique identifier for the todo item. + title: The title of the todo item. + ... + """ + ... +``` + +### Huono (tyhjä) + +```python +# Ei docstringejä ollenkaan — lukija ei tiedä mikä tiedoston rooli on +class Todo(Base): + __tablename__ = "todos" + ... 
"""FastAPI CRUD — two endpoint sets, Author and Post."""

from fastapi import FastAPI, Depends, HTTPException
from sqlalchemy.orm import Session

from models import SessionLocal, Author, Post
from schemas import AuthorCreate, AuthorResponse, PostCreate, PostResponse

app = FastAPI()


def get_db():
    """Database session per request."""
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()


def _fetch_or_404(db, model, item_id, label):
    """Return the row of `model` with this id, or raise a 404 naming `label`."""
    row = db.query(model).filter(model.id == item_id).first()
    if not row:
        raise HTTPException(status_code=404, detail=f"{label} not found")
    return row


def _insert(db, model, payload):
    """Create a `model` row from the payload's fields, commit and reload it."""
    row = model(**payload.model_dump())
    db.add(row)
    db.commit()
    db.refresh(row)
    return row


def _overwrite(db, row, payload):
    """Copy every payload field onto `row`, commit and reload it."""
    for field, value in payload.model_dump().items():
        setattr(row, field, value)
    db.commit()
    db.refresh(row)
    return row


def _remove(db, row):
    """Delete `row` and commit."""
    db.delete(row)
    db.commit()


# --- Author ---
# Endpoints carry no docstrings on purpose: FastAPI would publish them in the
# generated OpenAPI description.


@app.post("/authors/", response_model=AuthorResponse, status_code=201)
def create_author(item: AuthorCreate, db: Session = Depends(get_db)):
    return _insert(db, Author, item)


@app.get("/authors/", response_model=list[AuthorResponse])
def list_authors(db: Session = Depends(get_db)):
    return db.query(Author).all()


@app.get("/authors/{item_id}", response_model=AuthorResponse)
def get_author(item_id: int, db: Session = Depends(get_db)):
    return _fetch_or_404(db, Author, item_id, "Author")


@app.put("/authors/{item_id}", response_model=AuthorResponse)
def update_author(item_id: int, item: AuthorCreate, db: Session = Depends(get_db)):
    existing = _fetch_or_404(db, Author, item_id, "Author")
    return _overwrite(db, existing, item)


@app.delete("/authors/{item_id}", status_code=204)
def delete_author(item_id: int, db: Session = Depends(get_db)):
    _remove(db, _fetch_or_404(db, Author, item_id, "Author"))


# --- Post ---


@app.post("/posts/", response_model=PostResponse, status_code=201)
def create_post(item: PostCreate, db: Session = Depends(get_db)):
    return _insert(db, Post, item)


@app.get("/posts/", response_model=list[PostResponse])
def list_posts(db: Session = Depends(get_db)):
    return db.query(Post).all()


@app.get("/posts/{item_id}", response_model=PostResponse)
def get_post(item_id: int, db: Session = Depends(get_db)):
    return _fetch_or_404(db, Post, item_id, "Post")


@app.put("/posts/{item_id}", response_model=PostResponse)
def update_post(item_id: int, item: PostCreate, db: Session = Depends(get_db)):
    existing = _fetch_or_404(db, Post, item_id, "Post")
    return _overwrite(db, existing, item)


@app.delete("/posts/{item_id}", status_code=204)
def delete_post(item_id: int, db: Session = Depends(get_db)):
    _remove(db, _fetch_or_404(db, Post, item_id, "Post"))
"""Database models — SQLAlchemy 2.0, Mapped typing, ForeignKey relations."""

from datetime import datetime

from sqlalchemy import String, Text, DateTime, ForeignKey, create_engine
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship, sessionmaker

# SQLite database file, relative to the current working directory.
DATABASE_URL = "sqlite:///./app.db"
# check_same_thread=False is passed through to the SQLite driver.
# NOTE(review): presumably needed because FastAPI may serve a request on a
# different thread than the one that opened the connection — confirm.
engine = create_engine(DATABASE_URL, connect_args={"check_same_thread": False})
# Session factory bound to the engine; no autocommit, no autoflush.
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)


class Base(DeclarativeBase):
    # Shared declarative base; its metadata collects the tables defined below.
    pass


class Author(Base):
    """Author — name, e-mail and bio."""

    __tablename__ = "authors"

    id: Mapped[int] = mapped_column(primary_key=True, index=True)
    name: Mapped[str] = mapped_column(String(255))
    # E-mail addresses are unique across authors.
    email: Mapped[str] = mapped_column(String(255), unique=True)
    # Optional free-text biography.
    bio: Mapped[str | None] = mapped_column(Text, default=None)

    # Counterpart of Post.author.
    posts: Mapped[list["Post"]] = relationship(back_populates="author")


class Post(Base):
    """Blog post — title, content, author, publication time and status."""

    __tablename__ = "posts"

    id: Mapped[int] = mapped_column(primary_key=True, index=True)
    title: Mapped[str] = mapped_column(String(255))
    content: Mapped[str] = mapped_column(Text)
    # Foreign key to authors.id.
    author_id: Mapped[int] = mapped_column(ForeignKey("authors.id"))
    # Optional; defaults to None.
    published_at: Mapped[datetime | None] = mapped_column(DateTime, default=None)
    # New posts start as "draft".
    status: Mapped[str] = mapped_column(String(20), default="draft")

    # Counterpart of Author.posts.
    author: Mapped["Author"] = relationship(back_populates="posts")


# Import-time side effect: issues the table-creation call for every mapped
# table on the engine above.
Base.metadata.create_all(bind=engine)
"""Pydantic v2 schemas — Create for input, Response for output."""

# Class docstrings below are left verbatim: Pydantic uses a model's docstring
# as its JSON-schema description, so they are runtime text, not just comments.

from datetime import datetime

from pydantic import BaseModel, ConfigDict


# Payload for creating (and, in main.py, fully updating) an author.
# Required: name, email. Optional: bio.
class AuthorCreate(BaseModel):
    """Uuden kirjoittajan luonti. Pakolliset: name, email."""

    name: str
    email: str
    bio: str | None = None


# Author as returned by the API: the create fields plus the id.
class AuthorResponse(AuthorCreate):
    """Palautettava kirjoittaja — sisältää id:n."""

    id: int
    # from_attributes=True lets the model be built from attribute-bearing
    # objects (here: ORM rows) rather than only from dicts.
    model_config = ConfigDict(from_attributes=True)


# Payload for creating (and, in main.py, fully updating) a post.
# Required: title, content, author_id. Optional: published_at; status defaults to "draft".
class PostCreate(BaseModel):
    """Uuden postauksen luonti. Pakolliset: title, content, author_id."""

    title: str
    content: str
    author_id: int
    published_at: datetime | None = None
    status: str = "draft"


# Post as returned by the API: the create fields plus the id.
class PostResponse(PostCreate):
    """Palautettava postaus — sisältää id:n."""

    id: int
    # Same as AuthorResponse: allow construction from ORM row attributes.
    model_config = ConfigDict(from_attributes=True)
', '.')}@example.com" + return client.post( + "/authors/", json={"name": name, "email": email} + ).json() + + +# --- Author-testit --- + + +def test_create_author(): + response = client.post( + "/authors/", + json={"name": "Aleksis Kivi", "email": "aleksis@example.com", "bio": "Suomen kansalliskirjailija"}, + ) + assert response.status_code == 201 + assert response.json()["name"] == "Aleksis Kivi" + assert response.json()["bio"] == "Suomen kansalliskirjailija" + assert "id" in response.json() + + +def test_list_authors(): + _create_author("Minna Canth", "minna.canth@example.com") + response = client.get("/authors/") + assert response.status_code == 200 + assert len(response.json()) >= 1 + + +def test_get_author_by_id(): + created = _create_author("Väinö Linna", "vaino.linna@example.com") + response = client.get(f"/authors/{created['id']}") + assert response.status_code == 200 + assert response.json()["id"] == created["id"] + + +def test_get_author_not_found(): + response = client.get("/authors/99999") + assert response.status_code == 404 + + +def test_update_author(): + created = _create_author("Vanha Nimi", "vanha.nimi@example.com") + response = client.put( + f"/authors/{created['id']}", + json={"name": "Uusi Nimi", "email": "uusi.nimi@example.com"}, + ) + assert response.status_code == 200 + assert response.json()["name"] == "Uusi Nimi" + + +def test_delete_author(): + created = _create_author("Poistettava Kirjailija", "poistettava@example.com") + response = client.delete(f"/authors/{created['id']}") + assert response.status_code == 204 + response = client.get(f"/authors/{created['id']}") + assert response.status_code == 404 + + +# --- Post-testit --- + + +def test_create_post(): + author = _create_author("Tove Jansson", "tove.jansson@example.com") + response = client.post( + "/posts/", + json={"title": "Muumipeikko ja pyrstötähti", "content": "Eräänä aamuna...", "author_id": author["id"]}, + ) + assert response.status_code == 201 + assert 
response.json()["title"] == "Muumipeikko ja pyrstötähti" + assert response.json()["author_id"] == author["id"] + assert response.json()["status"] == "draft" + + +def test_list_posts(): + author = _create_author("Juhani Aho", "juhani.aho@example.com") + client.post( + "/posts/", + json={"title": "Rautatie", "content": "Junasta kertova novelli.", "author_id": author["id"]}, + ) + response = client.get("/posts/") + assert response.status_code == 200 + assert len(response.json()) >= 1 + + +def test_get_post_by_id(): + author = _create_author("Elias Lönnrot", "elias.lonnrot@example.com") + created = client.post( + "/posts/", + json={"title": "Kalevala", "content": "Vaka vanha Väinämöinen.", "author_id": author["id"]}, + ).json() + response = client.get(f"/posts/{created['id']}") + assert response.status_code == 200 + assert response.json()["id"] == created["id"] + + +def test_get_post_not_found(): + response = client.get("/posts/99999") + assert response.status_code == 404 + + +def test_update_post(): + author = _create_author("Joel Lehtonen", "joel.lehtonen@example.com") + created = client.post( + "/posts/", + json={"title": "Vanha otsikko", "content": "Alkuperäinen teksti.", "author_id": author["id"]}, + ).json() + response = client.put( + f"/posts/{created['id']}", + json={"title": "Päivitetty otsikko", "content": "Muokattu teksti.", "author_id": author["id"], "status": "published"}, + ) + assert response.status_code == 200 + assert response.json()["title"] == "Päivitetty otsikko" + assert response.json()["status"] == "published" + + +def test_delete_post(): + author = _create_author("Aino Kallas", "aino.kallas@example.com") + created = client.post( + "/posts/", + json={"title": "Poistettava postaus", "content": "Tämä poistetaan.", "author_id": author["id"]}, + ).json() + response = client.delete(f"/posts/{created['id']}") + assert response.status_code == 204 + response = client.get(f"/posts/{created['id']}") + assert response.status_code == 404 + + +def 
test_post_belongs_to_author(): + author = _create_author("Sofi Oksanen", "sofi.oksanen@example.com") + post = client.post( + "/posts/", + json={"title": "Puhdistus", "content": "Romaani Virosta.", "author_id": author["id"]}, + ).json() + assert post["author_id"] == author["id"] diff --git a/network-poc/kipina-codebench/golden-examples/todo/main.py b/network-poc/kipina-codebench/golden-examples/todo/main.py new file mode 100644 index 0000000..17996e8 --- /dev/null +++ b/network-poc/kipina-codebench/golden-examples/todo/main.py @@ -0,0 +1,61 @@ +"""FastAPI CRUD — yksi endpoint-setti per entiteetti.""" + +from fastapi import FastAPI, Depends, HTTPException +from sqlalchemy.orm import Session + +from models import SessionLocal, Todo +from schemas import TodoCreate, TodoResponse + +app = FastAPI() + + +def get_db(): + """Tietokantasessio per pyyntö.""" + db = SessionLocal() + try: + yield db + finally: + db.close() + + +@app.post("/todos/", response_model=TodoResponse, status_code=201) +def create_todo(item: TodoCreate, db: Session = Depends(get_db)): + db_item = Todo(**item.model_dump()) + db.add(db_item) + db.commit() + db.refresh(db_item) + return db_item + + +@app.get("/todos/", response_model=list[TodoResponse]) +def list_todos(db: Session = Depends(get_db)): + return db.query(Todo).all() + + +@app.get("/todos/{item_id}", response_model=TodoResponse) +def get_todo(item_id: int, db: Session = Depends(get_db)): + item = db.query(Todo).filter(Todo.id == item_id).first() + if not item: + raise HTTPException(status_code=404, detail="Todo not found") + return item + + +@app.put("/todos/{item_id}", response_model=TodoResponse) +def update_todo(item_id: int, item: TodoCreate, db: Session = Depends(get_db)): + db_item = db.query(Todo).filter(Todo.id == item_id).first() + if not db_item: + raise HTTPException(status_code=404, detail="Todo not found") + for key, value in item.model_dump().items(): + setattr(db_item, key, value) + db.commit() + db.refresh(db_item) + return 
db_item + + +@app.delete("/todos/{item_id}", status_code=204) +def delete_todo(item_id: int, db: Session = Depends(get_db)): + db_item = db.query(Todo).filter(Todo.id == item_id).first() + if not db_item: + raise HTTPException(status_code=404, detail="Todo not found") + db.delete(db_item) + db.commit() diff --git a/network-poc/kipina-codebench/golden-examples/todo/models.py b/network-poc/kipina-codebench/golden-examples/todo/models.py new file mode 100644 index 0000000..22af2ee --- /dev/null +++ b/network-poc/kipina-codebench/golden-examples/todo/models.py @@ -0,0 +1,30 @@ +"""Tietokantamallit — SQLAlchemy 2.0, Mapped-tyypitys, SQLite.""" + +from datetime import date + +from sqlalchemy import String, Text, Date, create_engine +from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, sessionmaker + +DATABASE_URL = "sqlite:///./app.db" +engine = create_engine(DATABASE_URL, connect_args={"check_same_thread": False}) +SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) + + +class Base(DeclarativeBase): + pass + + +class Todo(Base): + """Tehtävä — otsikko, kuvaus, deadline, prioriteetti ja status.""" + + __tablename__ = "todos" + + id: Mapped[int] = mapped_column(primary_key=True, index=True) + title: Mapped[str] = mapped_column(String(255)) + description: Mapped[str | None] = mapped_column(Text, default=None) + due_date: Mapped[date | None] = mapped_column(Date, default=None) + priority: Mapped[int] = mapped_column(default=1) + status: Mapped[str] = mapped_column(String(20), default="pending") + + +Base.metadata.create_all(bind=engine) diff --git a/network-poc/kipina-codebench/golden-examples/todo/pyproject.toml b/network-poc/kipina-codebench/golden-examples/todo/pyproject.toml new file mode 100644 index 0000000..f91f12e --- /dev/null +++ b/network-poc/kipina-codebench/golden-examples/todo/pyproject.toml @@ -0,0 +1,11 @@ +[project] +name = "todo-app" +version = "0.1.0" +requires-python = ">=3.14" +dependencies = [ + "fastapi", + 
"uvicorn[standard]", + "sqlalchemy", + "pytest", + "httpx", +] diff --git a/network-poc/kipina-codebench/golden-examples/todo/schemas.py b/network-poc/kipina-codebench/golden-examples/todo/schemas.py new file mode 100644 index 0000000..6f0d2b8 --- /dev/null +++ b/network-poc/kipina-codebench/golden-examples/todo/schemas.py @@ -0,0 +1,22 @@ +"""Pydantic v2 -skeemat — Create sisääntulolle, Response vastaukselle.""" + +from datetime import date + +from pydantic import BaseModel, ConfigDict + + +class TodoCreate(BaseModel): + """Uuden tehtävän luonti. Pakolliset: title.""" + + title: str + description: str | None = None + due_date: date | None = None + priority: int = 1 + status: str = "pending" + + +class TodoResponse(TodoCreate): + """Palautettava tehtävä — sisältää id:n.""" + + id: int + model_config = ConfigDict(from_attributes=True) diff --git a/network-poc/kipina-codebench/golden-examples/todo/test_main.py b/network-poc/kipina-codebench/golden-examples/todo/test_main.py new file mode 100644 index 0000000..2bf60d9 --- /dev/null +++ b/network-poc/kipina-codebench/golden-examples/todo/test_main.py @@ -0,0 +1,69 @@ +"""Pytest — TestClient, erillinen test.db, uniikki data per testi.""" + +from fastapi.testclient import TestClient +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker + +from main import app, get_db +from models import Base + +test_engine = create_engine( + "sqlite:///./test.db", connect_args={"check_same_thread": False} +) +TestSession = sessionmaker(autocommit=False, autoflush=False, bind=test_engine) +Base.metadata.create_all(bind=test_engine) + + +def override_get_db(): + db = TestSession() + try: + yield db + finally: + db.close() + + +app.dependency_overrides[get_db] = override_get_db +client = TestClient(app) + + +def test_create_todo(): + response = client.post("/todos/", json={"title": "Osta maitoa", "priority": 2}) + assert response.status_code == 201 + assert response.json()["title"] == "Osta maitoa" + assert "id" in 
response.json() + + +def test_list_todos(): + client.post("/todos/", json={"title": "Listattava tehtävä"}) + response = client.get("/todos/") + assert response.status_code == 200 + assert len(response.json()) >= 1 + + +def test_get_todo_by_id(): + created = client.post("/todos/", json={"title": "Haettava tehtävä"}).json() + response = client.get(f"/todos/{created['id']}") + assert response.status_code == 200 + assert response.json()["id"] == created["id"] + + +def test_get_todo_not_found(): + response = client.get("/todos/99999") + assert response.status_code == 404 + + +def test_update_todo(): + created = client.post("/todos/", json={"title": "Vanha otsikko"}).json() + response = client.put( + f"/todos/{created['id']}", json={"title": "Uusi otsikko"} + ) + assert response.status_code == 200 + assert response.json()["title"] == "Uusi otsikko" + + +def test_delete_todo(): + created = client.post("/todos/", json={"title": "Poistettava"}).json() + response = client.delete(f"/todos/{created['id']}") + assert response.status_code == 204 + response = client.get(f"/todos/{created['id']}") + assert response.status_code == 404 diff --git a/network-poc/kipina-codebench/package.json b/network-poc/kipina-codebench/package.json new file mode 100644 index 0000000..ed02aba --- /dev/null +++ b/network-poc/kipina-codebench/package.json @@ -0,0 +1,13 @@ +{ + "name": "kipina-codebench", + "version": "0.1.0", + "description": "LLM-koodingenerointibenchmark — testaa Ollama-mallien kykyä generoida toimivia FastAPI-projekteja", + "type": "module", + "bin": { + "codebench": "./benchmark.mjs" + }, + "scripts": { + "bench": "node benchmark.mjs --scenarios all", + "docker:build": "docker build -t kipina-pytest -f Dockerfile.pytest ." 
+ } +} diff --git a/network-poc/kipina-codebench/prompts/client.md b/network-poc/kipina-codebench/prompts/client.md new file mode 100644 index 0000000..45ea917 --- /dev/null +++ b/network-poc/kipina-codebench/prompts/client.md @@ -0,0 +1,15 @@ +You are a product owner who turns vague ideas into clear, actionable software requirements. + +GIVEN a short project description from the user, produce a structured brief: + +1. PROJECT NAME: a short, descriptive name +2. GOAL: one sentence explaining what the software does and who it's for +3. CORE FEATURES: numbered list of 3-8 concrete features (not vague wishes) +4. DATA MODEL: list the main entities and their key fields (include field types) +5. API ENDPOINTS: list the REST endpoints (method + path + purpose) +6. CONSTRAINTS: any technical constraints (e.g. "must use SQLite", "no auth needed") + +RULES: +- Be specific: "User can filter todos by status" not "todo management" +- Use plain English, no code +- Maximum 400 words total diff --git a/network-poc/kipina-codebench/prompts/code.md b/network-poc/kipina-codebench/prompts/code.md new file mode 100644 index 0000000..b7c2753 --- /dev/null +++ b/network-poc/kipina-codebench/prompts/code.md @@ -0,0 +1,36 @@ +You are a Python backend developer. Generate a FastAPI project with SQLAlchemy and SQLite. + +Given the project requirements, JSON specification, and a REFERENCE IMPLEMENTATION, generate these 4 files: + +1. models.py — SQLAlchemy 2.0: DeclarativeBase, Mapped, mapped_column (NOT legacy declarative_base) +2. schemas.py — Pydantic v2: ConfigDict(from_attributes=True) (NOT class Config) +3. main.py — FastAPI CRUD endpoints for each entity +4. test_main.py — Pytest with TestClient, separate test.db, unique test data per test + +Do NOT generate pyproject.toml — it is created separately with uv. 
+ +OUTPUT FORMAT — use these exact markers to separate files: + +=== models.py === + + +=== schemas.py === + + +=== main.py === + + +=== test_main.py === + + +DOCUMENTATION — every file must have a one-line module docstring. Classes get a one-line docstring. Keep it zensical: say what it IS, not what it does. No filler. + +RULES: +- Follow the REFERENCE IMPLEMENTATION patterns exactly +- SQLAlchemy 2.0: DeclarativeBase + Mapped + mapped_column (not Column()) +- Python type unions: str | None (not Optional[str]) +- Tests: unique descriptive data per test, NOT generic "test_title" strings +- Tests: PUT/update test data MUST include ALL required (non-nullable) fields, not just the field being updated +- Absolute imports only (from models import ..., from schemas import ...) +- NO markdown fences inside file content — just raw code +- Only test endpoints that exist in main.py — no extra tests diff --git a/network-poc/kipina-codebench/prompts/fix.md b/network-poc/kipina-codebench/prompts/fix.md new file mode 100644 index 0000000..6b9a00d --- /dev/null +++ b/network-poc/kipina-codebench/prompts/fix.md @@ -0,0 +1 @@ +You are a Python code fixer. Return ONLY the corrected Python file. No markdown fences, no explanations — just valid Python code. diff --git a/network-poc/kipina-codebench/prompts/spec.md b/network-poc/kipina-codebench/prompts/spec.md new file mode 100644 index 0000000..8308aa8 --- /dev/null +++ b/network-poc/kipina-codebench/prompts/spec.md @@ -0,0 +1,31 @@ +You are a software architect who designs database schemas for Python web applications. + +THINK STEP BY STEP before outputting JSON: +1. What are the main ENTITIES (nouns) in this project? +2. What FIELDS does each entity need? (name, type, required?) +3. Which entities REFERENCE each other? (e.g. "a Book belongs to an Author" → Book has author_id) +4. Are there Date/DateTime fields? → add extra_imports + +Then output ONLY valid JSON (no explanations before or after). 
+ +SCHEMA: +{"project_name":"short-name","description":"One sentence","entities":[{"name":"EntityName","table_name":"entity_names","fields":[{"name":"field_name","sa_type":"String(255)","py_type":"str","nullable":false,"default":null}]}],"relationships":[{"from":"ChildEntity","field":"parent_id","to":"ParentEntity","type":"many-to-one"}],"extra_imports":[]} + +FIELD RULES: +- sa_type: String(N), Text, Integer, Date, DateTime, Boolean, Float +- py_type: str, int, float, bool, date, datetime — append " | None" if nullable +- Status fields: use String(20) with default value, NEVER Enum +- Every entity gets "id" automatically — do NOT add id or redundant ID fields +- Use snake_case for field names + +RELATIONSHIP RULES: +- If entity A "belongs to" entity B → A has b_id field (Integer, nullable=false) + relationship entry +- EVERY _id field MUST have a matching relationship entry +- Parent entities must appear BEFORE children in the entities array +- If no relationships, set "relationships": [] + +AVOID: redundant ID fields, generic names, more than 7 fields or 3 entities, non-English entity/field names (ALWAYS English even if description is Finnish) + +EXAMPLES (adapt, don't copy): +Todo app → Todo: title(str), description(Text|None), due_date(Date|None), status(String20="pending") +Blog → Author: name,email,bio(Text|None) / Post: title, content(Text), author_id→Author, published_at(DateTime|None), status(String20="draft") diff --git a/network-poc/kipina-codebench/report-template.html b/network-poc/kipina-codebench/report-template.html new file mode 100644 index 0000000..e7c5ba1 --- /dev/null +++ b/network-poc/kipina-codebench/report-template.html @@ -0,0 +1,183 @@ + + + + + +Kipina Model Benchmark + + + + +

Kipina Model Benchmark

+
+ +
+ +

Mallikohtainen yhteenveto

+
+ +

Kaikki tulokset

+
+ + + + diff --git a/network-poc/kipina-codebench/results/2026-04-14_mistral.html b/network-poc/kipina-codebench/results/2026-04-14_mistral.html new file mode 100644 index 0000000..06898f7 --- /dev/null +++ b/network-poc/kipina-codebench/results/2026-04-14_mistral.html @@ -0,0 +1,183 @@ + + + + + +Kipina Model Benchmark + + + + +

Kipina Model Benchmark

+
+ +
+ +

Mallikohtainen yhteenveto

+
+ +

Kaikki tulokset

+
+ + + + diff --git a/network-poc/kipina-codebench/results/2026-04-14_mistral.json b/network-poc/kipina-codebench/results/2026-04-14_mistral.json new file mode 100644 index 0000000..7b8fd9b --- /dev/null +++ b/network-poc/kipina-codebench/results/2026-04-14_mistral.json @@ -0,0 +1,182 @@ +[ + { + "model": "codestral:22b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 6, + "testsPassed": 6, + "testsFailed": 0, + "totalDurationMs": 63028, + "totalTokens": 2390, + "avgTokPerSec": 44.09843659433429, + "promptChars": 9567, + "promptTokensEst": 2392, + "score": 100, + "stars": "★★★★★", + "error": null + }, + { + "model": "codestral:22b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 4, + "testsPassed": 4, + "testsFailed": 0, + "totalDurationMs": 58359, + "totalTokens": 2313, + "avgTokPerSec": 44.04431775388366, + "promptChars": 9641, + "promptTokensEst": 2410, + "score": 100, + "stars": "★★★★★", + "error": null + }, + { + "model": "codestral:22b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 1, + "testsPassed": 0, + "testsFailed": 1, + "totalDurationMs": 52020, + "totalTokens": 2073, + "avgTokPerSec": 44.03716103774298, + "promptChars": 10007, + "promptTokensEst": 2502, + "score": 40, + "stars": "★★☆☆☆", + "error": null + }, + { + "model": "mistral-small3.1:24b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 7, + "testsPassed": 6, + "testsFailed": 1, + "totalDurationMs": 76602, + "totalTokens": 2820, + "avgTokPerSec": 41.65340751865168, + "promptChars": 10816, + "promptTokensEst": 2704, + "score": 91, + "stars": "★★★★★", + "error": null + }, + { + "model": "mistral-small3.1:24b", + "scenario": "users", + "reqOk": 
true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 0, + "testsPassed": 0, + "testsFailed": 0, + "totalDurationMs": 0, + "totalTokens": 0, + "avgTokPerSec": 0, + "promptChars": 11004, + "promptTokensEst": 2751, + "score": 0, + "stars": "", + "error": "Puuttuvat: test_main.py" + }, + { + "model": "mistral-small3.1:24b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 0, + "testsPassed": 0, + "testsFailed": 0, + "totalDurationMs": 0, + "totalTokens": 0, + "avgTokPerSec": 0, + "promptChars": 10573, + "promptTokensEst": 2643, + "score": 0, + "stars": "", + "error": "Puuttuvat: test_main.py" + }, + { + "model": "devstral:24b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 1, + "testsPassed": 0, + "testsFailed": 1, + "totalDurationMs": 54454, + "totalTokens": 1952, + "avgTokPerSec": 42.767057828688735, + "promptChars": 9829, + "promptTokensEst": 2457, + "score": 40, + "stars": "★★☆☆☆", + "error": null + }, + { + "model": "devstral:24b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 5, + "testsPassed": 1, + "testsFailed": 4, + "totalDurationMs": 50447, + "totalTokens": 1954, + "avgTokPerSec": 42.79877112859477, + "promptChars": 9678, + "promptTokensEst": 2420, + "score": 52, + "stars": "★★★☆☆", + "error": null + }, + { + "model": "devstral:24b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 1, + "testsPassed": 0, + "testsFailed": 1, + "totalDurationMs": 83061, + "totalTokens": 3251, + "avgTokPerSec": 42.647732012717476, + "promptChars": 10561, + "promptTokensEst": 2640, + "score": 40, + "stars": "★★☆☆☆", + "error": null + } +] \ No newline at end of file diff --git 
a/network-poc/kipina-codebench/results/2026-04-14_top3.html b/network-poc/kipina-codebench/results/2026-04-14_top3.html new file mode 100644 index 0000000..94c39d8 --- /dev/null +++ b/network-poc/kipina-codebench/results/2026-04-14_top3.html @@ -0,0 +1,183 @@ + + + + + +Kipina Model Benchmark + + + + +

Kipina Model Benchmark

+
+ +
+ +

Mallikohtainen yhteenveto

+
+ +

Kaikki tulokset

+
+ + + + diff --git a/network-poc/kipina-codebench/results/2026-04-14_top3.json b/network-poc/kipina-codebench/results/2026-04-14_top3.json new file mode 100644 index 0000000..aa4db16 --- /dev/null +++ b/network-poc/kipina-codebench/results/2026-04-14_top3.json @@ -0,0 +1,182 @@ +[ + { + "model": "qwen3.5:35b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 1, + "testsPassed": 0, + "testsFailed": 1, + "totalDurationMs": 63592, + "totalTokens": 4103, + "avgTokPerSec": 88.29857987765199, + "promptChars": 11310, + "promptTokensEst": 2828, + "score": 40, + "stars": "★★☆☆☆", + "error": null + }, + { + "model": "qwen3.5:35b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 1, + "testsPassed": 0, + "testsFailed": 1, + "totalDurationMs": 35262, + "totalTokens": 2733, + "avgTokPerSec": 88.26749243915684, + "promptChars": 10165, + "promptTokensEst": 2541, + "score": 40, + "stars": "★★☆☆☆", + "error": null + }, + { + "model": "qwen3.5:35b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 1, + "testsPassed": 0, + "testsFailed": 1, + "totalDurationMs": 60346, + "totalTokens": 4728, + "avgTokPerSec": 87.67792775342463, + "promptChars": 11661, + "promptTokensEst": 2915, + "score": 40, + "stars": "★★☆☆☆", + "error": null + }, + { + "model": "codestral:22b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 5, + "testsPassed": 4, + "testsFailed": 1, + "totalDurationMs": 80515, + "totalTokens": 3081, + "avgTokPerSec": 43.828884806830445, + "promptChars": 10150, + "promptTokensEst": 2538, + "score": 88, + "stars": "★★★★☆", + "error": null + }, + { + "model": "codestral:22b", + "scenario": "users", + "reqOk": true, + "specOk": true, + 
"specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 5, + "testsPassed": 3, + "testsFailed": 2, + "totalDurationMs": 61598, + "totalTokens": 2441, + "avgTokPerSec": 44.017116943523455, + "promptChars": 9288, + "promptTokensEst": 2322, + "score": 76, + "stars": "★★★★☆", + "error": null + }, + { + "model": "codestral:22b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 6, + "testsPassed": 6, + "testsFailed": 0, + "totalDurationMs": 81568, + "totalTokens": 3229, + "avgTokPerSec": 43.67638078062432, + "promptChars": 10475, + "promptTokensEst": 2619, + "score": 100, + "stars": "★★★★★", + "error": null + }, + { + "model": "qwen3-coder:30b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 6, + "testsPassed": 6, + "testsFailed": 0, + "totalDurationMs": 30315, + "totalTokens": 2379, + "avgTokPerSec": 123.42041099401449, + "promptChars": 10111, + "promptTokensEst": 2528, + "score": 100, + "stars": "★★★★★", + "error": null + }, + { + "model": "qwen3-coder:30b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 7, + "testsPassed": 6, + "testsFailed": 1, + "totalDurationMs": 23071, + "totalTokens": 2443, + "avgTokPerSec": 123.11212122029796, + "promptChars": 9150, + "promptTokensEst": 2288, + "score": 91, + "stars": "★★★★★", + "error": null + }, + { + "model": "qwen3-coder:30b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 11, + "testsPassed": 11, + "testsFailed": 0, + "totalDurationMs": 40933, + "totalTokens": 4370, + "avgTokPerSec": 121.8144240305409, + "promptChars": 10789, + "promptTokensEst": 2697, + "score": 100, + "stars": "★★★★★", + "error": null + } +] \ No newline at end of file