Siirrä kipina-codebench projektin päätasolle

2026-04-14 09:44:14 +03:00
parent b93ae2fd1b
commit 7b27800390
24 changed files with 0 additions and 0 deletions
--- a/kipina-codebench/benchmark.mjs
+++ b/kipina-codebench/benchmark.mjs
@@ -0,0 +1,490 @@
+#!/usr/bin/env node
+/**
+ * Kipinä CodeBench — LLM-koodingenerointibenchmark
+ *
+ * Generoi FastAPI-projekteja Ollama-malleilla ja testaa pytest:llä Docker-kontissa.
+ *
+ * Käyttö:
+ *   node benchmark.mjs                                    # kaikki mallit, oletusskenaario
+ *   node benchmark.mjs --models qwen3-coder:30b           # yksi malli
+ *   node benchmark.mjs --ollama http://host:11434          # eri Ollama
+ *   node benchmark.mjs --scenarios all                    # kaikki skenaariot
+ *   node benchmark.mjs --output ./results/run-001         # custom output-hakemisto
+ */
+
+import { execSync } from 'child_process';
+import { writeFileSync, readFileSync, mkdirSync, rmSync, existsSync, readdirSync } from 'fs';
+import { dirname, join, resolve } from 'path';
+import { fileURLToPath } from 'url';
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+
+// === CLI arguments ===
+const args = process.argv.slice(2);
+
+/**
+ * Looks up the value of a `--name` command-line option.
+ *
+ * Both `--name value` and `--name=value` are accepted. The space-separated
+ * form is checked first, so invocations that worked before behave the same.
+ *
+ * @param {string} name - Option name without the leading dashes.
+ * @param {string} fallback - Returned when the option is absent or has no value.
+ * @returns {string} The option value, or `fallback`.
+ */
+function arg(name, fallback) {
+    const flag = `--${name}`;
+    const i = args.indexOf(flag);
+    if (i >= 0) {
+        const next = args[i + 1];
+        // A following `--option` is the next flag, not this option's value.
+        if (next && !next.startsWith('--')) return next;
+    }
+    const inlinePrefix = `${flag}=`;
+    const inline = args.find((a) => a.startsWith(inlinePrefix));
+    const inlineValue = inline ? inline.slice(inlinePrefix.length) : '';
+    return inlineValue || fallback;
+}
+// Base URL of the Ollama server (CLI flag, then OLLAMA_URL env var, then localhost).
+// Trailing slashes are dropped so `${OLLAMA_URL}/api/...` never contains `//`,
+// which the server may not route to the intended endpoint.
+const OLLAMA_URL = arg('ollama', process.env.OLLAMA_URL || 'http://localhost:11434').replace(/\/+$/, '');
+// Optional hub base URL; when non-empty, requests are sent to the hub instead of Ollama.
+const HUB_URL = arg('hub', '').replace(/\/+$/, '');
+// Comma-separated substrings used to filter the model list (empty = no filtering).
+const FILTER_MODELS = arg('models', '');
+// Raw value of --scenarios; interpreted in main().
+const SCENARIO_FILTER = arg('scenarios', 'default');
+// Directory that receives generated projects and reports; main() wipes it on start.
+const OUTPUT_DIR = arg('output', '/tmp/kipina-benchmark');
+// Upper bound for validate-then-fix iterations in one pipeline run.
+const MAX_FIX_ROUNDS = 2;
+
+// === Prompt loading from files ===
+
+/**
+ * Reads `prompts/<name>.md` next to this script and returns it trimmed.
+ *
+ * @param {string} name - Prompt file name without the `.md` extension.
+ * @returns {string} Trimmed file contents.
+ * @throws {Error} When the prompt file does not exist.
+ */
+function loadPrompt(name) {
+    const promptFile = join(__dirname, 'prompts', `${name}.md`);
+    if (existsSync(promptFile)) {
+        const raw = readFileSync(promptFile, 'utf-8');
+        return raw.trim();
+    }
+    throw new Error(`Prompti puuttuu: ${promptFile}`);
+}
+const CLIENT_SYSTEM = loadPrompt('client');
+const SPEC_SYSTEM = loadPrompt('spec');
+const CODE_SYSTEM = loadPrompt('code');
+const FIX_SYSTEM = loadPrompt('fix');
+
+// === Golden example loading ===
+const GOLDEN_DIR = join(__dirname, 'golden-examples');
+const GOLDEN_PY_FILES = ['models.py', 'schemas.py', 'main.py', 'test_main.py'];
+
+/**
+ * Builds the reference-implementation text from the `todo` golden example.
+ *
+ * Each existing file listed in GOLDEN_PY_FILES is appended as a
+ * `=== <file> ===` section after a fixed header line.
+ *
+ * @returns {string} The assembled text, or '' when the `todo` directory is missing.
+ */
+function loadGoldenExample() {
+    const todoDir = join(GOLDEN_DIR, 'todo');
+    if (!existsSync(todoDir)) return '';
+
+    const sections = ['\nREFERENCE IMPLEMENTATION (todo project — follow this exact structure, style, and conventions):\n\n'];
+    for (const fileName of GOLDEN_PY_FILES) {
+        const filePath = join(todoDir, fileName);
+        if (!existsSync(filePath)) continue;
+        const body = readFileSync(filePath, 'utf-8').trim();
+        sections.push(`=== ${fileName} ===\n${body}\n\n`);
+    }
+    return sections.join('');
+}
+const GOLDEN_EXAMPLE = loadGoldenExample();
+
+// === Removal of reasoning tags (gemma4, qwen3/3.5 etc.) ===
+
+/**
+ * Deletes closed reasoning sections from model output and trims the rest.
+ *
+ * @param {string} text - Raw model output.
+ * @returns {string} Output without `<|channel>thought…<channel|>` and
+ *   `<think>…</think>` sections; unclosed tags are left untouched.
+ */
+function stripThinking(text) {
+    const reasoningPatterns = [
+        /<\|channel>thought[\s\S]*?<channel\|>/g,  // gemma4
+        /<think>[\s\S]*?<\/think>/g,                // qwen3, qwen3.5
+    ];
+    let cleaned = text;
+    for (const pattern of reasoningPatterns) {
+        cleaned = cleaned.replace(pattern, '');
+    }
+    return cleaned.trim();
+}
+
+// === Ollama / Hub client ===
+
+/**
+ * Sends one non-streaming chat request and returns the cleaned answer with stats.
+ *
+ * When HUB_URL is set the request goes to `${HUB_URL}/api/v1/chat/completions`;
+ * otherwise it goes to `${OLLAMA_URL}/api/chat`.
+ *
+ * @param {string} model - Model name passed through to the backend.
+ * @param {string} prompt - User message.
+ * @param {string} systemPrompt - System message; omitted on the Ollama route when falsy.
+ * @param {number} [maxTokens=2048] - Generation limit (`max_tokens` / `num_predict`).
+ * @returns {Promise<{text: string, tokens: number, durationMs: number, tokPerSec: number}>}
+ *   `tokPerSec` is 0 when the backend gives no usable timing information.
+ * @throws {Error} On a non-2xx HTTP response.
+ */
+async function ollamaChat(model, prompt, systemPrompt, maxTokens = 2048) {
+    const start = Date.now();
+
+    if (HUB_URL) {
+        const taskId = `bench-${Date.now()}-${Math.random().toString(36).slice(2,8)}`;
+        const resp = await fetch(`${HUB_URL}/api/v1/chat/completions`, {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify({ model, prompt, task_id: taskId, system_prompt: systemPrompt, max_tokens: maxTokens }),
+        });
+        if (!resp.ok) throw new Error(`Hub HTTP ${resp.status}: ${await resp.text()}`);
+        const data = await resp.json();
+        const elapsed = Date.now() - start;
+        const tokens = data.tokens_generated || 0;
+        // Prefer the hub-reported rate; otherwise derive it from wall-clock time,
+        // guarding against a zero elapsed time (would give Infinity/NaN).
+        const derivedRate = elapsed > 0 ? tokens / (elapsed / 1000) : 0;
+        return {
+            text: stripThinking((data.response || '').trim()),
+            tokens,
+            durationMs: elapsed,
+            tokPerSec: data.tokens_per_sec || derivedRate,
+        };
+    }
+
+    // Direct Ollama route
+    const messages = [];
+    if (systemPrompt) messages.push({ role: 'system', content: systemPrompt });
+    messages.push({ role: 'user', content: prompt });
+
+    const resp = await fetch(`${OLLAMA_URL}/api/chat`, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({
+            model,
+            messages,
+            stream: false,
+            think: false,
+            options: { num_predict: maxTokens, temperature: 0.7, top_k: 40, repeat_penalty: 1.15 },
+        }),
+    });
+    if (!resp.ok) throw new Error(`Ollama HTTP ${resp.status}: ${await resp.text()}`);
+    const data = await resp.json();
+    const elapsed = Date.now() - start;
+    const rawContent = (data.message?.content || '').trim();
+    const thinking = (data.message?.thinking || '').trim();
+    // Fall back to the reasoning text when the model produced no regular content.
+    const text = stripThinking(rawContent || thinking);
+    const evalCount = data.eval_count || 0;
+    if (!rawContent && thinking) console.log(`      ⚠ thinking-malli: ${thinking.length} merkkiä ajattelua, content tyhjä`);
+    // eval_duration is treated as nanoseconds; a missing value yields 0 tok/s
+    // rather than an astronomically large rate.
+    const evalDurationNs = data.eval_duration || 0;
+    const tokPerSec = evalDurationNs > 0 ? evalCount / (evalDurationNs / 1e9) : 0;
+    return { text, tokens: evalCount, durationMs: elapsed, tokPerSec };
+}
+
+/**
+ * Fetches the names of the models offered by the hub (when HUB_URL is set)
+ * or by the Ollama server.
+ *
+ * @returns {Promise<string[]>} The `name` of every entry in the response's `models` list.
+ * @throws {Error} On a non-2xx HTTP response.
+ */
+async function ollamaListModels() {
+    let tagsUrl = `${OLLAMA_URL}/api/tags`;
+    if (HUB_URL) tagsUrl = `${HUB_URL}/api/v1/ollama/tags`;
+
+    const response = await fetch(tagsUrl);
+    if (!response.ok) throw new Error(`Tags: HTTP ${response.status}`);
+
+    const payload = await response.json();
+    const entries = payload.models || [];
+    const names = [];
+    for (const entry of entries) names.push(entry.name);
+    return names;
+}
+
+// === File parser for LLM responses ===
+
+/**
+ * Returns the code inside a section's Markdown fence.
+ *
+ * When a complete fenced block is present, only its interior is kept, so prose
+ * before or after the fence does not leak into the generated file. Without a
+ * complete fence (e.g. truncated output) stray fence markers are stripped.
+ */
+function unwrapCodeFence(section) {
+    const fenced = section.match(/^```[^\n`]*\n([\s\S]*?)\n?```[ \t]*$/m);
+    if (fenced) return fenced[1].trim();
+    return section.replace(/^```(?:python|toml)?\s*\n?/m, '').replace(/\n?```\s*$/m, '').trim();
+}
+
+/**
+ * Splits an LLM response into files using `=== name.py ===` / `=== name.toml ===` headers.
+ *
+ * @param {string} text - Model output containing one or more file sections.
+ * @returns {Object<string, string>} Map from file name to content (newline-terminated).
+ *   Empty sections are omitted, as are names that contain a path separator.
+ */
+function parseGeneratedFiles(text) {
+    const files = {};
+    // split() with one capture group yields [preamble, name1, body1, name2, body2, ...].
+    const sections = text.split(/===\s*(\S+\.(?:py|toml))\s*===/);
+    for (let i = 1; i < sections.length - 1; i += 2) {
+        const name = sections[i];
+        // Names come from untrusted model output and are later used as file paths;
+        // refuse anything that could escape the output directory.
+        if (/[\\/]/.test(name)) continue;
+        const content = unwrapCodeFence(sections[i + 1].trim());
+        if (content) files[name] = content + '\n';
+    }
+    return files;
+}
+
+// === Validator ===
+
+/**
+ * Removes single-line string literals and a trailing `#` comment from one
+ * Python line so keyword checks only see code. Multi-line (triple-quoted)
+ * strings are not tracked across lines.
+ */
+function stripPyStringsAndComment(line) {
+    const withoutStrings = line.replace(/("|')(?:\\.|(?!\1)[^\\])*\1/g, '""');
+    const hashAt = withoutStrings.indexOf('#');
+    return hashAt >= 0 ? withoutStrings.slice(0, hashAt) : withoutStrings;
+}
+
+/**
+ * Runs heuristic static checks on the generated Python files.
+ *
+ * Checks: relative imports, names imported from models/schemas/main that do not
+ * occur in the source file, missing `from datetime import` in schemas.py, and
+ * bare lowercase `true`/`false` in code (outside strings and comments).
+ *
+ * @param {Object<string, string>} files - Map from file name to file content.
+ * @returns {string[]} Issue messages, each starting with `ISSUE: <file>`.
+ */
+function validateProjectCode(files) {
+    const issues = [];
+    for (const [fname, code] of Object.entries(files)) {
+        if (!fname.endsWith('.py')) continue;
+        const lines = code.split('\n');
+        for (const line of lines) {
+            if (/^from\s+\.(\w*)\s+import/.test(line)) issues.push(`ISSUE: ${fname}: relatiivinen import`);
+        }
+        for (const line of lines) {
+            const m = line.match(/^from\s+(models|schemas|main)\s+import\s+(.+)/);
+            if (!m) continue;
+            const srcCode = files[m[1] + '.py'];
+            if (!srcCode) { issues.push(`ISSUE: ${fname}: ${m[1]}.py puuttuu`); continue; }
+            // Drop a trailing comment, parentheses and line-continuation backslashes so
+            // `import (A, B)` and `import A, \` are parsed as plain name lists.
+            const names = m[2]
+                .replace(/#.*$/, '')
+                .replace(/[()\\]/g, '')
+                .split(',')
+                .map(n => n.trim().split(/\s+as\s+/)[0].trim());
+            for (const name of names) {
+                // A wildcard import names nothing that could be looked up.
+                if (!name || name === '*') continue;
+                if (!srcCode.includes(name)) issues.push(`ISSUE: ${fname}: "${name}" puuttuu ${m[1]}.py:stä`);
+            }
+        }
+        if (fname === 'schemas.py') {
+            if (/:\s*date\b/.test(code) && !/from datetime import/.test(code))
+                issues.push('ISSUE: schemas.py: date-import puuttuu');
+            if (/:\s*datetime\b/.test(code) && !/from datetime import/.test(code))
+                issues.push('ISSUE: schemas.py: datetime-import puuttuu');
+        }
+        for (let i = 0; i < lines.length; i++) {
+            const line = lines[i];
+            if (/^\s*#/.test(line) || /^\s*$/.test(line)) continue;
+            // Only code counts: "true"/"false" inside strings or comments is valid Python.
+            const codeOnly = stripPyStringsAndComment(line);
+            if (/(?<!["\w])false(?![\w"])/.test(codeOnly)) issues.push(`ISSUE: ${fname}:${i+1}: "false" → "False"`);
+            if (/(?<!["\w])true(?![\w"])/.test(codeOnly)) issues.push(`ISSUE: ${fname}:${i+1}: "true" → "True"`);
+        }
+    }
+    return issues;
+}
+
+/**
+ * Extracts the first parseable top-level JSON object from model output.
+ *
+ * If the text contains a Markdown code fence, only the first fence's content is
+ * searched. Brace matching ignores braces inside JSON string literals and
+ * unmatched closing braces, so neither can derail the depth counter.
+ *
+ * @param {string} text - Model output that may wrap JSON in prose or a fence.
+ * @returns {*} The parsed object, or null when nothing parses.
+ */
+function extractJson(text) {
+    const fence = text.match(/```(?:json)?\s*\n([\s\S]*?)```/);
+    const source = fence ? fence[1].trim() : text;
+
+    let depth = 0;
+    let start = -1;
+    let inString = false;
+    let escaped = false;
+    for (let i = 0; i < source.length; i++) {
+        const ch = source[i];
+        if (inString) {
+            if (ch === '\n') { inString = false; escaped = false; }  // raw newlines cannot occur in JSON strings
+            else if (escaped) escaped = false;
+            else if (ch === '\\') escaped = true;
+            else if (ch === '"') inString = false;
+            continue;
+        }
+        if (ch === '"') {
+            // Quotes in surrounding prose are irrelevant; only track strings inside a candidate object.
+            if (depth > 0) inString = true;
+        } else if (ch === '{') {
+            if (depth === 0) start = i;
+            depth++;
+        } else if (ch === '}' && depth > 0) {
+            depth--;
+            if (depth === 0) {
+                try {
+                    return JSON.parse(source.slice(start, i + 1));
+                } catch {
+                    // Not valid JSON; keep scanning for a later candidate.
+                }
+            }
+        }
+    }
+    return null;
+}
+
+// === Test scenarios ===
+// Each entry has an `id` (used in output directory names and result tables) and
+// a `prompt` that is sent to the model as the user message of the first pipeline step.
+const SCENARIOS = [
+    { id: 'todo', prompt: 'Todo-sovellus: tehtävien hallinta, deadline, prioriteetti ja status' },
+    { id: 'users', prompt: 'REST API käyttäjähallinnalle SQLite-tietokannalla' },
+    { id: 'blog', prompt: 'Blogi-API: kirjoittajat ja artikkelit, julkaisupäivämäärä ja status' },
+];
+
+// === Scoring (0–100) and star rating ===
+
+/**
+ * Converts one pipeline result into a 0–100 score.
+ *
+ * Components: 10 for a valid spec, 10 when there is no error or at least one
+ * test ran, up to 60 for the test pass ratio, and 20 minus 10 per fix round
+ * (never below 0). A result with an error and no tests scores 0.
+ *
+ * @param {object} r - Result with `error`, `specOk`, `testsTotal`, `testsPassed`, `fixRounds`.
+ * @returns {number} Score capped at 100.
+ */
+function scoreResult(r) {
+    const noTestsRan = r.testsTotal === 0;
+    if (r.error && noTestsRan) return 0;
+
+    const specPoints = r.specOk ? 10 : 0;
+    const pipelinePoints = (!r.error || r.testsTotal > 0) ? 10 : 0;
+    const testPoints = r.testsTotal > 0
+        ? Math.round((r.testsPassed / r.testsTotal) * 60)
+        : 0;
+    const fixPoints = Math.max(0, 20 - r.fixRounds * 10);
+
+    return Math.min(100, specPoints + pipelinePoints + testPoints + fixPoints);
+}
+/**
+ * Maps a numeric score to a five-character star string.
+ *
+ * @param {number} score - Score, normally 0–100.
+ * @returns {string} Five stars at 90+, four at 70+, three at 50+, two at 25+,
+ *   one for any other positive score, otherwise none.
+ */
+function starsForScore(score) {
+    const tiers = [
+        [90, '★★★★★'],
+        [70, '★★★★☆'],
+        [50, '★★★☆☆'],
+        [25, '★★☆☆☆'],
+    ];
+    for (const [minimum, stars] of tiers) {
+        if (score >= minimum) return stars;
+    }
+    return score > 0 ? '★☆☆☆☆' : '☆☆☆☆☆';
+}
+
+// === Pipeline: one model and one scenario ===
+
+/** Reads the "N passed" / "N failed" / "N error(s)" counts from pytest's text output. */
+function parsePytestCounts(output) {
+    const count = (label) => {
+        const m = output.match(new RegExp(`(\\d+) ${label}`));
+        return m ? Number.parseInt(m[1], 10) : 0;
+    };
+    return { passed: count('passed'), failed: count('failed'), errors: count('error') };
+}
+
+/**
+ * Runs the full benchmark pipeline for one model and one scenario:
+ * requirements → JSON spec → code generation → validation/fix loop → pytest in Docker.
+ *
+ * Intermediate artefacts are written under `${OUTPUT_DIR}/<model>__<scenario>`.
+ * Failures inside the pipeline steps are recorded in `result.error`, not thrown.
+ * Timing, token and score fields are filled in on every exit path, including
+ * the early exits after a failed step.
+ *
+ * @param {string} model - Model name.
+ * @param {{id: string, prompt: string}} scenario - Scenario to run.
+ * @returns {Promise<object>} The result record for this run.
+ */
+async function runPipeline(model, scenario) {
+    const result = {
+        model, scenario: scenario.id,
+        reqOk: false, specOk: false, specEntities: 0,
+        validationIssues: 0, fixRounds: 0,
+        testsTotal: 0, testsPassed: 0, testsFailed: 0,
+        totalDurationMs: 0, totalTokens: 0, avgTokPerSec: 0,
+        promptChars: 0, promptTokensEst: 0,
+        score: 0, stars: '',
+        error: null,
+    };
+    const timings = [];
+    const dir = `${OUTPUT_DIR}/${model.replace(/[/:]/g, '_')}__${scenario.id}`;
+    mkdirSync(dir, { recursive: true });
+
+    try {
+        // 1. Requirements
+        console.log(`    [1/5] Vaatimukset...`);
+        const req = await ollamaChat(model, scenario.prompt, CLIENT_SYSTEM, 2048);
+        timings.push(req);
+        if (!req.text || req.text.length < 50) { result.error = 'Vaatimukset liian lyhyet'; return result; }
+        result.reqOk = true;
+        writeFileSync(`${dir}/_requirements.txt`, req.text);
+
+        // 2. JSON spec
+        console.log(`    [2/5] JSON-speksi...`);
+        const specResp = await ollamaChat(model, `${req.text}\n\nOutput a JSON spec for this project.`, SPEC_SYSTEM, 4096);
+        timings.push(specResp);
+        const spec = extractJson(specResp.text);
+        if (!spec || !spec.entities || spec.entities.length === 0) {
+            result.error = 'JSON-speksi epäonnistui';
+            // Keep the raw answer for debugging.
+            writeFileSync(`${dir}/_spec_raw.txt`, specResp.text);
+            return result;
+        }
+        result.specOk = true;
+        result.specEntities = spec.entities.length;
+        writeFileSync(`${dir}/_spec.json`, JSON.stringify(spec, null, 2));
+
+        // 3. LLM code generation
+        console.log(`    [3/5] Koodigenerointi (LLM)...`);
+        const codePrompt = `${GOLDEN_EXAMPLE}\n---\n\nPROJECT REQUIREMENTS:\n${req.text}\n\nJSON SPECIFICATION:\n${JSON.stringify(spec, null, 2)}\n\nGenerate the complete project with all 4 files. Follow the reference implementation patterns exactly.`;
+        result.promptChars = CODE_SYSTEM.length + codePrompt.length;
+        // Rough estimate: about four characters per token.
+        result.promptTokensEst = Math.round(result.promptChars / 4);
+        const codeResp = await ollamaChat(model, codePrompt, CODE_SYSTEM, 8192);
+        timings.push(codeResp);
+        writeFileSync(`${dir}/_code_raw.txt`, codeResp.text);
+        const files = parseGeneratedFiles(codeResp.text);
+        const required = ['models.py', 'schemas.py', 'main.py', 'test_main.py'];
+        const missing = required.filter(f => !files[f]);
+        if (missing.length > 0) { result.error = `Puuttuvat: ${missing.join(', ')}`; return result; }
+
+        // 4. Validation + fix loop
+        let issues = validateProjectCode(files);
+        let fixRound = 0;
+        while (issues.length > 0 && fixRound < MAX_FIX_ROUNDS) {
+            fixRound++;
+            console.log(`    [4/5] Korjauskierros ${fixRound} (${issues.length} ongelmaa)...`);
+            // Group issues by the file name embedded in each message.
+            const issuesByFile = {};
+            for (const issue of issues) {
+                const m = issue.match(/^ISSUE:\s*(\S+?):/);
+                const fname = m ? m[1] : 'unknown';
+                if (!issuesByFile[fname]) issuesByFile[fname] = [];
+                issuesByFile[fname].push(issue);
+            }
+            for (const [fname, fIssues] of Object.entries(issuesByFile)) {
+                if (!files[fname]) continue;
+                const fixPrompt = `Fix the following issues in this Python file. Return ONLY the complete corrected file, no explanations.\n\nISSUES:\n${fIssues.join('\n')}\n\nCURRENT FILE (${fname}):\n\`\`\`python\n${files[fname]}\`\`\``;
+                const fixResp = await ollamaChat(model, fixPrompt, FIX_SYSTEM, 2048);
+                timings.push(fixResp);
+                if (fixResp.text) {
+                    files[fname] = fixResp.text.replace(/^```(?:python)?\s*\n?/m, '').replace(/\n?```\s*$/m, '').trim() + '\n';
+                }
+            }
+            issues = validateProjectCode(files);
+        }
+        result.validationIssues = issues.length;
+        result.fixRounds = fixRound;
+
+        // Write the Python files generated by the LLM
+        for (const [fn, content] of Object.entries(files)) {
+            if (fn.endsWith('.py')) writeFileSync(`${dir}/${fn}`, content);
+        }
+
+        // 5. Pytest in a Docker container (kipina-pytest image)
+        console.log(`    [5/5] Pytest (Docker)...`);
+        // Bind mounts need an absolute host path; a relative --output would otherwise
+        // be taken for a volume name.
+        const mountDir = resolve(dir);
+        try {
+            const pytestOut = execSync(
+                `docker run --rm -v "${mountDir}:/src:ro" kipina-pytest 2>&1`,
+                { timeout: 120000, encoding: 'utf-8' }
+            );
+            writeFileSync(`${dir}/_pytest.txt`, pytestOut);
+            const counts = parsePytestCounts(pytestOut);
+            result.testsPassed = counts.passed;
+            result.testsFailed = counts.failed;
+            result.testsTotal = result.testsPassed + result.testsFailed;
+        } catch (e) {
+            // execSync throws on a non-zero exit status or a timeout.
+            const output = e.stdout || e.stderr || e.message || '';
+            writeFileSync(`${dir}/_pytest.txt`, output);
+            const counts = parsePytestCounts(output);
+            result.testsPassed = counts.passed;
+            // Collection/setup errors count as failed tests.
+            result.testsFailed = counts.failed + counts.errors;
+            result.testsTotal = result.testsPassed + result.testsFailed;
+            if (result.testsTotal === 0) result.error = 'Pytest kaatui';
+        }
+    } catch (e) {
+        result.error = e.message;
+    } finally {
+        // Summary: runs for early returns too, so failed runs still report the
+        // time and tokens they consumed and get a star string.
+        result.totalDurationMs = timings.reduce((s, t) => s + t.durationMs, 0);
+        result.totalTokens = timings.reduce((s, t) => s + t.tokens, 0);
+        result.avgTokPerSec = timings.length > 0 ? timings.reduce((s, t) => s + t.tokPerSec, 0) / timings.length : 0;
+        result.score = scoreResult(result);
+        result.stars = starsForScore(result.score);
+    }
+
+    return result;
+}
+
+// === Main ===
+
+/**
+ * Turns the --scenarios value into a list of scenarios.
+ * 'default' selects the first scenario, 'all' selects every scenario, and any
+ * other value is read as a comma-separated list of scenario ids.
+ * Throws an Error when an id is unknown.
+ */
+function selectScenarios(filter) {
+    if (filter === 'all') return SCENARIOS;
+    if (filter === 'default') return [SCENARIOS[0]];
+    const wanted = filter.split(',').map((s) => s.trim()).filter(Boolean);
+    const unknown = wanted.filter((id) => !SCENARIOS.some((s) => s.id === id));
+    if (wanted.length === 0 || unknown.length > 0) {
+        const available = ['all', 'default', ...SCENARIOS.map((s) => s.id)].join(', ');
+        throw new Error(`Tuntematon skenaario: ${unknown.join(', ') || filter} (saatavilla: ${available})`);
+    }
+    return SCENARIOS.filter((s) => wanted.includes(s.id));
+}
+
+/** Formats a token count, switching to a one-decimal "K" form above 1000. */
+function formatTokenCount(n) {
+    return n > 1000 ? `${(n / 1000).toFixed(1)}K` : `${n}`;
+}
+
+/**
+ * Entry point: lists the models, runs every model × scenario combination
+ * sequentially, prints the result tables and writes results.json (plus
+ * report.html when report-template.html exists next to this script).
+ */
+async function main() {
+    console.log('╔══════════════════════════════════════════════╗');
+    console.log('║       Kipinä CodeBench                      ║');
+    console.log('╚══════════════════════════════════════════════╝');
+    console.log(`Ollama: ${OLLAMA_URL}`);
+
+    // Fetch the model list
+    let models;
+    try {
+        models = await ollamaListModels();
+    } catch (e) {
+        console.error(`Ei yhteyttä Ollamaan (${OLLAMA_URL}): ${e.message}`);
+        process.exit(1);
+    }
+
+    if (FILTER_MODELS) {
+        // Empty entries (e.g. from a trailing comma) would match every model.
+        const filter = FILTER_MODELS.split(',').map(s => s.trim()).filter(Boolean);
+        models = models.filter(m => filter.some(f => m.includes(f)));
+    }
+    if (models.length === 0) {
+        // Stop before the old results are wiped for a run that would produce nothing.
+        console.error(`Yhtään mallia ei löytynyt (suodatin: "${FILTER_MODELS}")`);
+        process.exit(1);
+    }
+
+    console.log(`Mallit (${models.length}): ${models.join(', ')}`);
+
+    let scenarios;
+    try {
+        scenarios = selectScenarios(SCENARIO_FILTER);
+    } catch (e) {
+        console.error(e.message);
+        process.exit(1);
+    }
+    console.log(`Skenaariot (${scenarios.length}): ${scenarios.map(s => s.id).join(', ')}`);
+    console.log(`Tulokset: ${OUTPUT_DIR}/`);
+    console.log('');
+
+    // Clean the output directory. It is removed recursively, so refuse when it is,
+    // or contains, the working directory or this script's directory.
+    const outputPrefix = join(resolve(OUTPUT_DIR), '/');
+    const wouldClobber = [process.cwd(), __dirname].some((p) => join(resolve(p), '/').startsWith(outputPrefix));
+    if (wouldClobber) {
+        console.error(`Output-hakemistoa ${OUTPUT_DIR} ei tyhjennetä: se sisältää työhakemiston tai benchmarkin lähdekoodin`);
+        process.exit(1);
+    }
+    rmSync(OUTPUT_DIR, { recursive: true, force: true });
+    mkdirSync(OUTPUT_DIR, { recursive: true });
+
+    const results = [];
+
+    for (const model of models) {
+        for (const scenario of scenarios) {
+            console.log(`\n━━━ ${model} × ${scenario.id} ━━━`);
+            const r = await runPipeline(model, scenario);
+            results.push(r);
+
+            const status = r.error ? `✗ ${r.error}` :
+                r.testsPassed === r.testsTotal && r.testsTotal > 0 ? `✓ ${r.testsPassed}/${r.testsTotal}` :
+                `◐ ${r.testsPassed}/${r.testsTotal}`;
+            const ctxInfo = r.promptTokensEst > 0 ? ` | ctx ~${(r.promptTokensEst/1000).toFixed(1)}K` : '';
+            console.log(`    → ${status} | ${r.stars} ${r.score}p | ${(r.totalDurationMs/1000).toFixed(1)}s | ${r.totalTokens} tok | ${r.avgTokPerSec.toFixed(1)} tok/s${ctxInfo}`);
+        }
+    }
+
+    // === Results table ===
+    console.log('\n\n╔══════════════════════════════════════════════════════════════════════════════════════════════════╗');
+    console.log('║                                    TULOKSET                                                     ║');
+    console.log('╠══════════════════════════════════════════════════════════════════════════════════════════════════╣');
+
+    const header = [
+        'Malli'.padEnd(40),
+        'Skenaario'.padEnd(10),
+        'Speksi'.padEnd(8),
+        'Testit'.padEnd(10),
+        'Korjaus'.padEnd(8),
+        'Ctx'.padEnd(7),
+        'Aika'.padEnd(8),
+        'tok/s'.padEnd(8),
+        'Pisteet',
+    ].join(' │ ');
+    console.log(`║ ${header} ║`);
+    console.log('╠' + '═'.repeat(header.length + 2) + '╣');
+
+    for (const r of results) {
+        const specStatus = r.specOk ? `✓ ${r.specEntities}e` : '✗';
+        const testStatus = r.testsTotal > 0 ? `${r.testsPassed}/${r.testsTotal}` : '-';
+        const fixStatus = r.fixRounds > 0 ? `${r.fixRounds}×` : '-';
+        const ctx = r.promptTokensEst > 0 ? `~${(r.promptTokensEst/1000).toFixed(1)}K` : '-';
+        const time = `${(r.totalDurationMs/1000).toFixed(0)}s`;
+        const speed = `${r.avgTokPerSec.toFixed(0)}`;
+        const row = [
+            r.model.padEnd(40),
+            r.scenario.padEnd(10),
+            specStatus.padEnd(8),
+            testStatus.padEnd(10),
+            fixStatus.padEnd(8),
+            ctx.padEnd(7),
+            time.padEnd(8),
+            speed.padEnd(8),
+            `${r.stars} ${r.score}`,
+        ].join(' │ ');
+        console.log(`║ ${row} ║`);
+    }
+    console.log('╚' + '═'.repeat(header.length + 2) + '╝');
+
+    // === Per-model summary ===
+    const modelNames = [...new Set(results.map(r => r.model))];
+    const scenarioIds = scenarios.map(s => s.id);
+
+    console.log('\n');
+    const mHeader = [
+        'Malli'.padEnd(35),
+        ...scenarioIds.map(s => s.padEnd(22)),
+        'Yht.'.padEnd(8),
+        'Out'.padEnd(7),
+        'Aika'.padEnd(8),
+        'tok/s'.padEnd(7),
+        'Pisteet',
+    ].join(' │ ');
+    console.log(mHeader);
+    console.log('─'.repeat(mHeader.length));
+
+    for (const model of modelNames) {
+        const mrs = results.filter(r => r.model === model);
+        const cols = scenarioIds.map(sid => {
+            const r = mrs.find(r => r.scenario === sid);
+            if (!r) return '-'.padEnd(22);
+            const t = r.testsTotal > 0 ? `${r.testsPassed}/${r.testsTotal}` : '-';
+            const s = `${(r.totalDurationMs/1000).toFixed(0)}s`;
+            return `${t} ${s} ${formatTokenCount(r.totalTokens)}`.padEnd(22);
+        });
+        const totalPassed = mrs.reduce((s, r) => s + r.testsPassed, 0);
+        const totalTests = mrs.reduce((s, r) => s + r.testsTotal, 0);
+        const totalTokens = mrs.reduce((s, r) => s + r.totalTokens, 0);
+        const totalTime = mrs.reduce((s, r) => s + r.totalDurationMs, 0);
+        const avgSpeed = mrs.length > 0 ? Math.round(mrs.reduce((s, r) => s + r.avgTokPerSec, 0) / mrs.length) : 0;
+        const avgScoreModel = mrs.length > 0 ? Math.round(mrs.reduce((s, r) => s + r.score, 0) / mrs.length) : 0;
+        const pct = totalTests > 0 ? Math.round(totalPassed / totalTests * 100) : 0;
+        const row = [
+            model.padEnd(35),
+            ...cols,
+            `${totalPassed}/${totalTests}`.padEnd(8),
+            formatTokenCount(totalTokens).padEnd(7),
+            `${(totalTime/1000).toFixed(0)}s`.padEnd(8),
+            `${avgSpeed}`.padEnd(7),
+            `${starsForScore(avgScoreModel)} ${avgScoreModel}p (${pct}%)`,
+        ].join(' │ ');
+        console.log(row);
+    }
+
+    // Save the JSON + HTML report
+    writeFileSync(`${OUTPUT_DIR}/results.json`, JSON.stringify(results, null, 2));
+    const templatePath = join(__dirname, 'report-template.html');
+    if (existsSync(templatePath)) {
+        // Escape "<" so text such as "</script>" inside results cannot end the
+        // embedding script element.
+        const embedded = JSON.stringify(results).replace(/</g, '\\u003c');
+        // A replacer function keeps "$&", "$'" etc. in the data from being read
+        // as replacement patterns.
+        const html = readFileSync(templatePath, 'utf-8').replace(
+            '/*DATA_PLACEHOLDER*/[]',
+            () => embedded
+        );
+        writeFileSync(`${OUTPUT_DIR}/report.html`, html);
+        console.log(`\nRaportti: ${OUTPUT_DIR}/report.html`);
+    }
+    console.log(`JSON: ${OUTPUT_DIR}/results.json`);
+
+    // Overall summary
+    const passed = results.filter(r => !r.error && r.testsPassed === r.testsTotal && r.testsTotal > 0);
+    const partial = results.filter(r => !r.error && r.testsPassed < r.testsTotal && r.testsTotal > 0);
+    const failed = results.filter(r => r.error || r.testsTotal === 0);
+    const avgScore = results.length > 0 ? Math.round(results.reduce((s, r) => s + r.score, 0) / results.length) : 0;
+    const totalTime = results.reduce((s, r) => s + r.totalDurationMs, 0);
+    console.log(`\n${starsForScore(avgScore)} Keskiarvo: ${avgScore}p | ✓ PASS: ${passed.length} | ◐ PARTIAL: ${partial.length} | ✗ FAIL: ${failed.length} | Yhteensä: ${results.length} | Kokonaisaika: ${(totalTime/1000/60).toFixed(1)} min`);
+}
+
+main().catch(e => { console.error(e); process.exit(1); });