Files
agentic-studio/kipina-codebench/benchmark.mjs
jaakko 01b4fb8e22 CodeBench: --compact tiivistää golden examplen templaatiksi
Python: 1340 → 335 tokenia (−75%)
Rust: 3383 → 445 tokenia (−87%)
Käyttö: node benchmark.mjs --compact --models qwen3:4b
2026-04-14 10:59:39 +03:00

546 lines
25 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env node
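/**
* Kipinä CodeBench — LLM code-generation benchmark
*
* Generates FastAPI projects with Ollama models and tests them with pytest in a Docker container.
* With --lang rust the Rust prompt, Rust golden example and cargo test image are used instead.
*
* Usage:
* node benchmark.mjs # all models, default scenario
* node benchmark.mjs --models qwen3-coder:30b # a single model (comma-separated substrings)
* node benchmark.mjs --ollama http://host:11434 # a different Ollama server
* node benchmark.mjs --hub http://host:8080 # send requests through a hub instead of Ollama
* node benchmark.mjs --scenarios all # all scenarios
* node benchmark.mjs --lang rust # python (default) | rust
* node benchmark.mjs --think # enable thinking mode
* node benchmark.mjs --compact # use the compact golden-example template
* node benchmark.mjs --output ./results/run-001 # custom output directory
*/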
import { execSync } from 'child_process';
import { writeFileSync, readFileSync, mkdirSync, rmSync, existsSync } from 'fs';
import { dirname, join, resolve } from 'path';
import { fileURLToPath } from 'url';
// Directory that contains this module, derived from import.meta.url.
const __dirname = dirname(fileURLToPath(import.meta.url));
// === CLI arguments ===
const args = process.argv.slice(2);

/**
 * Look up the value that follows `--<name>` on the command line.
 *
 * @param {string} name - Flag name without the leading dashes.
 * @param {string} fallback - Returned when the flag is absent or is not
 *   followed by a non-empty token.
 * @returns {string} The token after the flag, or `fallback`.
 */
function arg(name, fallback) {
  const flagIndex = args.indexOf(`--${name}`);
  if (flagIndex < 0) {
    return fallback;
  }
  const value = args[flagIndex + 1];
  return value ? value : fallback;
}
// Base URL of the Ollama server: --ollama flag, then the OLLAMA_URL environment variable, then localhost.
const OLLAMA_URL = arg('ollama', process.env.OLLAMA_URL || 'http://localhost:11434');
// Optional hub base URL; when non-empty, chat and model-list requests go to the hub instead of Ollama.
const HUB_URL = arg('hub', '');
// Comma-separated model-name substrings; empty means "all models".
const FILTER_MODELS = arg('models', '');
// 'all' runs every scenario; any other value runs only the first one.
const SCENARIO_FILTER = arg('scenarios', 'default');
// ISO timestamp with ':' and '.' replaced by '-', cut to minute precision (16 characters).
const TIMESTAMP = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 16);
// Resolved to an absolute path: the directory is bind-mounted with `docker run -v`,
// which needs an absolute host path, while --output may be given relative to the cwd.
const OUTPUT_DIR = resolve(arg('output', `/tmp/kipina-benchmark/${TIMESTAMP}`));
// Archive directory next to this script; every run is copied here under its timestamp.
const RESULTS_DIR = join(__dirname, 'results');
// --think: sent to Ollama as `think` and triples the generation token budget.
const THINK_MODE = args.includes('--think');
// --compact: use the condensed golden-example template instead of the full reference code.
const COMPACT_MODE = args.includes('--compact');
const LANG = arg('lang', 'python'); // python | rust
// Upper bound for validation/fix iterations per pipeline run.
const MAX_FIX_ROUNDS = 2;
// === Prompt loading from files ===
/**
 * Read a prompt from `prompts/<name>.md` next to this module.
 *
 * @param {string} name - Prompt file name without the `.md` extension.
 * @returns {string} The file contents, trimmed.
 * @throws {Error} When the prompt file does not exist.
 */
function loadPrompt(name) {
  const promptFile = join(__dirname, 'prompts', `${name}.md`);
  if (existsSync(promptFile)) {
    return readFileSync(promptFile, 'utf-8').trim();
  }
  throw new Error(`Prompti puuttuu: ${promptFile}`);
}
const CLIENT_SYSTEM = loadPrompt('client');
const SPEC_SYSTEM = loadPrompt('spec');
const CODE_SYSTEM = loadPrompt(LANG === 'rust' ? 'code-rs' : 'code');
const FIX_SYSTEM = loadPrompt('fix');
// === Kultaisten esimerkkien lataus (kielen mukaan) ===
const GOLDEN_DIR = join(__dirname, 'golden-examples');
const LANG_CONFIG = {
python: {
goldenDir: 'todo',
files: ['models.py', 'schemas.py', 'main.py', 'test_main.py'],
required: ['models.py', 'schemas.py', 'main.py', 'test_main.py'],
dockerImage: 'kipina-pytest',
},
rust: {
goldenDir: 'todo-rs',
files: ['Cargo.toml', 'src/models.rs', 'src/handlers.rs', 'src/lib.rs', 'src/main.rs', 'tests/api_test.rs'],
required: ['Cargo.toml', 'src/models.rs', 'src/handlers.rs', 'src/lib.rs', 'src/main.rs', 'tests/api_test.rs'],
dockerImage: 'kipina-cargo-test',
},
};
const LCONF = LANG_CONFIG[LANG] || LANG_CONFIG.python;
/**
 * Build the reference-implementation text that is prepended to the
 * code-generation prompt.
 *
 * In compact mode the condensed template from `prompts/` is used when it
 * exists; otherwise the full golden example is assembled from the files listed
 * in `LCONF.files`, skipping any that are missing.
 *
 * @returns {string} The example text, or '' when the golden directory is absent.
 */
function loadGoldenExample() {
  // --compact: use the condensed template instead of the full code
  if (COMPACT_MODE) {
    const compactName = LANG === 'rust' ? 'golden-compact-rs.md' : 'golden-compact-py.md';
    const compactFile = join(__dirname, 'prompts', compactName);
    if (existsSync(compactFile)) {
      return '\n' + readFileSync(compactFile, 'utf-8').trim() + '\n';
    }
  }

  // Full golden example
  const exampleDir = join(GOLDEN_DIR, LCONF.goldenDir);
  if (!existsSync(exampleDir)) {
    return '';
  }
  const header = `\nREFERENCE IMPLEMENTATION (todo project — follow this exact structure, style, and conventions):\n\n`;
  const sections = LCONF.files
    .map((fileName) => ({ fileName, fullPath: join(exampleDir, fileName) }))
    .filter(({ fullPath }) => existsSync(fullPath))
    .map(({ fileName, fullPath }) => `=== ${fileName} ===\n${readFileSync(fullPath, 'utf-8').trim()}\n\n`);
  return header + sections.join('');
}
// Reference text computed once at startup and reused in every code-generation prompt.
const GOLDEN_EXAMPLE = loadGoldenExample();
// === Removal of thinking tags (gemma4, qwen3/3.5 etc.) ===
/**
 * Remove closed reasoning blocks emitted by thinking models and trim the rest.
 *
 * @param {string} text - Raw model output.
 * @returns {string} The output without `<|channel>thought…<channel|>` and
 *   `<think>…</think>` blocks, trimmed.
 */
function stripThinking(text) {
  const reasoningBlocks = [
    /<\|channel>thought[\s\S]*?<channel\|>/g, // gemma4
    /<think>[\s\S]*?<\/think>/g, // qwen3, qwen3.5
  ];
  let cleaned = text;
  for (const pattern of reasoningBlocks) {
    cleaned = cleaned.replace(pattern, '');
  }
  return cleaned.trim();
}
// === Ollama / Hub client ===
/**
 * Send one chat request and return the cleaned answer with throughput figures.
 *
 * When HUB_URL is set the request goes to the hub's chat-completions endpoint;
 * otherwise it goes directly to Ollama's /api/chat (non-streaming).
 *
 * @param {string} model - Model name.
 * @param {string} prompt - User prompt.
 * @param {string} systemPrompt - System prompt; omitted from the Ollama message list when empty.
 * @param {number} [maxTokens=2048] - Generation budget (tripled for Ollama in thinking mode).
 * @returns {Promise<{text: string, tokens: number, durationMs: number, tokPerSec: number}>}
 * @throws {Error} When the server answers with a non-2xx status.
 */
async function ollamaChat(model, prompt, systemPrompt, maxTokens = 2048) {
  const start = Date.now();

  if (HUB_URL) {
    const taskId = `bench-${Date.now()}-${Math.random().toString(36).slice(2,8)}`;
    const resp = await fetch(`${HUB_URL}/api/v1/chat/completions`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ model, prompt, task_id: taskId, system_prompt: systemPrompt, max_tokens: maxTokens }),
    });
    if (!resp.ok) throw new Error(`Hub HTTP ${resp.status}: ${await resp.text()}`);
    const data = await resp.json();
    const elapsed = Date.now() - start;
    const tokens = data.tokens_generated || 0;
    // Fall back to wall-clock throughput; a zero elapsed time would divide by zero.
    const measuredRate = elapsed > 0 ? tokens / (elapsed / 1000) : 0;
    return {
      text: stripThinking((data.response || '').trim()),
      tokens,
      durationMs: elapsed,
      tokPerSec: data.tokens_per_sec || measuredRate,
    };
  }

  // Direct Ollama route
  const messages = [];
  if (systemPrompt) messages.push({ role: 'system', content: systemPrompt });
  messages.push({ role: 'user', content: prompt });
  const resp = await fetch(`${OLLAMA_URL}/api/chat`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model,
      messages,
      stream: false,
      think: THINK_MODE,
      options: { num_predict: THINK_MODE ? maxTokens * 3 : maxTokens, num_ctx: 16384, temperature: 0.7, top_k: 40, repeat_penalty: 1.15 },
    }),
  });
  if (!resp.ok) throw new Error(`Ollama HTTP ${resp.status}: ${await resp.text()}`);
  const data = await resp.json();
  const elapsed = Date.now() - start;

  const rawContent = (data.message?.content || '').trim();
  const thinking = (data.message?.thinking || '').trim();
  // When the model produced only reasoning, use that as the answer text.
  const text = stripThinking(rawContent || thinking);
  const evalCount = data.eval_count || 0;
  if (!rawContent && thinking) console.log(` ⚠ thinking-malli: ${thinking.length} merkkiä ajattelua, content tyhjä`);

  // Without a reported eval duration the rate is unknown; report 0 instead of
  // dividing by a 1 ns placeholder, which would yield billions of tok/s.
  const evalDurationNs = data.eval_duration || 0;
  const tokPerSec = evalDurationNs > 0 ? evalCount / (evalDurationNs / 1e9) : 0;
  return { text, tokens: evalCount, durationMs: elapsed, tokPerSec };
}
/**
 * Fetch the names of the available models from the hub (when HUB_URL is set)
 * or directly from Ollama.
 *
 * @returns {Promise<string[]>} Model names; empty when the response has no `models` field.
 * @throws {Error} When the server answers with a non-2xx status.
 */
async function ollamaListModels() {
  const tagsUrl = HUB_URL ? `${HUB_URL}/api/v1/ollama/tags` : `${OLLAMA_URL}/api/tags`;
  const response = await fetch(tagsUrl);
  if (!response.ok) {
    throw new Error(`Tags: HTTP ${response.status}`);
  }
  const payload = await response.json();
  const names = [];
  for (const entry of payload.models || []) {
    names.push(entry.name);
  }
  return names;
}
// === Test result parsing (pytest + cargo test) ===
/**
 * Extract pass/fail counts from pytest or `cargo test` output.
 *
 * Cargo output is recognised first, because its summary lines
 * ("test result: ok. 10 passed; 0 failed;") would otherwise also match the
 * looser pytest patterns. Cargo prints one summary per test target, so all
 * summaries are added together.
 *
 * @param {string} output - Combined stdout/stderr of the test run.
 * @returns {{testsPassed: number, testsFailed: number, testsTotal: number}}
 *   All zeros when nothing recognisable was found.
 */
function parseTestOutput(output) {
  // Cargo test: "test result: ok. 10 passed; 0 failed;" (one line per target)
  const cargoSummaries = [...output.matchAll(/test result: \w+\.\s*(\d+) passed;\s*(\d+) failed/g)];
  if (cargoSummaries.length > 0) {
    let passed = 0;
    let failed = 0;
    for (const [, passedText, failedText] of cargoSummaries) {
      passed += Number.parseInt(passedText, 10);
      failed += Number.parseInt(failedText, 10);
    }
    return { testsPassed: passed, testsFailed: failed, testsTotal: passed + failed };
  }

  // Pytest: "6 passed", "2 failed", "1 error"
  const pyPassed = output.match(/(\d+) passed/);
  const pyFailed = output.match(/(\d+) failed/);
  const pyError = output.match(/(\d+) error/);
  if (pyPassed || pyFailed) {
    const passed = pyPassed ? Number.parseInt(pyPassed[1], 10) : 0;
    // Errors count as failures.
    const failed = (pyFailed ? Number.parseInt(pyFailed[1], 10) : 0) + (pyError ? Number.parseInt(pyError[1], 10) : 0);
    return { testsPassed: passed, testsFailed: failed, testsTotal: passed + failed };
  }

  // Cargo compilation error: count "error[E" occurrences
  const compileErrors = (output.match(/error\[E\d+\]/g) || []).length;
  if (compileErrors > 0) {
    return { testsPassed: 0, testsFailed: compileErrors, testsTotal: compileErrors };
  }
  return { testsPassed: 0, testsFailed: 0, testsTotal: 0 };
}
// === File parser for the LLM response ===
/**
 * Tell whether a file name taken from model output stays inside the project
 * directory when joined onto it: not absolute and without `..` segments.
 *
 * @param {string} name - File name as written by the model.
 * @returns {boolean}
 */
function isSafeGeneratedPath(name) {
  if (/^[/\\]/.test(name) || /^[A-Za-z]:/.test(name)) return false;
  return !name.split(/[/\\]/).includes('..');
}

/**
 * Split an LLM response of the form `=== <file> ===` followed by the file body
 * into a name → content map. Only `.py`, `.toml` and `.rs` names are
 * recognised; a surrounding Markdown code fence is removed, and empty bodies
 * are dropped. Names that would escape the target directory are ignored,
 * because the caller writes each entry to disk under the run directory.
 *
 * @param {string} text - Raw model output.
 * @returns {Object<string, string>} File contents keyed by relative path, each ending in a newline.
 */
function parseGeneratedFiles(text) {
  const files = {};
  // The capture group makes split() return [preamble, name1, body1, name2, body2, ...].
  const sections = text.split(/===\s*(\S+\.(?:py|toml|rs))\s*===/);
  for (let i = 1; i < sections.length - 1; i += 2) {
    const name = sections[i];
    if (!isSafeGeneratedPath(name)) continue;
    let content = sections[i + 1].trim();
    content = content.replace(/^```(?:python|toml|rust)?\s*\n?/m, '').replace(/\n?```\s*$/m, '').trim();
    if (content) files[name] = content + '\n';
  }
  return files;
}
// === Validator ===
/**
 * Run lightweight static checks on the generated Python files and return the
 * problems found as `ISSUE: <file>: …` strings.
 *
 * Checks: relative imports; names imported from models/schemas/main that do
 * not occur in the source file; missing `from datetime import` in schemas.py
 * when `date`/`datetime` annotations are used; lowercase `true`/`false`.
 *
 * @param {Object<string, string>} files - File contents keyed by file name; non-`.py` entries are ignored.
 * @returns {string[]} Issue descriptions, empty when nothing was found.
 */
function validateProjectCode(files) {
  const issues = [];
  for (const [fname, code] of Object.entries(files)) {
    if (!fname.endsWith('.py')) continue;
    const lines = code.split('\n');

    for (const line of lines) {
      if (/^from\s+\.(\w*)\s+import/.test(line)) issues.push(`ISSUE: ${fname}: relatiivinen import`);
    }

    for (const line of lines) {
      const m = line.match(/^from\s+(models|schemas|main)\s+import\s+(.+)/);
      if (!m) continue;
      const srcCode = files[m[1] + '.py'];
      if (!srcCode) { issues.push(`ISSUE: ${fname}: ${m[1]}.py puuttuu`); continue; }
      // Drop a trailing comment, parentheses and line continuations so that
      // `import (A, B)  # note` yields the names A and B rather than "(A" and "B)  # note".
      const importList = m[2].split('#')[0].replace(/[()\\]/g, ' ');
      const names = importList.split(',').map(n => n.trim().split(/\s+as\s+/)[0].trim());
      for (const name of names) {
        // A wildcard import names nothing that could be looked up.
        if (name && name !== '*' && !srcCode.includes(name)) issues.push(`ISSUE: ${fname}: "${name}" puuttuu ${m[1]}.py:stä`);
      }
    }

    if (fname === 'schemas.py') {
      if (/:\s*date\b/.test(code) && !/from datetime import/.test(code))
        issues.push('ISSUE: schemas.py: date-import puuttuu');
      if (/:\s*datetime\b/.test(code) && !/from datetime import/.test(code))
        issues.push('ISSUE: schemas.py: datetime-import puuttuu');
    }

    for (let i = 0; i < lines.length; i++) {
      const line = lines[i];
      // Skip comment-only and blank lines.
      if (/^\s*#/.test(line) || /^\s*$/.test(line)) continue;
      if (/(?<!["\w])false(?![\w"])/.test(line)) issues.push(`ISSUE: ${fname}:${i+1}: "false" → "False"`);
      if (/(?<!["\w])true(?![\w"])/.test(line)) issues.push(`ISSUE: ${fname}:${i+1}: "true" → "True"`);
    }
  }
  return issues;
}
/**
 * Find and parse the first JSON object in a model response.
 *
 * If the text contains a Markdown code fence, only the fence body is searched.
 * The text is scanned for balanced `{ … }` spans; each time the nesting depth
 * returns to zero the span is handed to JSON.parse, and scanning continues
 * when parsing fails.
 *
 * @param {string} text - Raw model output.
 * @returns {Object|null} The parsed object, or null when none could be parsed.
 */
function extractJson(text) {
  const fenced = text.match(/```(?:json)?\s*\n([\s\S]*?)```/);
  const source = fenced ? fenced[1].trim() : text;
  let depth = 0;
  let start = null;
  for (let i = 0; i < source.length; i++) {
    const ch = source[i];
    if (ch === '{') {
      if (depth === 0) start = i;
      depth++;
    } else if (ch === '}') {
      // Never go below zero: a stray "}" before the object (or one inside a
      // string value) must not make every later object unreachable.
      if (depth > 0) depth--;
      if (depth === 0 && start !== null) {
        try {
          return JSON.parse(source.slice(start, i + 1));
        } catch {
          // Not valid JSON yet; keep scanning for a later closing brace or object.
        }
      }
    }
  }
  return null;
}
// === Test scenarios ===
// Each scenario has an id (used in output directory names and result tables)
// and a Finnish project description that is sent to the model as the first prompt.
// The first entry is the default scenario.
const SCENARIOS = [
{ id: 'todo', prompt: 'Todo-sovellus: tehtävien hallinta, deadline, prioriteetti ja status' },
{ id: 'users', prompt: 'REST API käyttäjähallinnalle SQLite-tietokannalla' },
{ id: 'blog', prompt: 'Blogi-API: kirjoittajat ja artikkelit, julkaisupäivämäärä ja status' },
];
// === Scoring (0–100) and star rating ===
/**
 * Turn a pipeline result into a score between 0 and 100.
 *
 * A run that failed without any test results scores 0. Otherwise the score is
 * 10 for a valid spec, 10 for reaching a scorable state, up to 60 for the
 * share of passing tests, and 20 minus 10 per fix round (never negative).
 *
 * @param {{error: ?string, specOk: boolean, testsTotal: number, testsPassed: number, fixRounds: number}} r
 * @returns {number} The score, capped at 100.
 */
function scoreResult(r) {
  if (r.error && r.testsTotal === 0) {
    return 0;
  }
  const specPoints = r.specOk ? 10 : 0;
  const pipelinePoints = !r.error || r.testsTotal > 0 ? 10 : 0;
  const testPoints = r.testsTotal > 0 ? Math.round((r.testsPassed / r.testsTotal) * 60) : 0;
  const fixPoints = Math.max(0, 20 - r.fixRounds * 10);
  return Math.min(100, specPoints + pipelinePoints + testPoints + fixPoints);
}
/**
 * Map a score to a five-star rating string.
 *
 * @param {number} score - Score as produced by scoreResult.
 * @returns {string} Five characters, filled stars first.
 */
function starsForScore(score) {
  // Lower bounds, highest tier first.
  const tiers = [
    [90, '★★★★★'],
    [70, '★★★★☆'],
    [50, '★★★☆☆'],
    [25, '★★☆☆☆'],
  ];
  for (const [minimum, stars] of tiers) {
    if (score >= minimum) return stars;
  }
  return score > 0 ? '★☆☆☆☆' : '☆☆☆☆☆';
}
// === Pipeline: one model × one scenario ===
/**
 * Run the full benchmark pipeline for one model and one scenario:
 * requirements → JSON spec → code generation → validation/fix loop (Python
 * only) → tests in Docker. Intermediate artefacts are written to a
 * per-run directory under OUTPUT_DIR.
 *
 * The function never rejects: failures are recorded in `result.error`. The
 * timing/token totals, score and stars are computed in a `finally` block so
 * that runs which stop early still report the work they did.
 *
 * @param {string} model - Model name.
 * @param {{id: string, prompt: string}} scenario - Scenario to run.
 * @returns {Promise<Object>} The result record (see the `result` literal below for its fields).
 */
async function runPipeline(model, scenario) {
  const result = {
    model, scenario: scenario.id,
    reqOk: false, specOk: false, specEntities: 0,
    validationIssues: 0, fixRounds: 0,
    testsTotal: 0, testsPassed: 0, testsFailed: 0,
    totalDurationMs: 0, totalTokens: 0, avgTokPerSec: 0,
    promptChars: 0, promptTokensEst: 0,
    score: 0, stars: '',
    error: null,
  };
  // One entry per LLM call; aggregated in the finally block.
  const timings = [];
  const dir = `${OUTPUT_DIR}/${model.replace(/[/:]/g, '_')}__${scenario.id}`;
  mkdirSync(dir, { recursive: true });

  try {
    // 1. Requirements
    console.log(` [1/5] Vaatimukset...`);
    const req = await ollamaChat(model, scenario.prompt, CLIENT_SYSTEM, 2048);
    timings.push(req);
    if (!req.text || req.text.length < 50) { result.error = 'Vaatimukset liian lyhyet'; return result; }
    result.reqOk = true;
    writeFileSync(`${dir}/_requirements.txt`, req.text);

    // 2. JSON spec
    console.log(` [2/5] JSON-speksi...`);
    const specResp = await ollamaChat(model, `${req.text}\n\nOutput a JSON spec for this project.`, SPEC_SYSTEM, 4096);
    timings.push(specResp);
    const spec = extractJson(specResp.text);
    if (!spec || !spec.entities || spec.entities.length === 0) { result.error = 'JSON-speksi epäonnistui'; writeFileSync(`${dir}/_spec_raw.txt`, specResp.text); return result; }
    result.specOk = true;
    result.specEntities = spec.entities.length;
    writeFileSync(`${dir}/_spec.json`, JSON.stringify(spec, null, 2));

    // 3. LLM code generation
    console.log(` [3/5] Koodigenerointi (LLM)...`);
    const fileCount = LCONF.required.length;
    const codePrompt = `${GOLDEN_EXAMPLE}\n---\n\nPROJECT REQUIREMENTS:\n${req.text}\n\nJSON SPECIFICATION:\n${JSON.stringify(spec, null, 2)}\n\nGenerate the complete project with all ${fileCount} files. Follow the reference implementation patterns exactly.`;
    result.promptChars = CODE_SYSTEM.length + codePrompt.length;
    // Rough estimate: four characters per token.
    result.promptTokensEst = Math.round(result.promptChars / 4);
    const codeTokens = LANG === 'rust' ? 12288 : 8192;
    const codeResp = await ollamaChat(model, codePrompt, CODE_SYSTEM, codeTokens);
    timings.push(codeResp);
    writeFileSync(`${dir}/_code_raw.txt`, codeResp.text);
    const files = parseGeneratedFiles(codeResp.text);
    const missing = LCONF.required.filter(f => !files[f]);
    if (missing.length > 0) { result.error = `Puuttuvat: ${missing.join(', ')}`; return result; }

    // 4. Validation + fix loop (Python-specific)
    let fixRound = 0;
    if (LANG === 'python') {
      let issues = validateProjectCode(files);
      while (issues.length > 0 && fixRound < MAX_FIX_ROUNDS) {
        fixRound++;
        console.log(` [4/5] Korjauskierros ${fixRound} (${issues.length} ongelmaa)...`);
        // Group the issues by the file named in their "ISSUE: <file>:" prefix.
        const issuesByFile = {};
        for (const issue of issues) {
          const m = issue.match(/^ISSUE:\s*(\S+?):/);
          const fname = m ? m[1] : 'unknown';
          if (!issuesByFile[fname]) issuesByFile[fname] = [];
          issuesByFile[fname].push(issue);
        }
        for (const [fname, fIssues] of Object.entries(issuesByFile)) {
          if (!files[fname]) continue;
          const fixPrompt = `Fix the following issues in this Python file. Return ONLY the complete corrected file, no explanations.\n\nISSUES:\n${fIssues.join('\n')}\n\nCURRENT FILE (${fname}):\n\`\`\`python\n${files[fname]}\`\`\``;
          const fixResp = await ollamaChat(model, fixPrompt, FIX_SYSTEM, 2048);
          timings.push(fixResp);
          if (fixResp.text) {
            files[fname] = fixResp.text.replace(/^```(?:python)?\s*\n?/m, '').replace(/\n?```\s*$/m, '').trim() + '\n';
          }
        }
        issues = validateProjectCode(files);
      }
      result.validationIssues = issues.length;
    }
    result.fixRounds = fixRound;

    // Write the LLM-generated files (creating src/ and tests/ subdirectories as needed)
    for (const [fn, content] of Object.entries(files)) {
      const filePath = join(dir, fn);
      mkdirSync(dirname(filePath), { recursive: true });
      writeFileSync(filePath, content);
    }

    // 5. Tests in a Docker container
    const testLabel = LANG === 'rust' ? 'Cargo test (Docker)' : 'Pytest (Docker)';
    console.log(` [5/5] ${testLabel}...`);
    const dockerTimeout = LANG === 'rust' ? 300000 : 120000;
    try {
      const testOut = execSync(
        `docker run --rm -v "${dir}:/src:ro" ${LCONF.dockerImage} 2>&1`,
        { timeout: dockerTimeout, encoding: 'utf-8' }
      );
      writeFileSync(`${dir}/_testout.txt`, testOut);
      Object.assign(result, parseTestOutput(testOut));
    } catch (e) {
      // execSync throws on a non-zero exit status or timeout; the captured
      // output is still parsed for test counts.
      const output = e.stdout || e.stderr || e.message || '';
      writeFileSync(`${dir}/_testout.txt`, output);
      Object.assign(result, parseTestOutput(output));
      if (result.testsTotal === 0) result.error = 'Testit kaatuivat';
    }
  } catch (e) {
    result.error = e.message;
  } finally {
    // Summary — also runs for the early `return result` exits above, so failed
    // runs still report their duration, token usage, score and stars.
    result.totalDurationMs = timings.reduce((s, t) => s + t.durationMs, 0);
    result.totalTokens = timings.reduce((s, t) => s + t.tokens, 0);
    result.avgTokPerSec = timings.length > 0 ? timings.reduce((s, t) => s + t.tokPerSec, 0) / timings.length : 0;
    result.score = scoreResult(result);
    result.stars = starsForScore(result.score);
  }
  return result;
}
// === Main ===
/**
 * Entry point: list the models, run every selected model × scenario through
 * the pipeline, print the result tables, and write the JSON/HTML reports to
 * OUTPUT_DIR and to the results/ archive.
 *
 * Exits the process with status 1 when the model list cannot be fetched.
 * OUTPUT_DIR is removed and recreated before the run.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('╔══════════════════════════════════════════════╗');
  console.log('║ Kipinä CodeBench ║');
  console.log('╚══════════════════════════════════════════════╝');
  console.log(`Ollama: ${OLLAMA_URL} 📝 ${LANG}${COMPACT_MODE ? ' (compact)' : ''}${THINK_MODE ? ' 🧠 thinking ON' : ''}`);

  // Fetch the models
  let models;
  try {
    models = await ollamaListModels();
  } catch (e) {
    console.error(`Ei yhteyttä Ollamaan (${OLLAMA_URL}): ${e.message}`);
    process.exit(1);
  }
  if (FILTER_MODELS) {
    // Keep models whose name contains any of the comma-separated filter strings.
    const filter = FILTER_MODELS.split(',').map(s => s.trim());
    models = models.filter(m => filter.some(f => m.includes(f)));
  }
  console.log(`Mallit (${models.length}): ${models.join(', ')}`);
  const scenarios = SCENARIO_FILTER === 'all' ? SCENARIOS : [SCENARIOS[0]];
  console.log(`Skenaariot (${scenarios.length}): ${scenarios.map(s => s.id).join(', ')}`);
  console.log(`Tulokset: ${OUTPUT_DIR}/`);
  console.log('');

  // Clean the output directory
  rmSync(OUTPUT_DIR, { recursive: true, force: true });
  mkdirSync(OUTPUT_DIR, { recursive: true });

  const results = [];
  for (const model of models) {
    for (const scenario of scenarios) {
      console.log(`\n━━━ ${model} × ${scenario.id} ━━━`);
      const r = await runPipeline(model, scenario);
      results.push(r);
      // Both non-error branches currently render the same "passed/total" text.
      const status = r.error ? `${r.error}` :
        r.testsPassed === r.testsTotal && r.testsTotal > 0 ? `${r.testsPassed}/${r.testsTotal}` :
        `${r.testsPassed}/${r.testsTotal}`;
      const ctxInfo = r.promptTokensEst > 0 ? ` | ctx ~${(r.promptTokensEst/1000).toFixed(1)}K` : '';
      console.log(`${status} | ${r.stars} ${r.score}p | ${(r.totalDurationMs/1000).toFixed(1)}s | ${r.totalTokens} tok | ${r.avgTokPerSec.toFixed(1)} tok/s${ctxInfo}`);
    }
  }

  // === Result table ===
  console.log('\n\n╔══════════════════════════════════════════════════════════════════════════════════════════════════╗');
  console.log('║ TULOKSET ║');
  console.log('╠══════════════════════════════════════════════════════════════════════════════════════════════════╣');
  const header = [
    'Malli'.padEnd(40),
    'Skenaario'.padEnd(10),
    'Speksi'.padEnd(8),
    'Testit'.padEnd(10),
    'Korjaus'.padEnd(8),
    'Ctx'.padEnd(7),
    'Aika'.padEnd(8),
    'tok/s'.padEnd(8),
    'Pisteet',
  ].join(' │ ');
  console.log(`${header}`);
  console.log('╠' + '═'.repeat(header.length + 2) + '╣');
  for (const r of results) {
    const specStatus = r.specOk ? `${r.specEntities}e` : '✗';
    const testStatus = r.testsTotal > 0 ? `${r.testsPassed}/${r.testsTotal}` : '-';
    const fixStatus = r.fixRounds > 0 ? `${r.fixRounds}×` : '-';
    const ctx = r.promptTokensEst > 0 ? `~${(r.promptTokensEst/1000).toFixed(1)}K` : '-';
    const time = `${(r.totalDurationMs/1000).toFixed(0)}s`;
    const speed = `${r.avgTokPerSec.toFixed(0)}`;
    const row = [
      r.model.padEnd(40),
      r.scenario.padEnd(10),
      specStatus.padEnd(8),
      testStatus.padEnd(10),
      fixStatus.padEnd(8),
      ctx.padEnd(7),
      time.padEnd(8),
      speed.padEnd(8),
      `${r.stars} ${r.score}`,
    ].join(' │ ');
    console.log(`${row}`);
  }
  console.log('╚' + '═'.repeat(header.length + 2) + '╝');

  // === Per-model summary ===
  const modelNames = [...new Set(results.map(r => r.model))];
  const scenarioIds = scenarios.map(s => s.id);
  console.log('\n');
  const mHeader = [
    'Malli'.padEnd(35),
    ...scenarioIds.map(s => s.padEnd(22)),
    'Yht.'.padEnd(8),
    'Out'.padEnd(7),
    'Aika'.padEnd(8),
    'tok/s'.padEnd(7),
    'Pisteet',
  ].join(' │ ');
  console.log(mHeader);
  console.log('─'.repeat(mHeader.length));
  for (const model of modelNames) {
    const mrs = results.filter(r => r.model === model);
    // One column per scenario: "passed/total duration tokens".
    const cols = scenarioIds.map(sid => {
      const r = mrs.find(r => r.scenario === sid);
      if (!r) return '-'.padEnd(22);
      const t = r.testsTotal > 0 ? `${r.testsPassed}/${r.testsTotal}` : '-';
      const s = `${(r.totalDurationMs/1000).toFixed(0)}s`;
      const tok = r.totalTokens > 1000 ? `${(r.totalTokens/1000).toFixed(1)}K` : `${r.totalTokens}`;
      return `${t} ${s} ${tok}`.padEnd(22);
    });
    const totalPassed = mrs.reduce((s, r) => s + r.testsPassed, 0);
    const totalTests = mrs.reduce((s, r) => s + r.testsTotal, 0);
    const totalTokens = mrs.reduce((s, r) => s + r.totalTokens, 0);
    const totalTime = mrs.reduce((s, r) => s + r.totalDurationMs, 0);
    const avgSpeed = mrs.length > 0 ? Math.round(mrs.reduce((s, r) => s + r.avgTokPerSec, 0) / mrs.length) : 0;
    const avgScoreModel = mrs.length > 0 ? Math.round(mrs.reduce((s, r) => s + r.score, 0) / mrs.length) : 0;
    const pct = totalTests > 0 ? Math.round(totalPassed / totalTests * 100) : 0;
    const tokStr = totalTokens > 1000 ? `${(totalTokens/1000).toFixed(1)}K` : `${totalTokens}`;
    const row = [
      model.padEnd(35),
      ...cols,
      `${totalPassed}/${totalTests}`.padEnd(8),
      tokStr.padEnd(7),
      `${(totalTime/1000).toFixed(0)}s`.padEnd(8),
      `${avgSpeed}`.padEnd(7),
      `${starsForScore(avgScoreModel)} ${avgScoreModel}p (${pct}%)`,
    ].join(' │ ');
    console.log(row);
  }

  // Save the JSON + HTML report
  const jsonData = JSON.stringify(results, null, 2);
  writeFileSync(`${OUTPUT_DIR}/results.json`, jsonData);
  const templatePath = join(__dirname, 'report-template.html');
  let htmlData = '';
  if (existsSync(templatePath)) {
    // "<" is escaped so that text such as "</script>" inside a result cannot
    // terminate the surrounding markup (assumes the placeholder sits in an
    // inline script — TODO confirm against report-template.html).
    const embeddedJson = JSON.stringify(results).replace(/</g, '\\u003c');
    // A replacer function is used because a replacement *string* would have
    // "$&", "$1", "$'" etc. in the data interpreted as substitution patterns.
    htmlData = readFileSync(templatePath, 'utf-8').replace('/*DATA_PLACEHOLDER*/[]', () => embeddedJson);
    writeFileSync(`${OUTPUT_DIR}/report.html`, htmlData);
    console.log(`\nRaportti: ${OUTPUT_DIR}/report.html`);
  }
  console.log(`JSON: ${OUTPUT_DIR}/results.json`);

  // Copy into the results/ directory under the timestamp
  mkdirSync(RESULTS_DIR, { recursive: true });
  writeFileSync(join(RESULTS_DIR, `${TIMESTAMP}.json`), jsonData);
  if (htmlData) writeFileSync(join(RESULTS_DIR, `${TIMESTAMP}.html`), htmlData);
  console.log(`Arkistoitu: results/${TIMESTAMP}.json`);

  // Summary
  const passed = results.filter(r => !r.error && r.testsPassed === r.testsTotal && r.testsTotal > 0);
  const partial = results.filter(r => !r.error && r.testsPassed < r.testsTotal && r.testsTotal > 0);
  const failed = results.filter(r => r.error || r.testsTotal === 0);
  const avgScore = results.length > 0 ? Math.round(results.reduce((s, r) => s + r.score, 0) / results.length) : 0;
  const totalTime = results.reduce((s, r) => s + r.totalDurationMs, 0);
  console.log(`\n${starsForScore(avgScore)} Keskiarvo: ${avgScore}p | ✓ PASS: ${passed.length} | ◐ PARTIAL: ${partial.length} | ✗ FAIL: ${failed.length} | Yhteensä: ${results.length} | Kokonaisaika: ${(totalTime/1000/60).toFixed(1)} min`);
}
// Run the benchmark; any rejection from main() is logged and the process exits with status 1.
main().catch(e => { console.error(e); process.exit(1); });