diff --git a/kipina-codebench/benchmark.mjs b/kipina-codebench/benchmark.mjs
index d35542f..a7631a5 100644
--- a/kipina-codebench/benchmark.mjs
+++ b/kipina-codebench/benchmark.mjs
@@ -379,31 +379,76 @@ async function runPipeline(model, scenario, round = 1) {
   const codeTokens = isConvert ? 8192 : (LANG === 'rust' ? 12288 : LANG === 'go' ? 10240 : 8192);
   let files;
 
-  // File-by-file: generoi yksi tiedosto kerrallaan (pienille malleille)
+  // File-by-file: generate one file at a time and validate it immediately
   if (FILE_BY_FILE && LCONF.fileByFile) {
     const fbf = LCONF.fileByFile;
+    const MAX_FILE_FIX = 2;
     console.log(` [3/5] Koodigenerointi (file-by-file, ${fbf.length} tiedostoa)...`);
     files = {};
     let context = '';
 
     for (const fileDef of fbf) {
       const contextBlock = context ? `\nEXISTING CODE:\n${context}\n` : '';
-      const filePrompt = `${goldenExample}\n---\n\nPROJECT REQUIREMENTS:\n${req.text}\n\nJSON SPECIFICATION:\n${JSON.stringify(spec, null, 2)}\n${contextBlock}\nWrite ONLY the file "${fileDef.name}": ${fileDef.desc}\nOutput raw code, no markdown fences, no explanations. Start with "package main".`;
+      const basePrompt = `${goldenExample}\n---\n\nPROJECT REQUIREMENTS:\n${req.text}\n\nJSON SPECIFICATION:\n${JSON.stringify(spec, null, 2)}\n${contextBlock}\nWrite ONLY the file "${fileDef.name}": ${fileDef.desc}\nOutput raw code, no markdown fences, no explanations. Start with "package main".`;
 
-      console.log(` [3/5] → ${fileDef.name}...`);
-      const fileResp = await ollamaChat(model, filePrompt, CODE_SYSTEM, 2048);
-      timings.push(fileResp);
+      let code = '';
+      // Build errors from the previous attempt. Kept in its own variable: `code` is a
+      // primitive string, so setting a property on it throws a TypeError in an ES module.
+      let buildErrors = '';
+      for (let attempt = 0; attempt <= MAX_FILE_FIX; attempt++) {
+        const prompt = attempt === 0 ? basePrompt
+          : `Fix the following Go compilation errors in "${fileDef.name}". Return ONLY the corrected file, no explanations.\n\nERRORS:\n${buildErrors}\n\nCURRENT FILE:\n${code}\n\nOTHER FILES:\n${context}`;
+
+        const label = attempt === 0 ? fileDef.name : `${fileDef.name} (fix ${attempt})`;
+        console.log(` [3/5] → ${label}...`);
+        const fileResp = await ollamaChat(model, prompt, CODE_SYSTEM, 2048);
+        timings.push(fileResp);
+
+        code = fileResp.text
+          .replace(/^```(?:go|golang)?\s*\n?/m, '').replace(/\n?```\s*$/m, '')
+          .replace(/^(?:Here|Sure|Below|This|The|I )[\s\S]*?(?=package\s)/m, '')
+          .trim();
+
+        if (!code) break;
 
-      // Siivoa: poista markdown-aidat ja selitysteksti
-      let code = fileResp.text
-        .replace(/^```(?:go|golang)?\s*\n?/m, '').replace(/\n?```\s*$/m, '')
-        .replace(/^(?:Here|Sure|Below|This|The|I )[\s\S]*?(?=package\s)/m, '')
-        .trim();
-      if (code) {
-        files[fileDef.name] = code + '\n';
-        context += `=== ${fileDef.name} ===\n${code}\n\n`;
         const loc = code.split('\n').length;
         console.log(` [3/5] ${fileResp.tokens} tok, ${loc} lines, ${fileResp.tokPerSec.toFixed(0)} tok/s`);
+
+        // Immediate validation: write the files to disk and run go build
+        if (LANG === 'go' && !fileDef.name.endsWith('_test.go')) {
+          files[fileDef.name] = code + '\n';
+          // Write every file generated so far plus go.mod
+          const goldenMod = readFileSync(join(GOLDEN_DIR, 'todo-go', 'go.mod'), 'utf-8');
+          const modName = spec.project_name?.replace(/[^a-z0-9-]/gi, '-') || 'generated-api';
+          writeFileSync(join(dir, 'go.mod'), goldenMod.replace(/^module\s+\S+/m, `module ${modName}`));
+          for (const [fn, c] of Object.entries(files)) {
+            writeFileSync(join(dir, fn), c);
+          }
+          // go build check
+          try {
+            execSync(
+              `docker run --rm --entrypoint sh -v "${dir}:/src:ro" ${LCONF.dockerImage} -c "cp -r /src/* . && go mod tidy 2>&1 && go build ./... 2>&1"`,
+              { timeout: 60000, encoding: 'utf-8' }
+            );
+            console.log(` [3/5] ✓ kääntyy`);
+            break; // OK — move on to the next file
+          } catch (e) {
+            buildErrors = (e.stdout || e.stderr || '').split('\n').filter(l => /\.go:\d+/.test(l)).slice(0, 10).join('\n');
+            if (!buildErrors || attempt >= MAX_FILE_FIX) {
+              console.log(` [3/5] ⚠ käännösvirhe (ei korjata)`);
+              break;
+            }
+            console.log(` [3/5] ✗ ${buildErrors.split('\n').length} virhettä → korjataan`);
+            result.fixRounds++;
+          }
+        } else {
+          break; // Test files are not validated with go build
+        }
+      }
+
+      if (code) {
+        files[fileDef.name] = code + '\n';
+        context += `=== ${fileDef.name} ===\n${code}\n\n`;
       }
     }
     writeFileSync(`${dir}/_code_raw.txt`, context);