CodeBench Taso 4: itsekorjaava looppi — syötä pytest-virhe mallille
Jos testit epäonnistuvat, LLM saa virheilmoituksen ja nykyisen koodin ja korjaa koodin. Enintään 3 korjauskierrosta. Testattu: qwen3:8b, users: 0/6 → korjaus → 6/6.
This commit is contained in:
@@ -346,30 +346,61 @@ async function runPipeline(model, scenario) {
|
|||||||
}
|
}
|
||||||
result.fixRounds = fixRound;
|
result.fixRounds = fixRound;
|
||||||
|
|
||||||
// Kirjoita LLM:n generoimat tiedostot (luo src/ ja tests/ alihakemistot tarvittaessa)
|
// 5. Testit Docker-kontissa + itsekorjaava looppi (Taso 4)
|
||||||
for (const [fn, content] of Object.entries(files)) {
|
const testLabel = LANG === 'rust' ? 'Cargo test' : 'Pytest';
|
||||||
const filePath = join(dir, fn);
|
|
||||||
mkdirSync(dirname(filePath), { recursive: true });
|
|
||||||
writeFileSync(filePath, content);
|
|
||||||
}
|
|
||||||
|
|
||||||
// 5. Testit Docker-kontissa
|
|
||||||
const testLabel = LANG === 'rust' ? 'Cargo test (Docker)' : 'Pytest (Docker)';
|
|
||||||
console.log(` [5/5] ${testLabel}...`);
|
|
||||||
const dockerTimeout = LANG === 'rust' ? 300000 : 120000;
|
const dockerTimeout = LANG === 'rust' ? 300000 : 120000;
|
||||||
try {
|
const MAX_TEST_FIX = 3;
|
||||||
const testOut = execSync(
|
|
||||||
`docker run --rm -v "${dir}:/src:ro" ${LCONF.dockerImage} 2>&1`,
|
for (let testRound = 0; testRound <= MAX_TEST_FIX; testRound++) {
|
||||||
{ timeout: dockerTimeout, encoding: 'utf-8' }
|
// Kirjoita tiedostot levylle
|
||||||
);
|
for (const [fn, content] of Object.entries(files)) {
|
||||||
writeFileSync(`${dir}/_testout.txt`, testOut);
|
const filePath = join(dir, fn);
|
||||||
|
mkdirSync(dirname(filePath), { recursive: true });
|
||||||
|
writeFileSync(filePath, content);
|
||||||
|
}
|
||||||
|
|
||||||
|
const roundLabel = testRound > 0 ? ` (korjaus ${testRound}/${MAX_TEST_FIX})` : '';
|
||||||
|
console.log(` [5/5] ${testLabel}${roundLabel}...`);
|
||||||
|
|
||||||
|
let testOut = '';
|
||||||
|
try {
|
||||||
|
testOut = execSync(
|
||||||
|
`docker run --rm -v "${dir}:/src:ro" ${LCONF.dockerImage} 2>&1`,
|
||||||
|
{ timeout: dockerTimeout, encoding: 'utf-8' }
|
||||||
|
);
|
||||||
|
} catch (e) {
|
||||||
|
testOut = e.stdout || e.stderr || e.message || '';
|
||||||
|
}
|
||||||
|
writeFileSync(`${dir}/_testout_${testRound}.txt`, testOut);
|
||||||
Object.assign(result, parseTestOutput(testOut));
|
Object.assign(result, parseTestOutput(testOut));
|
||||||
} catch (e) {
|
|
||||||
const output = e.stdout || e.stderr || e.message || '';
|
// Kaikki testit läpi → valmis
|
||||||
writeFileSync(`${dir}/_testout.txt`, output);
|
if (result.testsTotal > 0 && result.testsPassed === result.testsTotal) break;
|
||||||
Object.assign(result, parseTestOutput(output));
|
|
||||||
if (result.testsTotal === 0) result.error = 'Testit kaatuivat';
|
// Viimeinen kierros tai ei enää korjausmahdollisuutta
|
||||||
|
if (testRound >= MAX_TEST_FIX) {
|
||||||
|
if (result.testsTotal === 0) result.error = 'Testit kaatuivat';
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Itsekorjaus: syötä virhe + koodi mallille
|
||||||
|
const errorLines = testOut.split('\n').filter(l => /^E |FAILED|ERROR|error\[E/.test(l)).slice(0, 20).join('\n');
|
||||||
|
if (!errorLines) break; // Ei parsittavia virheitä
|
||||||
|
|
||||||
|
console.log(` [5/5] Itsekorjaus: ${result.testsFailed || 'virhe'}...`);
|
||||||
|
const allCode = Object.entries(files).map(([fn, c]) => `=== ${fn} ===\n${c}`).join('\n\n');
|
||||||
|
const fixPrompt = `The following test errors occurred. Fix the code so ALL tests pass. Return ALL files with === markers.\n\nERRORS:\n${errorLines}\n\nCURRENT CODE:\n${allCode}`;
|
||||||
|
const fixResp = await ollamaChat(model, fixPrompt, CODE_SYSTEM, LANG === 'rust' ? 12288 : 8192);
|
||||||
|
timings.push(fixResp);
|
||||||
|
|
||||||
|
const fixedFiles = parseGeneratedFiles(fixResp.text);
|
||||||
|
// Päivitä vain tiedostot jotka malli palautti
|
||||||
|
for (const [fn, content] of Object.entries(fixedFiles)) {
|
||||||
|
if (LCONF.required.includes(fn)) files[fn] = content;
|
||||||
|
}
|
||||||
|
result.fixRounds++;
|
||||||
}
|
}
|
||||||
|
writeFileSync(`${dir}/_testout.txt`, ''); // Symlink viimeisimpään
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
result.error = e.message;
|
result.error = e.message;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user