CodeBench: Go-tuki — Chi + SQLite + httptest

- Golden example: todo-go/ (6/6 testit läpi)
- todo-go.md golden reference
- prompts/code-go.md koodigenerointi-prompti
- Dockerfile.go-test (golang:1.23-alpine)
- benchmark.mjs: LANG_CONFIG, parseTestOutput, prompt/golden-valinta Go:lle
- Käyttö: node benchmark.mjs --lang go --models qwen2.5-coder:32b
This commit is contained in:
2026-04-14 19:20:18 +03:00
parent 5ea2540588
commit f3cd1347ab
10 changed files with 1043 additions and 15 deletions

View File

@@ -34,7 +34,7 @@ const OUTPUT_DIR = arg('output', `/tmp/kipina-benchmark/${TIMESTAMP}`);
const RESULTS_DIR = join(__dirname, 'results');
// Boolean switches: true when the corresponding flag string is present in `args`.
const THINK_MODE = args.includes('--think');
const COMPACT_MODE = args.includes('--compact');
// NOTE(review): the two LANG lines below look like the old and new side of a diff
// rendered without +/- markers; only one of them can exist in the real file — confirm.
const LANG = arg('lang', 'python'); // python | rust
const LANG = arg('lang', 'python'); // python | rust | go
const ROUNDS = parseInt(arg('rounds', '1')); // 1-10 repetitions
const MAX_FIX_ROUNDS = 2;
@@ -53,11 +53,12 @@ const PROFILES = JSON.parse(readFileSync(join(__dirname, 'profiles.json'), 'utf-
// Resolves the golden-reference markdown file name to use for `model`.
// Looks up the model's entry in PROFILES.models and falls back to 'todo.md'
// when that entry has no `golden` value. Returns a file name, not a full path;
// a language-specific variant is returned only if it exists under GOLDEN_DIR.
// NOTE(review): this span looks like a diff rendered without +/- markers — the
// `if (LANG === 'rust')` lines appear to be the pre-change form of the
// `langSuffix` lookup that follows them; confirm against the real file.
function getGoldenForModel(model) {
const modelConf = PROFILES.models[model];
const goldenFile = modelConf?.golden || 'todo.md';
// For Rust, switch .md → -rs.md (todo.md → todo-rs.md)
if (LANG === 'rust') {
const rsFile = goldenFile.replace(/\.md$/, '-rs.md');
const rsPath = join(GOLDEN_DIR, rsFile);
if (existsSync(rsPath)) return rsFile;
// Language-specific golden: todo.md → todo-rs.md / todo-go.md
const langSuffix = { rust: '-rs', go: '-go' }[LANG];
if (langSuffix) {
const langFile = goldenFile.replace(/\.md$/, `${langSuffix}.md`);
const langPath = join(GOLDEN_DIR, langFile);
if (existsSync(langPath)) return langFile;
}
return goldenFile;
}
@@ -66,8 +67,8 @@ function getCodePromptForModel(model) {
const modelConf = PROFILES.models[model];
const profile = modelConf?.profile || PROFILES.default_profile;
const promptName = modelConf?.prompt || PROFILES.profiles[profile]?.prompt || 'code';
const suffix = LANG === 'rust' ? '-rs' : '';
// Yritä kielispesifistä ensin (code-small-rs), sitten perus (code-small)
const suffix = { rust: '-rs', go: '-go' }[LANG] || '';
// Yritä kielispesifistä ensin (code-small-rs, code-go), sitten perus (code-small)
const candidates = [`${promptName}${suffix}`, promptName, `code${suffix}`, 'code'];
for (const name of candidates) {
const path = join(__dirname, 'prompts', `${name}.md`);
@@ -91,18 +92,24 @@ const LANG_CONFIG = {
required: ['Cargo.toml', 'src/models.rs', 'src/handlers.rs', 'src/lib.rs', 'src/main.rs', 'tests/api_test.rs'],
dockerImage: 'kipina-cargo-test',
},
go: {
goldenDir: 'todo-go',
files: ['go.mod', 'models.go', 'handlers.go', 'main.go', 'handlers_test.go'],
required: ['go.mod', 'models.go', 'handlers.go', 'main.go', 'handlers_test.go'],
dockerImage: 'kipina-go-test',
},
};
const LCONF = LANG_CONFIG[LANG] || LANG_CONFIG.python;
function loadGoldenExample(model) {
// --compact: käytä tiivistettyä templaattia
if (COMPACT_MODE) {
const compactFile = LANG === 'rust' ? 'golden-compact-rs.md' : 'golden-compact-py.md';
const compactFile = { rust: 'golden-compact-rs.md', go: 'golden-compact-go.md' }[LANG] || 'golden-compact-py.md';
const compactPath = join(__dirname, 'prompts', compactFile);
if (existsSync(compactPath)) return '\n' + readFileSync(compactPath, 'utf-8').trim() + '\n';
}
// Mallikohtainen golden example profiilista
const goldenFile = model ? getGoldenForModel(model) : (LANG === 'rust' ? 'todo-rs.md' : 'todo.md');
const goldenFile = model ? getGoldenForModel(model) : ({ rust: 'todo-rs.md', go: 'todo-go.md' }[LANG] || 'todo.md');
const mdPath = join(GOLDEN_DIR, goldenFile);
if (existsSync(mdPath)) return '\n' + readFileSync(mdPath, 'utf-8').trim() + '\n';
// Fallback: erilliset tiedostot
@@ -201,7 +208,13 @@ function parseTestOutput(output) {
const failed = parseInt(cargoMatch[2]);
return { testsPassed: passed, testsFailed: failed, testsTotal: passed + failed };
}
// Cargo compilation error: count "error[E" occurrences
// Go test: "--- PASS:" / "--- FAIL:"
const goPassed = (output.match(/--- PASS:/g) || []).length;
const goFailed = (output.match(/--- FAIL:/g) || []).length;
if (goPassed + goFailed > 0) {
return { testsPassed: goPassed, testsFailed: goFailed, testsTotal: goPassed + goFailed };
}
// Cargo compilation error: count "error[E<code>]" occurrences (NOTE: Go compile errors such as "./handlers.go:12:5: undefined: x" do not match this pattern)
const compileErrors = (output.match(/error\[E\d+\]/g) || []).length;
if (compileErrors > 0) {
return { testsPassed: 0, testsFailed: compileErrors, testsTotal: compileErrors };
@@ -333,7 +346,7 @@ async function runPipeline(model, scenario) {
// 3. LLM-koodigenerointi
const fileCount = LCONF.required.length;
const goldenExample = loadGoldenExample(model);
const codeTokens = LANG === 'rust' ? 12288 : 8192;
const codeTokens = LANG === 'rust' ? 12288 : LANG === 'go' ? 10240 : 8192;
let files;
// Orkestrointi: pilko entiteetti kerrallaan pienille malleille
@@ -454,7 +467,7 @@ async function runPipeline(model, scenario) {
result.fixRounds = fixRound;
// 5. Testit Docker-kontissa + itsekorjaava looppi (Taso 4)
const testLabel = LANG === 'rust' ? 'Cargo test' : 'Pytest';
const testLabel = { rust: 'Cargo test', go: 'Go test', python: 'Pytest' }[LANG] || 'Test';
const dockerTimeout = LANG === 'rust' ? 300000 : 120000;
const MAX_TEST_FIX = 3;
let bestFiles = { ...files }; // Paras versio tiedostoista
@@ -538,13 +551,13 @@ async function runPipeline(model, scenario) {
}
// Itsekorjaus: syötä virhe + koodi mallille
const errorLines = testOut.split('\n').filter(l => /^E |FAILED|ERROR|error\[E/.test(l)).slice(0, 20).join('\n');
const errorLines = testOut.split('\n').filter(l => /^E |FAILED|ERROR|error\[E|--- FAIL|panic:|\.go:\d+/.test(l)).slice(0, 20).join('\n');
if (!errorLines) break; // Ei parsittavia virheitä
console.log(` [5/5] Itsekorjaus: ${result.testsFailed || 'virhe'}...`);
const allCode = Object.entries(files).map(([fn, c]) => `=== ${fn} ===\n${c}`).join('\n\n');
const fixPrompt = `The following test errors occurred. Fix the code so ALL tests pass. Return ALL files with === markers.\n\nERRORS:\n${errorLines}\n\nCURRENT CODE:\n${allCode}`;
const fixResp = await ollamaChat(model, fixPrompt, CODE_SYSTEM, LANG === 'rust' ? 12288 : 8192);
const fixResp = await ollamaChat(model, fixPrompt, CODE_SYSTEM, LANG === 'rust' ? 12288 : LANG === 'go' ? 10240 : 8192);
timings.push(fixResp);
const fixedFiles = parseGeneratedFiles(fixResp.text);