CodeBench: Go-tuki — Chi + SQLite + httptest

- Golden example: todo-go/ (6/6 testit läpi)
- todo-go.md golden reference
- prompts/code-go.md koodigenerointi-prompti
- Dockerfile.go-test (golang:1.23-alpine)
- benchmark.mjs: LANG_CONFIG, parseTestOutput, prompt/golden-valinta Go:lle
- Käyttö: node benchmark.mjs --lang go --models qwen2.5-coder:32b
2026-04-14 19:20:18 +03:00
parent 5ea2540588
commit f3cd1347ab
10 changed files with 1043 additions and 15 deletions
--- a/kipina-codebench/benchmark.mjs
+++ b/kipina-codebench/benchmark.mjs
@@ -34,7 +34,7 @@ const OUTPUT_DIR = arg('output', `/tmp/kipina-benchmark/${TIMESTAMP}`);
 const RESULTS_DIR = join(__dirname, 'results');
 const THINK_MODE = args.includes('--think');
 const COMPACT_MODE = args.includes('--compact');
-const LANG = arg('lang', 'python');  // python | rust
+const LANG = arg('lang', 'python');  // python | rust | go
 const ROUNDS = parseInt(arg('rounds', '1'));  // 1-10 toistoa
 const MAX_FIX_ROUNDS = 2;

@@ -53,11 +53,12 @@ const PROFILES = JSON.parse(readFileSync(join(__dirname, 'profiles.json'), 'utf-
 function getGoldenForModel(model) {
    const modelConf = PROFILES.models[model];
    const goldenFile = modelConf?.golden || 'todo.md';
-    // Rust-kielelle vaihda .md → -rs.md (todo.md → todo-rs.md)
-    if (LANG === 'rust') {
-        const rsFile = goldenFile.replace(/\.md$/, '-rs.md');
-        const rsPath = join(GOLDEN_DIR, rsFile);
-        if (existsSync(rsPath)) return rsFile;
+    // Kielispesifi golden: todo.md → todo-rs.md / todo-go.md
+    const langSuffix = { rust: '-rs', go: '-go' }[LANG];
+    if (langSuffix) {
+        const langFile = goldenFile.replace(/\.md$/, `${langSuffix}.md`);
+        const langPath = join(GOLDEN_DIR, langFile);
+        if (existsSync(langPath)) return langFile;
    }
    return goldenFile;
 }
@@ -66,8 +67,8 @@ function getCodePromptForModel(model) {
    const modelConf = PROFILES.models[model];
    const profile = modelConf?.profile || PROFILES.default_profile;
    const promptName = modelConf?.prompt || PROFILES.profiles[profile]?.prompt || 'code';
-    const suffix = LANG === 'rust' ? '-rs' : '';
-    // Yritä kielispesifistä ensin (code-small-rs), sitten perus (code-small)
+    const suffix = { rust: '-rs', go: '-go' }[LANG] || '';
+    // Yritä kielispesifistä ensin (code-small-rs, code-go), sitten perus (code-small)
    const candidates = [`${promptName}${suffix}`, promptName, `code${suffix}`, 'code'];
    for (const name of candidates) {
        const path = join(__dirname, 'prompts', `${name}.md`);
@@ -91,18 +92,24 @@ const LANG_CONFIG = {
        required: ['Cargo.toml', 'src/models.rs', 'src/handlers.rs', 'src/lib.rs', 'src/main.rs', 'tests/api_test.rs'],
        dockerImage: 'kipina-cargo-test',
    },
+    go: {
+        goldenDir: 'todo-go',
+        files: ['go.mod', 'models.go', 'handlers.go', 'main.go', 'handlers_test.go'],
+        required: ['go.mod', 'models.go', 'handlers.go', 'main.go', 'handlers_test.go'],
+        dockerImage: 'kipina-go-test',
+    },
 };
 const LCONF = LANG_CONFIG[LANG] || LANG_CONFIG.python;

 function loadGoldenExample(model) {
    // --compact: käytä tiivistettyä templaattia
    if (COMPACT_MODE) {
-        const compactFile = LANG === 'rust' ? 'golden-compact-rs.md' : 'golden-compact-py.md';
+        const compactFile = { rust: 'golden-compact-rs.md', go: 'golden-compact-go.md' }[LANG] || 'golden-compact-py.md';
        const compactPath = join(__dirname, 'prompts', compactFile);
        if (existsSync(compactPath)) return '\n' + readFileSync(compactPath, 'utf-8').trim() + '\n';
    }
    // Mallikohtainen golden example profiilista
-    const goldenFile = model ? getGoldenForModel(model) : (LANG === 'rust' ? 'todo-rs.md' : 'todo.md');
+    const goldenFile = model ? getGoldenForModel(model) : ({ rust: 'todo-rs.md', go: 'todo-go.md' }[LANG] || 'todo.md');
    const mdPath = join(GOLDEN_DIR, goldenFile);
    if (existsSync(mdPath)) return '\n' + readFileSync(mdPath, 'utf-8').trim() + '\n';
    // Fallback: erilliset tiedostot
@@ -201,7 +208,13 @@ function parseTestOutput(output) {
        const failed = parseInt(cargoMatch[2]);
        return { testsPassed: passed, testsFailed: failed, testsTotal: passed + failed };
    }
-    // Cargo compilation error: count "error[E" occurrences
+    // Go test: "--- PASS:" / "--- FAIL:"
+    const goPassed = (output.match(/--- PASS:/g) || []).length;
+    const goFailed = (output.match(/--- FAIL:/g) || []).length;
+    if (goPassed + goFailed > 0) {
+        return { testsPassed: goPassed, testsFailed: goFailed, testsTotal: goPassed + goFailed };
+    }
+    // Cargo compilation error: count "error[E<digits>]" codes (Go compile errors are not matched by this pattern)
    const compileErrors = (output.match(/error\[E\d+\]/g) || []).length;
    if (compileErrors > 0) {
        return { testsPassed: 0, testsFailed: compileErrors, testsTotal: compileErrors };
@@ -333,7 +346,7 @@ async function runPipeline(model, scenario) {
        // 3. LLM-koodigenerointi
        const fileCount = LCONF.required.length;
        const goldenExample = loadGoldenExample(model);
-        const codeTokens = LANG === 'rust' ? 12288 : 8192;
+        const codeTokens = LANG === 'rust' ? 12288 : LANG === 'go' ? 10240 : 8192;
        let files;

        // Orkestrointi: pilko entiteetti kerrallaan pienille malleille
@@ -454,7 +467,7 @@ async function runPipeline(model, scenario) {
        result.fixRounds = fixRound;

        // 5. Testit Docker-kontissa + itsekorjaava looppi (Taso 4)
-        const testLabel = LANG === 'rust' ? 'Cargo test' : 'Pytest';
+        const testLabel = { rust: 'Cargo test', go: 'Go test', python: 'Pytest' }[LANG] || 'Test';
        const dockerTimeout = LANG === 'rust' ? 300000 : 120000;
        const MAX_TEST_FIX = 3;
        let bestFiles = { ...files };       // Paras versio tiedostoista
@@ -538,13 +551,13 @@ async function runPipeline(model, scenario) {
            }

            // Itsekorjaus: syötä virhe + koodi mallille
-            const errorLines = testOut.split('\n').filter(l => /^E |FAILED|ERROR|error\[E/.test(l)).slice(0, 20).join('\n');
+            const errorLines = testOut.split('\n').filter(l => /^E |FAILED|ERROR|error\[E|--- FAIL|panic:|\.go:\d+/.test(l)).slice(0, 20).join('\n');
            if (!errorLines) break;  // Ei parsittavia virheitä

            console.log(`    [5/5] Itsekorjaus: ${result.testsFailed || 'virhe'}...`);
            const allCode = Object.entries(files).map(([fn, c]) => `=== ${fn} ===\n${c}`).join('\n\n');
            const fixPrompt = `The following test errors occurred. Fix the code so ALL tests pass. Return ALL files with === markers.\n\nERRORS:\n${errorLines}\n\nCURRENT CODE:\n${allCode}`;
-            const fixResp = await ollamaChat(model, fixPrompt, CODE_SYSTEM, LANG === 'rust' ? 12288 : 8192);
+            const fixResp = await ollamaChat(model, fixPrompt, CODE_SYSTEM, LANG === 'rust' ? 12288 : LANG === 'go' ? 10240 : 8192);
            timings.push(fixResp);

            const fixedFiles = parseGeneratedFiles(fixResp.text);