CodeBench: Rust-tuki (--lang rust), golden example todo-rs, Dockerfile.cargo-test

- golden-examples/todo-rs/: Axum 0.8 + SQLx + SQLite, 10 testiä
- prompts/code-rs.md: Rust-koodingenerointiprompt
- Dockerfile.cargo-test: rust:1.87-slim testikontti
- benchmark.mjs: --lang python|rust, kieliriippuvainen golden example, parseri tukee cargo test -tuloksia, src/ alihakemistot
2026-04-14 10:55:50 +03:00
parent 9da5540ca2
commit e7b33b7d6f
10 changed files with 1360 additions and 55 deletions
--- a/kipina-codebench/benchmark.mjs
+++ b/kipina-codebench/benchmark.mjs
@@ -33,6 +33,7 @@ const TIMESTAMP = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 16);
 const OUTPUT_DIR = arg('output', `/tmp/kipina-benchmark/${TIMESTAMP}`);
 const RESULTS_DIR = join(__dirname, 'results');
 const THINK_MODE = args.includes('--think');
+const LANG = arg('lang', 'python');  // python | rust
 const MAX_FIX_ROUNDS = 2;

 // === Promptien lataus tiedostoista ===
@@ -43,18 +44,32 @@ function loadPrompt(name) {
 }
 const CLIENT_SYSTEM = loadPrompt('client');
 const SPEC_SYSTEM = loadPrompt('spec');
-const CODE_SYSTEM = loadPrompt('code');
+const CODE_SYSTEM = loadPrompt(LANG === 'rust' ? 'code-rs' : 'code');
 const FIX_SYSTEM = loadPrompt('fix');

-// === Kultaisten esimerkkien lataus ===
+// === Kultaisten esimerkkien lataus (kielen mukaan) ===
 const GOLDEN_DIR = join(__dirname, 'golden-examples');
-const GOLDEN_PY_FILES = ['models.py', 'schemas.py', 'main.py', 'test_main.py'];
+// Per-language benchmark settings, keyed by the value of LANG.
+//   goldenDir   - subdirectory of GOLDEN_DIR that holds the reference project
+//   files       - reference files concatenated (in this order) into the golden example text
+//   required    - files the LLM response must contain; if any is missing,
+//                 runPipeline records an error and returns early
+//   dockerImage - Docker image used to run the generated project's tests
+const LANG_CONFIG = {
+    python: {
+        goldenDir: 'todo',
+        files: ['models.py', 'schemas.py', 'main.py', 'test_main.py'],
+        required: ['models.py', 'schemas.py', 'main.py', 'test_main.py'],
+        dockerImage: 'kipina-pytest',
+    },
+    rust: {
+        goldenDir: 'todo-rs',
+        files: ['Cargo.toml', 'src/models.rs', 'src/handlers.rs', 'src/lib.rs', 'src/main.rs', 'tests/api_test.rs'],
+        required: ['Cargo.toml', 'src/models.rs', 'src/handlers.rs', 'src/lib.rs', 'src/main.rs', 'tests/api_test.rs'],
+        dockerImage: 'kipina-cargo-test',
+    },
+};
+// A LANG value with no entry above silently falls back to the Python settings.
+const LCONF = LANG_CONFIG[LANG] || LANG_CONFIG.python;

 function loadGoldenExample() {
-    const todoDir = join(GOLDEN_DIR, 'todo');
+    const todoDir = join(GOLDEN_DIR, LCONF.goldenDir);
    if (!existsSync(todoDir)) return '';
-    let example = '\nREFERENCE IMPLEMENTATION (todo project — follow this exact structure, style, and conventions):\n\n';
-    for (const f of GOLDEN_PY_FILES) {
+    let example = `\nREFERENCE IMPLEMENTATION (todo project — follow this exact structure, style, and conventions):\n\n`;
+    for (const f of LCONF.files) {
        const path = join(todoDir, f);
        if (existsSync(path)) example += `=== ${f} ===\n${readFileSync(path, 'utf-8').trim()}\n\n`;
    }
@@ -129,14 +144,40 @@ async function ollamaListModels() {
    return (data.models || []).map(m => m.name);
 }

+// === Testitulosten parsinta (pytest + cargo test) ===
+/**
+ * Extract pass/fail counts from captured test-runner output.
+ *
+ * Recognized formats, checked in this order:
+ *   1. cargo test summary lines ("test result: ok. 10 passed; 0 failed; ...").
+ *      cargo prints one such line per test binary (lib unit tests, each file
+ *      under tests/, doc-tests), so all of them are summed.
+ *   2. pytest summary counts ("6 passed", "2 failed", "1 error"); errors are
+ *      counted as failures.
+ *   3. rustc compile errors ("error[E0432]"): each occurrence counts as one
+ *      failure, so a project that does not compile still yields a non-zero total.
+ *
+ * @param {string} output - Combined stdout/stderr of the test container.
+ * @returns {{testsPassed: number, testsFailed: number, testsTotal: number}}
+ *          All zeros when no known pattern is found.
+ */
+function parseTestOutput(output) {
+    const toCounts = (passed, failed) => ({ testsPassed: passed, testsFailed: failed, testsTotal: passed + failed });
+    // Cargo must be checked before pytest: its summary line also contains
+    // "N passed" / "N failed", which the looser pytest patterns would match,
+    // and then only the first binary's counts (often "0 passed") would be read.
+    const cargoSummaries = [...output.matchAll(/test result: \w+\.\s*(\d+) passed;\s*(\d+) failed/g)];
+    if (cargoSummaries.length > 0) {
+        let passed = 0;
+        let failed = 0;
+        for (const [, passedText, failedText] of cargoSummaries) {
+            passed += Number.parseInt(passedText, 10);
+            failed += Number.parseInt(failedText, 10);
+        }
+        return toCounts(passed, failed);
+    }
+    // Pytest: "6 passed", "2 failed", "1 error"
+    const pyPassed = output.match(/(\d+) passed/);
+    const pyFailed = output.match(/(\d+) failed/);
+    const pyError = output.match(/(\d+) error/);
+    // An errors-only run (e.g. a collection failure) has no passed/failed
+    // counts but must still be reported as failures, not as "no tests".
+    if (pyPassed || pyFailed || pyError) {
+        const passed = pyPassed ? Number.parseInt(pyPassed[1], 10) : 0;
+        const failed = (pyFailed ? Number.parseInt(pyFailed[1], 10) : 0)
+            + (pyError ? Number.parseInt(pyError[1], 10) : 0);
+        return toCounts(passed, failed);
+    }
+    // Cargo compilation error: count "error[E" occurrences
+    const compileErrors = (output.match(/error\[E\d+\]/g) || []).length;
+    return toCounts(0, compileErrors);
+}
+
 // === Tiedostoparseri LLM-vastauksesta ===
 function parseGeneratedFiles(text) {
    const files = {};
-    const sections = text.split(/===\s*(\S+\.(?:py|toml))\s*===/);
+    const sections = text.split(/===\s*(\S+\.(?:py|toml|rs))\s*===/);
    for (let i = 1; i < sections.length - 1; i += 2) {
        const name = sections[i];
        let content = sections[i + 1].trim();
-        content = content.replace(/^```(?:python|toml)?\s*\n?/m, '').replace(/\n?```\s*$/m, '').trim();
+        content = content.replace(/^```(?:python|toml|rust)?\s*\n?/m, '').replace(/\n?```\s*$/m, '').trim();
        if (content) files[name] = content + '\n';
    }
    return files;
@@ -251,72 +292,70 @@ async function runPipeline(model, scenario) {

        // 3. LLM-koodigenerointi
        console.log(`    [3/5] Koodigenerointi (LLM)...`);
-        const codePrompt = `${GOLDEN_EXAMPLE}\n---\n\nPROJECT REQUIREMENTS:\n${req.text}\n\nJSON SPECIFICATION:\n${JSON.stringify(spec, null, 2)}\n\nGenerate the complete project with all 4 files. Follow the reference implementation patterns exactly.`;
+        const fileCount = LCONF.required.length;
+        const codePrompt = `${GOLDEN_EXAMPLE}\n---\n\nPROJECT REQUIREMENTS:\n${req.text}\n\nJSON SPECIFICATION:\n${JSON.stringify(spec, null, 2)}\n\nGenerate the complete project with all ${fileCount} files. Follow the reference implementation patterns exactly.`;
        result.promptChars = CODE_SYSTEM.length + codePrompt.length;
        result.promptTokensEst = Math.round(result.promptChars / 4);
-        const codeResp = await ollamaChat(model, codePrompt, CODE_SYSTEM, 8192);
+        const codeTokens = LANG === 'rust' ? 12288 : 8192;
+        const codeResp = await ollamaChat(model, codePrompt, CODE_SYSTEM, codeTokens);
        timings.push(codeResp);
        writeFileSync(`${dir}/_code_raw.txt`, codeResp.text);
        const files = parseGeneratedFiles(codeResp.text);
-        const required = ['models.py', 'schemas.py', 'main.py', 'test_main.py'];
-        const missing = required.filter(f => !files[f]);
+        const missing = LCONF.required.filter(f => !files[f]);
        if (missing.length > 0) { result.error = `Puuttuvat: ${missing.join(', ')}`; return result; }

-        // 4. Validointi + korjaussilmukka
-        let issues = validateProjectCode(files);
+        // 4. Validointi + korjaussilmukka (Python-spesifi)
        let fixRound = 0;
-        while (issues.length > 0 && fixRound < MAX_FIX_ROUNDS) {
-            fixRound++;
-            console.log(`    [4/5] Korjauskierros ${fixRound} (${issues.length} ongelmaa)...`);
-            const issuesByFile = {};
-            for (const issue of issues) {
-                const m = issue.match(/^ISSUE:\s*(\S+?):/);
-                const fname = m ? m[1] : 'unknown';
-                if (!issuesByFile[fname]) issuesByFile[fname] = [];
-                issuesByFile[fname].push(issue);
-            }
-            for (const [fname, fIssues] of Object.entries(issuesByFile)) {
-                if (!files[fname]) continue;
-                const fixPrompt = `Fix the following issues in this Python file. Return ONLY the complete corrected file, no explanations.\n\nISSUES:\n${fIssues.join('\n')}\n\nCURRENT FILE (${fname}):\n\`\`\`python\n${files[fname]}\`\`\``;
-                const fixResp = await ollamaChat(model, fixPrompt, FIX_SYSTEM, 2048);
-                timings.push(fixResp);
-                if (fixResp.text) {
-                    files[fname] = fixResp.text.replace(/^```(?:python)?\s*\n?/m, '').replace(/\n?```\s*$/m, '').trim() + '\n';
+        if (LANG === 'python') {
+            let issues = validateProjectCode(files);
+            while (issues.length > 0 && fixRound < MAX_FIX_ROUNDS) {
+                fixRound++;
+                console.log(`    [4/5] Korjauskierros ${fixRound} (${issues.length} ongelmaa)...`);
+                const issuesByFile = {};
+                for (const issue of issues) {
+                    const m = issue.match(/^ISSUE:\s*(\S+?):/);
+                    const fname = m ? m[1] : 'unknown';
+                    if (!issuesByFile[fname]) issuesByFile[fname] = [];
+                    issuesByFile[fname].push(issue);
                }
+                for (const [fname, fIssues] of Object.entries(issuesByFile)) {
+                    if (!files[fname]) continue;
+                    const fixPrompt = `Fix the following issues in this Python file. Return ONLY the complete corrected file, no explanations.\n\nISSUES:\n${fIssues.join('\n')}\n\nCURRENT FILE (${fname}):\n\`\`\`python\n${files[fname]}\`\`\``;
+                    const fixResp = await ollamaChat(model, fixPrompt, FIX_SYSTEM, 2048);
+                    timings.push(fixResp);
+                    if (fixResp.text) {
+                        files[fname] = fixResp.text.replace(/^```(?:python)?\s*\n?/m, '').replace(/\n?```\s*$/m, '').trim() + '\n';
+                    }
+                }
+                issues = validateProjectCode(files);
            }
-            issues = validateProjectCode(files);
+            result.validationIssues = issues.length;
        }
-        result.validationIssues = issues.length;
        result.fixRounds = fixRound;

-        // Kirjoita LLM:n generoimat Python-tiedostot
+        // Kirjoita LLM:n generoimat tiedostot (luo src/ ja tests/ alihakemistot tarvittaessa)
        for (const [fn, content] of Object.entries(files)) {
-            if (fn.endsWith('.py')) writeFileSync(`${dir}/${fn}`, content);
+            const filePath = join(dir, fn);
+            mkdirSync(dirname(filePath), { recursive: true });
+            writeFileSync(filePath, content);
        }

-        // 5. Pytest Docker-kontissa (kipina-pytest image)
-        console.log(`    [5/5] Pytest (Docker)...`);
+        // 5. Testit Docker-kontissa
+        const testLabel = LANG === 'rust' ? 'Cargo test (Docker)' : 'Pytest (Docker)';
+        console.log(`    [5/5] ${testLabel}...`);
+        const dockerTimeout = LANG === 'rust' ? 300000 : 120000;
        try {
-            const pytestOut = execSync(
-                `docker run --rm -v "${dir}:/src:ro" kipina-pytest 2>&1`,
-                { timeout: 120000, encoding: 'utf-8' }
+            const testOut = execSync(
+                `docker run --rm -v "${dir}:/src:ro" ${LCONF.dockerImage} 2>&1`,
+                { timeout: dockerTimeout, encoding: 'utf-8' }
            );
-            writeFileSync(`${dir}/_pytest.txt`, pytestOut);
-            const passedMatch = pytestOut.match(/(\d+) passed/);
-            const failedMatch = pytestOut.match(/(\d+) failed/);
-            result.testsPassed = passedMatch ? parseInt(passedMatch[1]) : 0;
-            result.testsFailed = failedMatch ? parseInt(failedMatch[1]) : 0;
-            result.testsTotal = result.testsPassed + result.testsFailed;
+            writeFileSync(`${dir}/_testout.txt`, testOut);
+            Object.assign(result, parseTestOutput(testOut));
        } catch (e) {
            const output = e.stdout || e.stderr || e.message || '';
-            writeFileSync(`${dir}/_pytest.txt`, output);
-            const passedMatch = output.match(/(\d+) passed/);
-            const failedMatch = output.match(/(\d+) failed/);
-            const errorMatch = output.match(/(\d+) error/);
-            result.testsPassed = passedMatch ? parseInt(passedMatch[1]) : 0;
-            result.testsFailed = (failedMatch ? parseInt(failedMatch[1]) : 0) + (errorMatch ? parseInt(errorMatch[1]) : 0);
-            result.testsTotal = result.testsPassed + result.testsFailed;
-            if (result.testsTotal === 0) result.error = 'Pytest kaatui';
+            writeFileSync(`${dir}/_testout.txt`, output);
+            Object.assign(result, parseTestOutput(output));
+            if (result.testsTotal === 0) result.error = 'Testit kaatuivat';
        }
    } catch (e) {
        result.error = e.message;
@@ -337,7 +376,7 @@ async function main() {
    console.log('╔══════════════════════════════════════════════╗');
    console.log('║       Kipinä CodeBench                      ║');
    console.log('╚══════════════════════════════════════════════╝');
-    console.log(`Ollama: ${OLLAMA_URL}${THINK_MODE ? '  🧠 thinking ON (3× tokens)' : ''}`);
+    console.log(`Ollama: ${OLLAMA_URL}  📝 ${LANG}${THINK_MODE ? '  🧠 thinking ON' : ''}`);

    // Haetaan mallit
    let models;