CodeBench: Rust-tuki (--lang rust), golden example todo-rs, Dockerfile.cargo-test

- golden-examples/todo-rs/: Axum 0.8 + SQLx + SQLite, 10 testiä
- prompts/code-rs.md: Rust-koodingenerointiprompt
- Dockerfile.cargo-test: rust:1.87-slim testikontti
- benchmark.mjs: --lang python|rust, kieliriippuvainen golden example,
  parseri tukee cargo test -tuloksia, src/ alihakemistot
This commit is contained in:
2026-04-14 10:55:50 +03:00
parent 9da5540ca2
commit e7b33b7d6f
10 changed files with 1360 additions and 55 deletions

View File

@@ -33,6 +33,7 @@ const TIMESTAMP = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 16);
const OUTPUT_DIR = arg('output', `/tmp/kipina-benchmark/${TIMESTAMP}`);
const RESULTS_DIR = join(__dirname, 'results');
const THINK_MODE = args.includes('--think');
const LANG = arg('lang', 'python'); // python | rust
const MAX_FIX_ROUNDS = 2;
// === Promptien lataus tiedostoista ===
@@ -43,18 +44,32 @@ function loadPrompt(name) {
}
const CLIENT_SYSTEM = loadPrompt('client');
const SPEC_SYSTEM = loadPrompt('spec');
const CODE_SYSTEM = loadPrompt('code');
const CODE_SYSTEM = loadPrompt(LANG === 'rust' ? 'code-rs' : 'code');
const FIX_SYSTEM = loadPrompt('fix');
// === Kultaisten esimerkkien lataus ===
// === Kultaisten esimerkkien lataus (kielen mukaan) ===
const GOLDEN_DIR = join(__dirname, 'golden-examples');
const GOLDEN_PY_FILES = ['models.py', 'schemas.py', 'main.py', 'test_main.py'];
// Per-language benchmark settings, keyed by the value of the --lang argument.
//   goldenDir   - subdirectory of golden-examples/ holding the reference project
//   files       - reference files embedded into the code-generation prompt
//   required    - files the LLM response must contain for the run to continue
//   dockerImage - Docker image used to run the generated project's tests
const LANG_CONFIG = (() => {
  const pythonSources = ['models.py', 'schemas.py', 'main.py', 'test_main.py'];
  const rustSources = [
    'Cargo.toml',
    'src/models.rs',
    'src/handlers.rs',
    'src/lib.rs',
    'src/main.rs',
    'tests/api_test.rs',
  ];

  // `files` and `required` currently list the same paths; each gets its own
  // array copy so the two lists stay independent objects.
  return {
    python: {
      goldenDir: 'todo',
      files: [...pythonSources],
      required: [...pythonSources],
      dockerImage: 'kipina-pytest',
    },
    rust: {
      goldenDir: 'todo-rs',
      files: [...rustSources],
      required: [...rustSources],
      dockerImage: 'kipina-cargo-test',
    },
  };
})();
const LCONF = LANG_CONFIG[LANG] || LANG_CONFIG.python;
function loadGoldenExample() {
const todoDir = join(GOLDEN_DIR, 'todo');
const todoDir = join(GOLDEN_DIR, LCONF.goldenDir);
if (!existsSync(todoDir)) return '';
let example = '\nREFERENCE IMPLEMENTATION (todo project — follow this exact structure, style, and conventions):\n\n';
for (const f of GOLDEN_PY_FILES) {
let example = `\nREFERENCE IMPLEMENTATION (todo project — follow this exact structure, style, and conventions):\n\n`;
for (const f of LCONF.files) {
const path = join(todoDir, f);
if (existsSync(path)) example += `=== ${f} ===\n${readFileSync(path, 'utf-8').trim()}\n\n`;
}
@@ -129,14 +144,40 @@ async function ollamaListModels() {
return (data.models || []).map(m => m.name);
}
// === Testitulosten parsinta (pytest + cargo test) ===
/**
 * Extracts pass/fail counts from the captured output of a test run.
 *
 * Recognised formats, checked in this order:
 *   1. cargo test summaries ("test result: ok. 10 passed; 0 failed; ...").
 *      Every summary line in the output is added up, because cargo prints one
 *      line per test binary and the first one is often "0 passed; 0 failed".
 *   2. pytest summaries ("6 passed", "2 failed", "1 error"); errors count as
 *      failures, including when no test passed or failed at all.
 *   3. Rust compiler diagnostics ("error[E0433]"): each one counts as a failure.
 *
 * @param {string} output - Combined stdout/stderr text of the test run.
 * @returns {{testsPassed: number, testsFailed: number, testsTotal: number}}
 *   Counts found in the output; all zeros when nothing is recognised.
 */
function parseTestOutput(output) {
  const text = output == null ? '' : String(output);
  const summarize = (testsPassed, testsFailed) => ({
    testsPassed,
    testsFailed,
    testsTotal: testsPassed + testsFailed,
  });

  // Cargo must be checked before pytest: the pytest patterns below would also
  // match the "N passed; M failed" part of a cargo summary line.
  const cargoSummaries = [...text.matchAll(/test result: \w+\.\s*(\d+) passed;\s*(\d+) failed/g)];
  if (cargoSummaries.length > 0) {
    let passed = 0;
    let failed = 0;
    for (const [, passedText, failedText] of cargoSummaries) {
      passed += Number.parseInt(passedText, 10);
      failed += Number.parseInt(failedText, 10);
    }
    return summarize(passed, failed);
  }

  // Pytest: "6 passed", "2 failed", "1 error"
  const countOf = (pattern) => {
    const found = text.match(pattern);
    return found ? Number.parseInt(found[1], 10) : null;
  };
  const pyPassed = countOf(/(\d+) passed/);
  const pyFailed = countOf(/(\d+) failed/);
  const pyError = countOf(/(\d+) error/);
  if (pyPassed !== null || pyFailed !== null || pyError !== null) {
    return summarize(pyPassed ?? 0, (pyFailed ?? 0) + (pyError ?? 0));
  }

  // Cargo compilation failure: no tests ran, so count "error[E…]" diagnostics.
  const compileErrors = (text.match(/error\[E\d+\]/g) || []).length;
  if (compileErrors > 0) {
    return summarize(0, compileErrors);
  }

  return summarize(0, 0);
}
// === Tiedostoparseri LLM-vastauksesta ===
function parseGeneratedFiles(text) {
const files = {};
const sections = text.split(/===\s*(\S+\.(?:py|toml))\s*===/);
const sections = text.split(/===\s*(\S+\.(?:py|toml|rs))\s*===/);
for (let i = 1; i < sections.length - 1; i += 2) {
const name = sections[i];
let content = sections[i + 1].trim();
content = content.replace(/^```(?:python|toml)?\s*\n?/m, '').replace(/\n?```\s*$/m, '').trim();
content = content.replace(/^```(?:python|toml|rust)?\s*\n?/m, '').replace(/\n?```\s*$/m, '').trim();
if (content) files[name] = content + '\n';
}
return files;
@@ -251,72 +292,70 @@ async function runPipeline(model, scenario) {
// 3. LLM-koodigenerointi
console.log(` [3/5] Koodigenerointi (LLM)...`);
const codePrompt = `${GOLDEN_EXAMPLE}\n---\n\nPROJECT REQUIREMENTS:\n${req.text}\n\nJSON SPECIFICATION:\n${JSON.stringify(spec, null, 2)}\n\nGenerate the complete project with all 4 files. Follow the reference implementation patterns exactly.`;
const fileCount = LCONF.required.length;
const codePrompt = `${GOLDEN_EXAMPLE}\n---\n\nPROJECT REQUIREMENTS:\n${req.text}\n\nJSON SPECIFICATION:\n${JSON.stringify(spec, null, 2)}\n\nGenerate the complete project with all ${fileCount} files. Follow the reference implementation patterns exactly.`;
result.promptChars = CODE_SYSTEM.length + codePrompt.length;
result.promptTokensEst = Math.round(result.promptChars / 4);
const codeResp = await ollamaChat(model, codePrompt, CODE_SYSTEM, 8192);
const codeTokens = LANG === 'rust' ? 12288 : 8192;
const codeResp = await ollamaChat(model, codePrompt, CODE_SYSTEM, codeTokens);
timings.push(codeResp);
writeFileSync(`${dir}/_code_raw.txt`, codeResp.text);
const files = parseGeneratedFiles(codeResp.text);
const required = ['models.py', 'schemas.py', 'main.py', 'test_main.py'];
const missing = required.filter(f => !files[f]);
const missing = LCONF.required.filter(f => !files[f]);
if (missing.length > 0) { result.error = `Puuttuvat: ${missing.join(', ')}`; return result; }
// 4. Validointi + korjaussilmukka
let issues = validateProjectCode(files);
// 4. Validointi + korjaussilmukka (Python-spesifi)
let fixRound = 0;
while (issues.length > 0 && fixRound < MAX_FIX_ROUNDS) {
fixRound++;
console.log(` [4/5] Korjauskierros ${fixRound} (${issues.length} ongelmaa)...`);
const issuesByFile = {};
for (const issue of issues) {
const m = issue.match(/^ISSUE:\s*(\S+?):/);
const fname = m ? m[1] : 'unknown';
if (!issuesByFile[fname]) issuesByFile[fname] = [];
issuesByFile[fname].push(issue);
}
for (const [fname, fIssues] of Object.entries(issuesByFile)) {
if (!files[fname]) continue;
const fixPrompt = `Fix the following issues in this Python file. Return ONLY the complete corrected file, no explanations.\n\nISSUES:\n${fIssues.join('\n')}\n\nCURRENT FILE (${fname}):\n\`\`\`python\n${files[fname]}\`\`\``;
const fixResp = await ollamaChat(model, fixPrompt, FIX_SYSTEM, 2048);
timings.push(fixResp);
if (fixResp.text) {
files[fname] = fixResp.text.replace(/^```(?:python)?\s*\n?/m, '').replace(/\n?```\s*$/m, '').trim() + '\n';
if (LANG === 'python') {
let issues = validateProjectCode(files);
while (issues.length > 0 && fixRound < MAX_FIX_ROUNDS) {
fixRound++;
console.log(` [4/5] Korjauskierros ${fixRound} (${issues.length} ongelmaa)...`);
const issuesByFile = {};
for (const issue of issues) {
const m = issue.match(/^ISSUE:\s*(\S+?):/);
const fname = m ? m[1] : 'unknown';
if (!issuesByFile[fname]) issuesByFile[fname] = [];
issuesByFile[fname].push(issue);
}
for (const [fname, fIssues] of Object.entries(issuesByFile)) {
if (!files[fname]) continue;
const fixPrompt = `Fix the following issues in this Python file. Return ONLY the complete corrected file, no explanations.\n\nISSUES:\n${fIssues.join('\n')}\n\nCURRENT FILE (${fname}):\n\`\`\`python\n${files[fname]}\`\`\``;
const fixResp = await ollamaChat(model, fixPrompt, FIX_SYSTEM, 2048);
timings.push(fixResp);
if (fixResp.text) {
files[fname] = fixResp.text.replace(/^```(?:python)?\s*\n?/m, '').replace(/\n?```\s*$/m, '').trim() + '\n';
}
}
issues = validateProjectCode(files);
}
issues = validateProjectCode(files);
result.validationIssues = issues.length;
}
result.validationIssues = issues.length;
result.fixRounds = fixRound;
// Kirjoita LLM:n generoimat Python-tiedostot
// Kirjoita LLM:n generoimat tiedostot (luo src/ ja tests/ alihakemistot tarvittaessa)
for (const [fn, content] of Object.entries(files)) {
if (fn.endsWith('.py')) writeFileSync(`${dir}/${fn}`, content);
const filePath = join(dir, fn);
mkdirSync(dirname(filePath), { recursive: true });
writeFileSync(filePath, content);
}
// 5. Pytest Docker-kontissa (kipina-pytest image)
console.log(` [5/5] Pytest (Docker)...`);
// 5. Testit Docker-kontissa
const testLabel = LANG === 'rust' ? 'Cargo test (Docker)' : 'Pytest (Docker)';
console.log(` [5/5] ${testLabel}...`);
const dockerTimeout = LANG === 'rust' ? 300000 : 120000;
try {
const pytestOut = execSync(
`docker run --rm -v "${dir}:/src:ro" kipina-pytest 2>&1`,
{ timeout: 120000, encoding: 'utf-8' }
const testOut = execSync(
`docker run --rm -v "${dir}:/src:ro" ${LCONF.dockerImage} 2>&1`,
{ timeout: dockerTimeout, encoding: 'utf-8' }
);
writeFileSync(`${dir}/_pytest.txt`, pytestOut);
const passedMatch = pytestOut.match(/(\d+) passed/);
const failedMatch = pytestOut.match(/(\d+) failed/);
result.testsPassed = passedMatch ? parseInt(passedMatch[1]) : 0;
result.testsFailed = failedMatch ? parseInt(failedMatch[1]) : 0;
result.testsTotal = result.testsPassed + result.testsFailed;
writeFileSync(`${dir}/_testout.txt`, testOut);
Object.assign(result, parseTestOutput(testOut));
} catch (e) {
const output = e.stdout || e.stderr || e.message || '';
writeFileSync(`${dir}/_pytest.txt`, output);
const passedMatch = output.match(/(\d+) passed/);
const failedMatch = output.match(/(\d+) failed/);
const errorMatch = output.match(/(\d+) error/);
result.testsPassed = passedMatch ? parseInt(passedMatch[1]) : 0;
result.testsFailed = (failedMatch ? parseInt(failedMatch[1]) : 0) + (errorMatch ? parseInt(errorMatch[1]) : 0);
result.testsTotal = result.testsPassed + result.testsFailed;
if (result.testsTotal === 0) result.error = 'Pytest kaatui';
writeFileSync(`${dir}/_testout.txt`, output);
Object.assign(result, parseTestOutput(output));
if (result.testsTotal === 0) result.error = 'Testit kaatuivat';
}
} catch (e) {
result.error = e.message;
@@ -337,7 +376,7 @@ async function main() {
console.log('╔══════════════════════════════════════════════╗');
console.log('║ Kipinä CodeBench ║');
console.log('╚══════════════════════════════════════════════╝');
console.log(`Ollama: ${OLLAMA_URL}${THINK_MODE ? ' 🧠 thinking ON (3× tokens)' : ''}`);
console.log(`Ollama: ${OLLAMA_URL} 📝 ${LANG}${THINK_MODE ? ' 🧠 thinking ON' : ''}`);
// Haetaan mallit
let models;