CodeBench: Go-tuki — Chi + SQLite + httptest
- Golden example: todo-go/ (6/6 testit läpi)
- todo-go.md golden reference
- prompts/code-go.md koodigenerointi-prompti
- Dockerfile.go-test (golang:1.23-alpine)
- benchmark.mjs: LANG_CONFIG, parseTestOutput, prompt/golden-valinta Go:lle
- Käyttö: node benchmark.mjs --lang go --models qwen2.5-coder:32b
This commit is contained in:
@@ -34,7 +34,7 @@ const OUTPUT_DIR = arg('output', `/tmp/kipina-benchmark/${TIMESTAMP}`);
|
||||
const RESULTS_DIR = join(__dirname, 'results');
|
||||
const THINK_MODE = args.includes('--think');
|
||||
const COMPACT_MODE = args.includes('--compact');
|
||||
const LANG = arg('lang', 'python'); // python | rust
|
||||
const LANG = arg('lang', 'python'); // python | rust | go
|
||||
const ROUNDS = parseInt(arg('rounds', '1')); // 1-10 toistoa
|
||||
const MAX_FIX_ROUNDS = 2;
|
||||
|
||||
@@ -53,11 +53,12 @@ const PROFILES = JSON.parse(readFileSync(join(__dirname, 'profiles.json'), 'utf-
|
||||
function getGoldenForModel(model) {
|
||||
const modelConf = PROFILES.models[model];
|
||||
const goldenFile = modelConf?.golden || 'todo.md';
|
||||
// Rust-kielelle vaihda .md → -rs.md (todo.md → todo-rs.md)
|
||||
if (LANG === 'rust') {
|
||||
const rsFile = goldenFile.replace(/\.md$/, '-rs.md');
|
||||
const rsPath = join(GOLDEN_DIR, rsFile);
|
||||
if (existsSync(rsPath)) return rsFile;
|
||||
// Kielispesifi golden: todo.md → todo-rs.md / todo-go.md
|
||||
const langSuffix = { rust: '-rs', go: '-go' }[LANG];
|
||||
if (langSuffix) {
|
||||
const langFile = goldenFile.replace(/\.md$/, `${langSuffix}.md`);
|
||||
const langPath = join(GOLDEN_DIR, langFile);
|
||||
if (existsSync(langPath)) return langFile;
|
||||
}
|
||||
return goldenFile;
|
||||
}
|
||||
@@ -66,8 +67,8 @@ function getCodePromptForModel(model) {
|
||||
const modelConf = PROFILES.models[model];
|
||||
const profile = modelConf?.profile || PROFILES.default_profile;
|
||||
const promptName = modelConf?.prompt || PROFILES.profiles[profile]?.prompt || 'code';
|
||||
const suffix = LANG === 'rust' ? '-rs' : '';
|
||||
// Yritä kielispesifistä ensin (code-small-rs), sitten perus (code-small)
|
||||
const suffix = { rust: '-rs', go: '-go' }[LANG] || '';
|
||||
// Yritä kielispesifistä ensin (code-small-rs, code-go), sitten perus (code-small)
|
||||
const candidates = [`${promptName}${suffix}`, promptName, `code${suffix}`, 'code'];
|
||||
for (const name of candidates) {
|
||||
const path = join(__dirname, 'prompts', `${name}.md`);
|
||||
@@ -91,18 +92,24 @@ const LANG_CONFIG = {
|
||||
required: ['Cargo.toml', 'src/models.rs', 'src/handlers.rs', 'src/lib.rs', 'src/main.rs', 'tests/api_test.rs'],
|
||||
dockerImage: 'kipina-cargo-test',
|
||||
},
|
||||
go: {
|
||||
goldenDir: 'todo-go',
|
||||
files: ['go.mod', 'models.go', 'handlers.go', 'main.go', 'handlers_test.go'],
|
||||
required: ['go.mod', 'models.go', 'handlers.go', 'main.go', 'handlers_test.go'],
|
||||
dockerImage: 'kipina-go-test',
|
||||
},
|
||||
};
|
||||
const LCONF = LANG_CONFIG[LANG] || LANG_CONFIG.python;
|
||||
|
||||
function loadGoldenExample(model) {
|
||||
// --compact: käytä tiivistettyä templaattia
|
||||
if (COMPACT_MODE) {
|
||||
const compactFile = LANG === 'rust' ? 'golden-compact-rs.md' : 'golden-compact-py.md';
|
||||
const compactFile = { rust: 'golden-compact-rs.md', go: 'golden-compact-go.md' }[LANG] || 'golden-compact-py.md';
|
||||
const compactPath = join(__dirname, 'prompts', compactFile);
|
||||
if (existsSync(compactPath)) return '\n' + readFileSync(compactPath, 'utf-8').trim() + '\n';
|
||||
}
|
||||
// Mallikohtainen golden example profiilista
|
||||
const goldenFile = model ? getGoldenForModel(model) : (LANG === 'rust' ? 'todo-rs.md' : 'todo.md');
|
||||
const goldenFile = model ? getGoldenForModel(model) : ({ rust: 'todo-rs.md', go: 'todo-go.md' }[LANG] || 'todo.md');
|
||||
const mdPath = join(GOLDEN_DIR, goldenFile);
|
||||
if (existsSync(mdPath)) return '\n' + readFileSync(mdPath, 'utf-8').trim() + '\n';
|
||||
// Fallback: erilliset tiedostot
|
||||
@@ -201,7 +208,13 @@ function parseTestOutput(output) {
|
||||
const failed = parseInt(cargoMatch[2]);
|
||||
return { testsPassed: passed, testsFailed: failed, testsTotal: passed + failed };
|
||||
}
|
||||
// Cargo compilation error: count "error[E" occurrences
|
||||
// Go test: "--- PASS:" / "--- FAIL:"
|
||||
const goPassed = (output.match(/--- PASS:/g) || []).length;
|
||||
const goFailed = (output.match(/--- FAIL:/g) || []).length;
|
||||
if (goPassed + goFailed > 0) {
|
||||
return { testsPassed: goPassed, testsFailed: goFailed, testsTotal: goPassed + goFailed };
|
||||
}
|
||||
// Cargo compilation error: count "error[E" occurrences. NOTE(review): Go compile errors (e.g. "./main.go:12:5: ...") are not matched by this pattern.
|
||||
const compileErrors = (output.match(/error\[E\d+\]/g) || []).length;
|
||||
if (compileErrors > 0) {
|
||||
return { testsPassed: 0, testsFailed: compileErrors, testsTotal: compileErrors };
|
||||
@@ -333,7 +346,7 @@ async function runPipeline(model, scenario) {
|
||||
// 3. LLM-koodigenerointi
|
||||
const fileCount = LCONF.required.length;
|
||||
const goldenExample = loadGoldenExample(model);
|
||||
const codeTokens = LANG === 'rust' ? 12288 : 8192;
|
||||
const codeTokens = LANG === 'rust' ? 12288 : LANG === 'go' ? 10240 : 8192;
|
||||
let files;
|
||||
|
||||
// Orkestrointi: pilko entiteetti kerrallaan pienille malleille
|
||||
@@ -454,7 +467,7 @@ async function runPipeline(model, scenario) {
|
||||
result.fixRounds = fixRound;
|
||||
|
||||
// 5. Testit Docker-kontissa + itsekorjaava looppi (Taso 4)
|
||||
const testLabel = LANG === 'rust' ? 'Cargo test' : 'Pytest';
|
||||
const testLabel = { rust: 'Cargo test', go: 'Go test', python: 'Pytest' }[LANG] || 'Test';
|
||||
const dockerTimeout = LANG === 'rust' ? 300000 : 120000;
|
||||
const MAX_TEST_FIX = 3;
|
||||
let bestFiles = { ...files }; // Paras versio tiedostoista
|
||||
@@ -538,13 +551,13 @@ async function runPipeline(model, scenario) {
|
||||
}
|
||||
|
||||
// Itsekorjaus: syötä virhe + koodi mallille
|
||||
const errorLines = testOut.split('\n').filter(l => /^E |FAILED|ERROR|error\[E/.test(l)).slice(0, 20).join('\n');
|
||||
const errorLines = testOut.split('\n').filter(l => /^E |FAILED|ERROR|error\[E|--- FAIL|panic:|\.go:\d+/.test(l)).slice(0, 20).join('\n');
|
||||
if (!errorLines) break; // Ei parsittavia virheitä
|
||||
|
||||
console.log(` [5/5] Itsekorjaus: ${result.testsFailed || 'virhe'}...`);
|
||||
const allCode = Object.entries(files).map(([fn, c]) => `=== ${fn} ===\n${c}`).join('\n\n');
|
||||
const fixPrompt = `The following test errors occurred. Fix the code so ALL tests pass. Return ALL files with === markers.\n\nERRORS:\n${errorLines}\n\nCURRENT CODE:\n${allCode}`;
|
||||
const fixResp = await ollamaChat(model, fixPrompt, CODE_SYSTEM, LANG === 'rust' ? 12288 : 8192);
|
||||
const fixResp = await ollamaChat(model, fixPrompt, CODE_SYSTEM, LANG === 'rust' ? 12288 : LANG === 'go' ? 10240 : 8192);
|
||||
timings.push(fixResp);
|
||||
|
||||
const fixedFiles = parseGeneratedFiles(fixResp.text);
|
||||
|
||||
Reference in New Issue
Block a user