CodeBench: mallikohtainen golden example (profiles.json → golden-kenttä)

qwen3-coder:30b → todo.md (annotaatiot); qwen3:8b → todo-readme.md (GitHub README -muoto, tutuin koulutusdata). Golden example ladataan dynaamisesti per malli pipelinen sisällä.
2026-04-14 14:04:28 +03:00
parent 0c3303a640
commit a25c52cff4
5 changed files with 483 additions and 8 deletions
--- a/kipina-codebench/benchmark.mjs
+++ b/kipina-codebench/benchmark.mjs
@@ -50,6 +50,12 @@ const FIX_SYSTEM = loadPrompt('fix');

 // === Mallikohtaiset profiilit ===
 const PROFILES = JSON.parse(readFileSync(join(__dirname, 'profiles.json'), 'utf-8'));
+function getGoldenForModel(model) { // Returns the golden-example file name configured for `model` in profiles.json.
+    const modelConf = PROFILES.models[model]; // undefined when `model` has no entry under PROFILES.models
+    const goldenFile = modelConf?.golden || 'todo.md'; // missing or falsy `golden` falls back to 'todo.md'; NOTE(review): fallback ignores LANG — confirm rust runs should not default to 'todo-rs.md'
+    return goldenFile;
+}
+
 function getCodePromptForModel(model) {
    const modelConf = PROFILES.models[model];
    const profile = modelConf?.profile || PROFILES.default_profile;
@@ -82,16 +88,16 @@ const LANG_CONFIG = {
 };
 const LCONF = LANG_CONFIG[LANG] || LANG_CONFIG.python;

-function loadGoldenExample() {
+function loadGoldenExample(model) {
    // --compact: käytä tiivistettyä templaattia
    if (COMPACT_MODE) {
        const compactFile = LANG === 'rust' ? 'golden-compact-rs.md' : 'golden-compact-py.md';
        const compactPath = join(__dirname, 'prompts', compactFile);
        if (existsSync(compactPath)) return '\n' + readFileSync(compactPath, 'utf-8').trim() + '\n';
    }
-    // Markdown golden example (koodi + selitykset)
-    const mdName = LANG === 'rust' ? 'todo-rs.md' : 'todo.md';
-    const mdPath = join(GOLDEN_DIR, mdName);
+    // Mallikohtainen golden example profiilista
+    const goldenFile = model ? getGoldenForModel(model) : (LANG === 'rust' ? 'todo-rs.md' : 'todo.md');
+    const mdPath = join(GOLDEN_DIR, goldenFile);
    if (existsSync(mdPath)) return '\n' + readFileSync(mdPath, 'utf-8').trim() + '\n';
    // Fallback: erilliset tiedostot
    const todoDir = join(GOLDEN_DIR, LCONF.goldenDir);
@@ -103,7 +109,6 @@ function loadGoldenExample() {
    }
    return example;
 }
-const GOLDEN_EXAMPLE = loadGoldenExample();

 // === Ajattelutagien siivous (gemma4, qwen3/3.5 ym.) ===
 function stripThinking(text) {
@@ -322,7 +327,8 @@ async function runPipeline(model, scenario) {
        // 3. LLM-koodigenerointi
        console.log(`    [3/5] Koodigenerointi (LLM)...`);
        const fileCount = LCONF.required.length;
-        const codePrompt = `${GOLDEN_EXAMPLE}\n---\n\nPROJECT REQUIREMENTS:\n${req.text}\n\nJSON SPECIFICATION:\n${JSON.stringify(spec, null, 2)}\n\nGenerate the complete project with all ${fileCount} files. Follow the reference implementation patterns exactly.`;
+        const goldenExample = loadGoldenExample(model);
+        const codePrompt = `${goldenExample}\n---\n\nPROJECT REQUIREMENTS:\n${req.text}\n\nJSON SPECIFICATION:\n${JSON.stringify(spec, null, 2)}\n\nGenerate the complete project with all ${fileCount} files. Follow the reference implementation patterns exactly.`;
        result.promptChars = CODE_SYSTEM.length + codePrompt.length;
        result.promptTokensEst = Math.round(result.promptChars / 4);
        const codeTokens = LANG === 'rust' ? 12288 : 8192;