CodeBench: mallikohtaiset promptiprofiilit (profiles.json)

- profiles.json: malli → profiili → prompti -mappaus
- code-small.md: tiivistetty prompti pienille malleille (8b, 4b)
- benchmark valitsee automaattisesti oikean promptin mallin perusteella
- qwen3-coder:30b → code.md (large), qwen3:8b → code-small.md (small)
This commit is contained in:
2026-04-14 13:54:26 +03:00
parent e54c1b057c
commit 92964e322f
15 changed files with 2597 additions and 1 deletions

View File

@@ -46,9 +46,24 @@ function loadPrompt(name) {
}
// Prompt texts loaded by name through loadPrompt() when the module is evaluated.
const CLIENT_SYSTEM = loadPrompt('client');
const SPEC_SYSTEM = loadPrompt('spec');
// Module-level code prompt: the 'code-rs' variant when LANG is 'rust', otherwise 'code'.
// NOTE(review): runPipeline declares its own local CODE_SYSTEM from
// getCodePromptForModel(model), which shadows this constant inside that function.
const CODE_SYSTEM = loadPrompt(LANG === 'rust' ? 'code-rs' : 'code');
const FIX_SYSTEM = loadPrompt('fix');
// === Per-model profiles ===
// profiles.json is read and parsed once at module load; getCodePromptForModel()
// consults its `models`, `profiles` and `default_profile` entries.
const PROFILES = JSON.parse(readFileSync(join(__dirname, 'profiles.json'), 'utf-8'));
/**
 * Resolve the code-generation system prompt to use for a given model.
 *
 * The prompt name is taken from, in order: the model's own `prompt` entry in
 * profiles.json, the `prompt` of the model's profile (or of `default_profile`
 * when the model declares none), and finally the plain 'code' prompt.
 * The first existing file among the language-specific and generic variants of
 * that name under `prompts/` is read and returned.
 *
 * @param {string} model - Model identifier, used as key into `PROFILES.models`.
 * @returns {{ system: string, promptName: string, profile: (string|undefined) }}
 *   The trimmed prompt text, the name of the prompt file actually used
 *   (without `.md`), and the resolved profile name.
 */
function getCodePromptForModel(model) {
  // Optional chaining on the sections as well as the entries: a profiles.json
  // without `models` or `profiles` falls back to defaults instead of throwing.
  const modelConf = PROFILES.models?.[model];
  const profile = modelConf?.profile || PROFILES.default_profile;
  const promptName = modelConf?.prompt || PROFILES.profiles?.[profile]?.prompt || 'code';
  const suffix = LANG === 'rust' ? '-rs' : '';
  // Try the language-specific variant first (e.g. code-small-rs), then the base
  // name (code-small), then the generic code prompt. A Set keeps insertion order
  // and drops the duplicates that appear when the suffix is empty or the prompt
  // name is already 'code', so no file is probed twice.
  const candidates = new Set([`${promptName}${suffix}`, promptName, `code${suffix}`, 'code']);
  for (const name of candidates) {
    const path = join(__dirname, 'prompts', `${name}.md`);
    if (existsSync(path)) {
      return { system: readFileSync(path, 'utf-8').trim(), promptName: name, profile };
    }
  }
  // No candidate file was found under prompts/: defer to loadPrompt('code') and
  // report the 'large' profile, as the original fallback did.
  return { system: loadPrompt('code'), promptName: 'code', profile: 'large' };
}
// === Loading of golden examples (per language) ===
// Directory holding the golden examples, located next to this script.
const GOLDEN_DIR = join(__dirname, 'golden-examples');
const LANG_CONFIG = {
@@ -281,6 +296,7 @@ async function runPipeline(model, scenario) {
error: null,
};
const timings = [];
const { system: CODE_SYSTEM, promptName, profile } = getCodePromptForModel(model);
const dir = `${OUTPUT_DIR}/${model.replace(/[/:]/g, '_')}__${scenario.id}`;
mkdirSync(dir, { recursive: true });
@@ -444,6 +460,8 @@ async function runPipeline(model, scenario) {
result.avgTokPerSec = timings.length > 0 ? timings.reduce((s, t) => s + t.tokPerSec, 0) / timings.length : 0;
result.score = scoreResult(result);
result.stars = starsForScore(result.score);
result.profile = profile;
result.promptName = promptName;
return result;
}