CodeBench: mallikohtaiset promptiprofiilit (profiles.json)
- profiles.json: malli → profiili → prompti -mappaus
- code-small.md: tiivistetty prompti pienille malleille (8b, 4b)
- benchmark valitsee automaattisesti oikean promptin mallin perusteella
- qwen3-coder:30b → code.md (large), qwen3:8b → code-small.md (small)
This commit is contained in:
@@ -46,9 +46,24 @@ function loadPrompt(name) {
|
||||
}
|
||||
// Prompt constants, each obtained by name through loadPrompt().
const CLIENT_SYSTEM = loadPrompt('client');
|
||||
const SPEC_SYSTEM = loadPrompt('spec');
|
||||
// Uses the 'code-rs' prompt when LANG is 'rust', otherwise the 'code' prompt.
const CODE_SYSTEM = loadPrompt(LANG === 'rust' ? 'code-rs' : 'code');
|
||||
const FIX_SYSTEM = loadPrompt('fix');
|
||||
|
||||
// === Model-specific profiles ===
|
||||
// Parsed from profiles.json in the __dirname directory; read by getCodePromptForModel().
const PROFILES = JSON.parse(readFileSync(join(__dirname, 'profiles.json'), 'utf-8'));
|
||||
/**
 * Resolve the code-generation system prompt for a given model.
 *
 * The model is looked up in PROFILES.models to find its profile (falling back
 * to PROFILES.default_profile). The prompt name comes from the model entry,
 * then from the profile entry, then defaults to 'code'. The first candidate
 * file that exists under `<__dirname>/prompts/` is read and returned.
 *
 * @param {string} model - Model identifier used as a key into PROFILES.models.
 * @returns {{system: string, promptName: string, profile: (string|undefined)}}
 *   `system` is the prompt text (trimmed when read from a candidate file),
 *   `promptName` is the prompt file actually used (without `.md`), and
 *   `profile` is the profile resolved for the model. `profile` is undefined
 *   when neither the model entry nor PROFILES.default_profile provides one.
 */
function getCodePromptForModel(model) {
  // Optional chaining keeps a profiles.json that lacks a `models` or
  // `profiles` section from throwing; lookups then resolve to the defaults.
  const modelConf = PROFILES.models?.[model];
  const profile = modelConf?.profile || PROFILES.default_profile;
  const promptName = modelConf?.prompt || PROFILES.profiles?.[profile]?.prompt || 'code';
  const suffix = LANG === 'rust' ? '-rs' : '';

  // Try the language-specific variant first (e.g. code-small-rs), then the
  // base prompt (code-small), then the generic code prompts. The Set removes
  // the duplicates that occur when the suffix is empty or promptName is
  // 'code', while keeping the priority order.
  const candidates = new Set([`${promptName}${suffix}`, promptName, `code${suffix}`, 'code']);

  for (const name of candidates) {
    const path = join(__dirname, 'prompts', `${name}.md`);
    if (existsSync(path)) {
      return { system: readFileSync(path, 'utf-8').trim(), promptName: name, profile };
    }
  }

  // No candidate file was found: defer to loadPrompt('code') and report the
  // 'large' profile.
  return { system: loadPrompt('code'), promptName: 'code', profile: 'large' };
}
|
||||
|
||||
// === Loading golden examples (by language) ===
|
||||
// Path of the golden-examples directory under __dirname.
const GOLDEN_DIR = join(__dirname, 'golden-examples');
|
||||
const LANG_CONFIG = {
|
||||
@@ -281,6 +296,7 @@ async function runPipeline(model, scenario) {
|
||||
error: null,
|
||||
};
|
||||
const timings = [];
|
||||
const { system: CODE_SYSTEM, promptName, profile } = getCodePromptForModel(model);
|
||||
const dir = `${OUTPUT_DIR}/${model.replace(/[/:]/g, '_')}__${scenario.id}`;
|
||||
mkdirSync(dir, { recursive: true });
|
||||
|
||||
@@ -444,6 +460,8 @@ async function runPipeline(model, scenario) {
|
||||
result.avgTokPerSec = timings.length > 0 ? timings.reduce((s, t) => s + t.tokPerSec, 0) / timings.length : 0;
|
||||
result.score = scoreResult(result);
|
||||
result.stars = starsForScore(result.score);
|
||||
result.profile = profile;
|
||||
result.promptName = promptName;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user