CodeBench: --think lippu thinking-moodin testaamiseen

think:true + 3× token-raja (ajattelu vie ~2/3 tokeneista).
Käyttö: node benchmark.mjs --think --models qwen3:14b
This commit is contained in:
2026-04-14 10:12:44 +03:00
parent 8ba9ef83a3
commit d02f6a51c1

View File

@@ -32,6 +32,7 @@ const SCENARIO_FILTER = arg('scenarios', 'default');
// Run timestamp: UTC ISO-8601 string with ':' and '.' replaced by '-', cut to
// 16 characters, i.e. minute precision in the form YYYY-MM-DDTHH-MM.
const TIMESTAMP = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 16);
// Output directory; falls back to a per-run folder under /tmp/kipina-benchmark.
// NOTE(review): presumably arg() reads the `--output` command-line value — confirm against its definition.
const OUTPUT_DIR = arg('output', `/tmp/kipina-benchmark/${TIMESTAMP}`);
// 'results' path joined onto __dirname (defined outside this excerpt).
const RESULTS_DIR = join(__dirname, 'results');
// True when the literal '--think' is present in `args`; used to turn on the
// model's thinking mode and enlarge the token budget in the Ollama request.
const THINK_MODE = args.includes('--think');
// NOTE(review): presumably the upper bound on fix/retry rounds — usage is not visible in this excerpt.
const MAX_FIX_ROUNDS = 2;
// === Loading prompts from files ===
@@ -103,8 +104,8 @@ async function ollamaChat(model, prompt, systemPrompt, maxTokens = 2048) {
model,
messages,
stream: false,
think: false,
options: { num_predict: maxTokens, num_ctx: 16384, temperature: 0.7, top_k: 40, repeat_penalty: 1.15 },
think: THINK_MODE,
options: { num_predict: THINK_MODE ? maxTokens * 3 : maxTokens, num_ctx: 16384, temperature: 0.7, top_k: 40, repeat_penalty: 1.15 },
}),
});
if (!resp.ok) throw new Error(`Ollama HTTP ${resp.status}: ${await resp.text()}`);
@@ -336,7 +337,7 @@ async function main() {
console.log('╔══════════════════════════════════════════════╗');
console.log('║ Kipinä CodeBench ║');
console.log('╚══════════════════════════════════════════════╝');
console.log(`Ollama: ${OLLAMA_URL}`);
console.log(`Ollama: ${OLLAMA_URL}${THINK_MODE ? ' 🧠 thinking ON (3× tokens)' : ''}`);
// Haetaan mallit
let models;