diff --git a/kipina-codebench/benchmark.mjs b/kipina-codebench/benchmark.mjs
index f49da1e..ef451dc 100644
--- a/kipina-codebench/benchmark.mjs
+++ b/kipina-codebench/benchmark.mjs
@@ -46,9 +46,24 @@ function loadPrompt(name) {
}
const CLIENT_SYSTEM = loadPrompt('client');
const SPEC_SYSTEM = loadPrompt('spec');
-const CODE_SYSTEM = loadPrompt(LANG === 'rust' ? 'code-rs' : 'code');
const FIX_SYSTEM = loadPrompt('fix');
+// === Model-specific profiles (per-model prompt/profile settings read from profiles.json) ===
+const PROFILES = JSON.parse(readFileSync(join(__dirname, 'profiles.json'), 'utf-8'));
+/** Resolves the code system prompt for `model` (a key of PROFILES.models; unknown models use the default profile); returns { system, promptName, profile }. */
+function getCodePromptForModel(model) {
+  const modelConf = PROFILES.models[model];
+  const profile = modelConf?.profile || PROFILES.default_profile;
+  const promptName = modelConf?.prompt || PROFILES.profiles[profile]?.prompt || 'code';
+  const suffix = LANG === 'rust' ? '-rs' : '';
+  // Language-specific files come first (code-small-rs, then code-rs) so a Rust run never falls back to a non-Rust prompt such as code-small while code-rs exists.
+  const candidates = [...new Set([`${promptName}${suffix}`, `code${suffix}`, promptName, 'code'])];
+  const name = candidates.find((candidate) => existsSync(join(__dirname, 'prompts', `${candidate}.md`)));
+  // No prompt file on disk: defer to loadPrompt('code'), but still report the profile resolved above instead of a hard-coded 'large'.
+  if (name === undefined) return { system: loadPrompt('code'), promptName: 'code', profile };
+  return { system: readFileSync(join(__dirname, 'prompts', `${name}.md`), 'utf-8').trim(), promptName: name, profile };
+}
+
// === Kultaisten esimerkkien lataus (kielen mukaan) ===
const GOLDEN_DIR = join(__dirname, 'golden-examples');
const LANG_CONFIG = {
@@ -281,6 +296,7 @@ async function runPipeline(model, scenario) {
error: null,
};
const timings = [];
+ const { system: CODE_SYSTEM, promptName, profile } = getCodePromptForModel(model);
const dir = `${OUTPUT_DIR}/${model.replace(/[/:]/g, '_')}__${scenario.id}`;
mkdirSync(dir, { recursive: true });
@@ -444,6 +460,8 @@ async function runPipeline(model, scenario) {
result.avgTokPerSec = timings.length > 0 ? timings.reduce((s, t) => s + t.tokPerSec, 0) / timings.length : 0;
result.score = scoreResult(result);
result.stars = starsForScore(result.score);
+ result.profile = profile;
+ result.promptName = promptName;
return result;
}
diff --git a/kipina-codebench/profiles.json b/kipina-codebench/profiles.json
new file mode 100644
index 0000000..6ed57dd
--- /dev/null
+++ b/kipina-codebench/profiles.json
@@ -0,0 +1,47 @@
+{
+ "models": {
+ "qwen3-coder:30b": {
+ "profile": "large",
+ "prompt": "code",
+ "golden": "todo.md",
+ "notes": "Pääkooderi. 97p, 188 tok/s. Noudattaa pitkiä sääntölistoja."
+ },
+ "qwen3:8b": {
+ "profile": "small",
+ "prompt": "code-small",
+ "golden": "todo.md",
+ "notes": "Kevyt vaihtoehto. Todo/users 100p, blog heikko. Lyhyt prompti toimii paremmin."
+ },
+ "qwen3:14b": {
+ "profile": "large",
+ "prompt": "code",
+ "golden": "todo.md",
+ "notes": "Poistettu käytöstä. Ei lisäarvoa 30b:hen verrattuna."
+ },
+ "codestral:22b": {
+ "profile": "large",
+ "prompt": "code",
+ "golden": "todo.md",
+ "notes": "Mistral-varamalli. 88p, 44 tok/s."
+ },
+ "qwen3:4b": {
+ "profile": "small",
+ "prompt": "code-small",
+ "golden": "todo.md",
+ "notes": "Minimaali. Vain todo toimii."
+ }
+ },
+ "profiles": {
+ "large": {
+ "prompt": "code",
+ "golden": "todo.md",
+ "description": "Täysi prompti + säännöt. Malleille >=14B."
+ },
+ "small": {
+ "prompt": "code-small",
+ "golden": "todo.md",
+ "description": "Tiivistetty prompti. Malleille <=8B."
+ }
+ },
+ "default_profile": "large"
+}
diff --git a/kipina-codebench/prompts/code-small.md b/kipina-codebench/prompts/code-small.md
new file mode 100644
index 0000000..9a17ecc
--- /dev/null
+++ b/kipina-codebench/prompts/code-small.md
@@ -0,0 +1,22 @@
+Generate a FastAPI project with SQLAlchemy and SQLite. Follow the REFERENCE IMPLEMENTATION exactly.
+
+Generate these 4 files with === markers:
+
+=== models.py ===
+=== schemas.py ===
+=== main.py ===
+=== test_main.py ===
+
+Key patterns (copy from reference):
+- class Base(DeclarativeBase): pass
+- Mapped[str] = mapped_column(String(255))
+- Mapped[str | None] = mapped_column(Text, default=None)
+- model_config = ConfigDict(from_attributes=True)
+- model_dump() not dict()
+- POST 201, GET list, GET by id 404, PUT, DELETE 204
+
+CRITICAL:
+- Use ONLY fields from the JSON spec — no created_at or extra fields
+- Generate EXACTLY 6 tests per entity: create, list, get_by_id, not_found, update, delete
+- No search, filter, or other extra tests
+- No markdown fences in output
diff --git a/kipina-codebench/results/2026-04-14T09-43.html b/kipina-codebench/results/2026-04-14T09-43.html
new file mode 100644
index 0000000..e77d625
--- /dev/null
+++ b/kipina-codebench/results/2026-04-14T09-43.html
@@ -0,0 +1,183 @@
+
+
+
+
+
+Kipina Model Benchmark
+
+
+
+
+Kipina Model Benchmark
+
+
+
+
+Mallikohtainen yhteenveto
+
+
+Kaikki tulokset
+
+
+
+
+
diff --git a/kipina-codebench/results/2026-04-14T09-43.json b/kipina-codebench/results/2026-04-14T09-43.json
new file mode 100644
index 0000000..c54701c
--- /dev/null
+++ b/kipina-codebench/results/2026-04-14T09-43.json
@@ -0,0 +1,22 @@
+[
+ {
+ "model": "qwen3-coder:30b",
+ "scenario": "todo",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 6,
+ "testsPassed": 6,
+ "testsFailed": 0,
+ "totalDurationMs": 21688,
+ "totalTokens": 2243,
+ "avgTokPerSec": 121.7719614197307,
+ "promptChars": 11588,
+ "promptTokensEst": 2897,
+ "score": 100,
+ "stars": "★★★★★",
+ "error": null
+ }
+]
\ No newline at end of file
diff --git a/kipina-codebench/results/2026-04-14T09-44.html b/kipina-codebench/results/2026-04-14T09-44.html
new file mode 100644
index 0000000..b8ea684
--- /dev/null
+++ b/kipina-codebench/results/2026-04-14T09-44.html
@@ -0,0 +1,183 @@
+
+
+
+
+
+Kipina Model Benchmark
+
+
+
+
+Kipina Model Benchmark
+
+
+
+
+Mallikohtainen yhteenveto
+
+
+Kaikki tulokset
+
+
+
+
+
diff --git a/kipina-codebench/results/2026-04-14T09-44.json b/kipina-codebench/results/2026-04-14T09-44.json
new file mode 100644
index 0000000..b011487
--- /dev/null
+++ b/kipina-codebench/results/2026-04-14T09-44.json
@@ -0,0 +1,62 @@
+[
+ {
+ "model": "qwen3:8b",
+ "scenario": "todo",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 6,
+ "testsPassed": 6,
+ "testsFailed": 0,
+ "totalDurationMs": 23521,
+ "totalTokens": 2090,
+ "avgTokPerSec": 100.94324085271073,
+ "promptChars": 10962,
+ "promptTokensEst": 2741,
+ "score": 100,
+ "stars": "★★★★★",
+ "error": null
+ },
+ {
+ "model": "qwen3:8b",
+ "scenario": "users",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 1,
+ "testsTotal": 6,
+ "testsPassed": 6,
+ "testsFailed": 0,
+ "totalDurationMs": 33680,
+ "totalTokens": 3003,
+ "avgTokPerSec": 100.52754588753601,
+ "promptChars": 10171,
+ "promptTokensEst": 2543,
+ "score": 90,
+ "stars": "★★★★★",
+ "error": null
+ },
+ {
+ "model": "qwen3:8b",
+ "scenario": "blog",
+ "reqOk": true,
+ "specOk": false,
+ "specEntities": 0,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 0,
+ "testsPassed": 0,
+ "testsFailed": 0,
+ "totalDurationMs": 0,
+ "totalTokens": 0,
+ "avgTokPerSec": 0,
+ "promptChars": 0,
+ "promptTokensEst": 0,
+ "score": 0,
+ "stars": "",
+ "error": "JSON-speksi epäonnistui"
+ }
+]
\ No newline at end of file
diff --git a/kipina-codebench/results/2026-04-14T09-47.html b/kipina-codebench/results/2026-04-14T09-47.html
new file mode 100644
index 0000000..448c02e
--- /dev/null
+++ b/kipina-codebench/results/2026-04-14T09-47.html
@@ -0,0 +1,183 @@
+
+
+
+
+
+Kipina Model Benchmark
+
+
+
+
+Kipina Model Benchmark
+
+
+
+
+Mallikohtainen yhteenveto
+
+
+Kaikki tulokset
+
+
+
+
+
diff --git a/kipina-codebench/results/2026-04-14T09-47.json b/kipina-codebench/results/2026-04-14T09-47.json
new file mode 100644
index 0000000..0275dd6
--- /dev/null
+++ b/kipina-codebench/results/2026-04-14T09-47.json
@@ -0,0 +1,62 @@
+[
+ {
+ "model": "qwen3:8b",
+ "scenario": "todo",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 3,
+ "testsTotal": 8,
+ "testsPassed": 6,
+ "testsFailed": 2,
+ "totalDurationMs": 97470,
+ "totalTokens": 8786,
+ "avgTokPerSec": 97.96636139685832,
+ "promptChars": 11290,
+ "promptTokensEst": 2823,
+ "score": 65,
+ "stars": "★★★☆☆",
+ "error": null
+ },
+ {
+ "model": "qwen3:8b",
+ "scenario": "users",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 6,
+ "testsPassed": 6,
+ "testsFailed": 0,
+ "totalDurationMs": 18951,
+ "totalTokens": 1666,
+ "avgTokPerSec": 101.807593927545,
+ "promptChars": 10293,
+ "promptTokensEst": 2573,
+ "score": 100,
+ "stars": "★★★★★",
+ "error": null
+ },
+ {
+ "model": "qwen3:8b",
+ "scenario": "blog",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 2,
+ "validationIssues": 0,
+ "fixRounds": 3,
+ "testsTotal": 1,
+ "testsPassed": 0,
+ "testsFailed": 1,
+ "totalDurationMs": 126005,
+ "totalTokens": 11056,
+ "avgTokPerSec": 96.6373549161171,
+ "promptChars": 11878,
+ "promptTokensEst": 2970,
+ "score": 20,
+ "stars": "★☆☆☆☆",
+ "error": "Syntaksivirhe"
+ }
+]
\ No newline at end of file
diff --git a/kipina-codebench/results/2026-04-14T09-52.html b/kipina-codebench/results/2026-04-14T09-52.html
new file mode 100644
index 0000000..562f3ec
--- /dev/null
+++ b/kipina-codebench/results/2026-04-14T09-52.html
@@ -0,0 +1,183 @@
+
+
+
+
+
+Kipina Model Benchmark
+
+
+
+
+Kipina Model Benchmark
+
+
+
+
+Mallikohtainen yhteenveto
+
+
+Kaikki tulokset
+
+
+
+
+
diff --git a/kipina-codebench/results/2026-04-14T09-52.json b/kipina-codebench/results/2026-04-14T09-52.json
new file mode 100644
index 0000000..4e366ef
--- /dev/null
+++ b/kipina-codebench/results/2026-04-14T09-52.json
@@ -0,0 +1,947 @@
+[
+ {
+ "model": "qwen3-coder:30b",
+ "scenario": "todo",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 6,
+ "testsPassed": 6,
+ "testsFailed": 0,
+ "totalDurationMs": 25444,
+ "totalTokens": 2661,
+ "avgTokPerSec": 122.06801173056196,
+ "promptChars": 11849,
+ "promptTokensEst": 2962,
+ "score": 100,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 1
+ },
+ {
+ "model": "qwen3-coder:30b",
+ "scenario": "users",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 6,
+ "testsPassed": 6,
+ "testsFailed": 0,
+ "totalDurationMs": 24447,
+ "totalTokens": 2537,
+ "avgTokPerSec": 121.11837170891442,
+ "promptChars": 11045,
+ "promptTokensEst": 2761,
+ "score": 100,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 1
+ },
+ {
+ "model": "qwen3-coder:30b",
+ "scenario": "blog",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 2,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 11,
+ "testsPassed": 11,
+ "testsFailed": 0,
+ "totalDurationMs": 38071,
+ "totalTokens": 3965,
+ "avgTokPerSec": 120.37309655579647,
+ "promptChars": 12702,
+ "promptTokensEst": 3176,
+ "score": 100,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 1
+ },
+ {
+ "model": "qwen3:14b",
+ "scenario": "todo",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 6,
+ "testsPassed": 6,
+ "testsFailed": 0,
+ "totalDurationMs": 38459,
+ "totalTokens": 2106,
+ "avgTokPerSec": 60.889088461567745,
+ "promptChars": 10951,
+ "promptTokensEst": 2738,
+ "score": 100,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 1
+ },
+ {
+ "model": "qwen3:14b",
+ "scenario": "users",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 6,
+ "testsPassed": 6,
+ "testsFailed": 0,
+ "totalDurationMs": 35959,
+ "totalTokens": 1966,
+ "avgTokPerSec": 60.9684885562545,
+ "promptChars": 10698,
+ "promptTokensEst": 2675,
+ "score": 100,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 1
+ },
+ {
+ "model": "qwen3:14b",
+ "scenario": "blog",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 2,
+ "validationIssues": 0,
+ "fixRounds": 3,
+ "testsTotal": 13,
+ "testsPassed": 2,
+ "testsFailed": 11,
+ "totalDurationMs": 269370,
+ "totalTokens": 14361,
+ "avgTokPerSec": 57.79069860126629,
+ "promptChars": 11838,
+ "promptTokensEst": 2960,
+ "score": 29,
+ "stars": "★★☆☆☆",
+ "error": null,
+ "round": 1
+ },
+ {
+ "model": "qwen3:8b",
+ "scenario": "todo",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 6,
+ "testsPassed": 6,
+ "testsFailed": 0,
+ "totalDurationMs": 23199,
+ "totalTokens": 2054,
+ "avgTokPerSec": 101.09280595816365,
+ "promptChars": 10854,
+ "promptTokensEst": 2714,
+ "score": 100,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 1
+ },
+ {
+ "model": "qwen3:8b",
+ "scenario": "users",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 3,
+ "testsTotal": 1,
+ "testsPassed": 0,
+ "testsFailed": 1,
+ "totalDurationMs": 72665,
+ "totalTokens": 6586,
+ "avgTokPerSec": 99.40636298490288,
+ "promptChars": 10157,
+ "promptTokensEst": 2539,
+ "score": 20,
+ "stars": "★☆☆☆☆",
+ "error": "Syntaksivirhe",
+ "round": 1
+ },
+ {
+ "model": "qwen3:8b",
+ "scenario": "blog",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 3,
+ "testsTotal": 0,
+ "testsPassed": 0,
+ "testsFailed": 0,
+ "totalDurationMs": 136309,
+ "totalTokens": 12036,
+ "avgTokPerSec": 97.02525169408467,
+ "promptChars": 10823,
+ "promptTokensEst": 2706,
+ "score": 0,
+ "stars": "☆☆☆☆☆",
+ "error": "Testit kaatuivat",
+ "round": 1
+ },
+ {
+ "model": "qwen3-coder:30b",
+ "scenario": "todo",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 6,
+ "testsPassed": 6,
+ "testsFailed": 0,
+ "totalDurationMs": 28177,
+ "totalTokens": 2946,
+ "avgTokPerSec": 121.23541038097,
+ "promptChars": 11836,
+ "promptTokensEst": 2959,
+ "score": 100,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 2
+ },
+ {
+ "model": "qwen3-coder:30b",
+ "scenario": "users",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 8,
+ "testsPassed": 8,
+ "testsFailed": 0,
+ "totalDurationMs": 22631,
+ "totalTokens": 2352,
+ "avgTokPerSec": 121.93930190168658,
+ "promptChars": 10440,
+ "promptTokensEst": 2610,
+ "score": 100,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 2
+ },
+ {
+ "model": "qwen3-coder:30b",
+ "scenario": "blog",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 2,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 12,
+ "testsPassed": 12,
+ "testsFailed": 0,
+ "totalDurationMs": 40394,
+ "totalTokens": 4225,
+ "avgTokPerSec": 120.84107397324551,
+ "promptChars": 12362,
+ "promptTokensEst": 3091,
+ "score": 100,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 2
+ },
+ {
+ "model": "qwen3:14b",
+ "scenario": "todo",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 7,
+ "testsPassed": 7,
+ "testsFailed": 0,
+ "totalDurationMs": 46081,
+ "totalTokens": 2542,
+ "avgTokPerSec": 60.93046828700026,
+ "promptChars": 11412,
+ "promptTokensEst": 2853,
+ "score": 100,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 2
+ },
+ {
+ "model": "qwen3:14b",
+ "scenario": "users",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 7,
+ "testsPassed": 7,
+ "testsFailed": 0,
+ "totalDurationMs": 41323,
+ "totalTokens": 2272,
+ "avgTokPerSec": 60.99406174164295,
+ "promptChars": 10884,
+ "promptTokensEst": 2721,
+ "score": 100,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 2
+ },
+ {
+ "model": "qwen3:14b",
+ "scenario": "blog",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 2,
+ "validationIssues": 0,
+ "fixRounds": 3,
+ "testsTotal": 14,
+ "testsPassed": 2,
+ "testsFailed": 12,
+ "totalDurationMs": 262591,
+ "totalTokens": 14129,
+ "avgTokPerSec": 57.91340837830759,
+ "promptChars": 12143,
+ "promptTokensEst": 3036,
+ "score": 29,
+ "stars": "★★☆☆☆",
+ "error": null,
+ "round": 2
+ },
+ {
+ "model": "qwen3:8b",
+ "scenario": "todo",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 6,
+ "testsPassed": 6,
+ "testsFailed": 0,
+ "totalDurationMs": 24007,
+ "totalTokens": 2137,
+ "avgTokPerSec": 101.05982103292858,
+ "promptChars": 10756,
+ "promptTokensEst": 2689,
+ "score": 100,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 2
+ },
+ {
+ "model": "qwen3:8b",
+ "scenario": "users",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 3,
+ "testsTotal": 7,
+ "testsPassed": 6,
+ "testsFailed": 1,
+ "totalDurationMs": 68739,
+ "totalTokens": 6199,
+ "avgTokPerSec": 98.9825675198183,
+ "promptChars": 10313,
+ "promptTokensEst": 2578,
+ "score": 71,
+ "stars": "★★★★☆",
+ "error": null,
+ "round": 2
+ },
+ {
+ "model": "qwen3:8b",
+ "scenario": "blog",
+ "reqOk": true,
+ "specOk": false,
+ "specEntities": 0,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 0,
+ "testsPassed": 0,
+ "testsFailed": 0,
+ "totalDurationMs": 0,
+ "totalTokens": 0,
+ "avgTokPerSec": 0,
+ "promptChars": 0,
+ "promptTokensEst": 0,
+ "score": 0,
+ "stars": "",
+ "error": "JSON-speksi epäonnistui",
+ "round": 2
+ },
+ {
+ "model": "qwen3-coder:30b",
+ "scenario": "todo",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 6,
+ "testsPassed": 6,
+ "testsFailed": 0,
+ "totalDurationMs": 23472,
+ "totalTokens": 2427,
+ "avgTokPerSec": 120.85293828875076,
+ "promptChars": 11663,
+ "promptTokensEst": 2916,
+ "score": 100,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 3
+ },
+ {
+ "model": "qwen3-coder:30b",
+ "scenario": "users",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 8,
+ "testsPassed": 8,
+ "testsFailed": 0,
+ "totalDurationMs": 25864,
+ "totalTokens": 2671,
+ "avgTokPerSec": 120.6883137195962,
+ "promptChars": 11148,
+ "promptTokensEst": 2787,
+ "score": 100,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 3
+ },
+ {
+ "model": "qwen3-coder:30b",
+ "scenario": "blog",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 2,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 12,
+ "testsPassed": 12,
+ "testsFailed": 0,
+ "totalDurationMs": 41074,
+ "totalTokens": 4275,
+ "avgTokPerSec": 120.33351485161673,
+ "promptChars": 12664,
+ "promptTokensEst": 3166,
+ "score": 100,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 3
+ },
+ {
+ "model": "qwen3:14b",
+ "scenario": "todo",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 7,
+ "testsPassed": 7,
+ "testsFailed": 0,
+ "totalDurationMs": 40457,
+ "totalTokens": 2229,
+ "avgTokPerSec": 61.093615619948345,
+ "promptChars": 10905,
+ "promptTokensEst": 2726,
+ "score": 100,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 3
+ },
+ {
+ "model": "qwen3:14b",
+ "scenario": "users",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 1,
+ "testsTotal": 7,
+ "testsPassed": 7,
+ "testsFailed": 0,
+ "totalDurationMs": 77506,
+ "totalTokens": 4268,
+ "avgTokPerSec": 60.19655522627278,
+ "promptChars": 11135,
+ "promptTokensEst": 2784,
+ "score": 90,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 3
+ },
+ {
+ "model": "qwen3:14b",
+ "scenario": "blog",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 2,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 12,
+ "testsPassed": 12,
+ "testsFailed": 0,
+ "totalDurationMs": 74791,
+ "totalTokens": 3590,
+ "avgTokPerSec": 60.549298891176214,
+ "promptChars": 11653,
+ "promptTokensEst": 2913,
+ "score": 100,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 3
+ },
+ {
+ "model": "qwen3:8b",
+ "scenario": "todo",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 6,
+ "testsPassed": 6,
+ "testsFailed": 0,
+ "totalDurationMs": 26402,
+ "totalTokens": 2358,
+ "avgTokPerSec": 100.76936895480246,
+ "promptChars": 11243,
+ "promptTokensEst": 2811,
+ "score": 100,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 3
+ },
+ {
+ "model": "qwen3:8b",
+ "scenario": "users",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 6,
+ "testsPassed": 6,
+ "testsFailed": 0,
+ "totalDurationMs": 20751,
+ "totalTokens": 1837,
+ "avgTokPerSec": 101.05480893032836,
+ "promptChars": 10553,
+ "promptTokensEst": 2638,
+ "score": 100,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 3
+ },
+ {
+ "model": "qwen3:8b",
+ "scenario": "blog",
+ "reqOk": true,
+ "specOk": false,
+ "specEntities": 0,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 0,
+ "testsPassed": 0,
+ "testsFailed": 0,
+ "totalDurationMs": 0,
+ "totalTokens": 0,
+ "avgTokPerSec": 0,
+ "promptChars": 0,
+ "promptTokensEst": 0,
+ "score": 0,
+ "stars": "",
+ "error": "JSON-speksi epäonnistui",
+ "round": 3
+ },
+ {
+ "model": "qwen3-coder:30b",
+ "scenario": "todo",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 6,
+ "testsPassed": 6,
+ "testsFailed": 0,
+ "totalDurationMs": 22098,
+ "totalTokens": 2283,
+ "avgTokPerSec": 121.81254413612446,
+ "promptChars": 11503,
+ "promptTokensEst": 2876,
+ "score": 100,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 4
+ },
+ {
+ "model": "qwen3-coder:30b",
+ "scenario": "users",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 2,
+ "testsTotal": 8,
+ "testsPassed": 8,
+ "testsFailed": 0,
+ "totalDurationMs": 65403,
+ "totalTokens": 6779,
+ "avgTokPerSec": 118.13288294758586,
+ "promptChars": 10939,
+ "promptTokensEst": 2735,
+ "score": 80,
+ "stars": "★★★★☆",
+ "error": null,
+ "round": 4
+ },
+ {
+ "model": "qwen3-coder:30b",
+ "scenario": "blog",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 2,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 10,
+ "testsPassed": 10,
+ "testsFailed": 0,
+ "totalDurationMs": 36044,
+ "totalTokens": 3748,
+ "avgTokPerSec": 120.14822967005487,
+ "promptChars": 12639,
+ "promptTokensEst": 3160,
+ "score": 100,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 4
+ },
+ {
+ "model": "qwen3:14b",
+ "scenario": "todo",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 6,
+ "testsPassed": 6,
+ "testsFailed": 0,
+ "totalDurationMs": 38501,
+ "totalTokens": 2113,
+ "avgTokPerSec": 61.01814139430428,
+ "promptChars": 10929,
+ "promptTokensEst": 2732,
+ "score": 100,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 4
+ },
+ {
+ "model": "qwen3:14b",
+ "scenario": "users",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 3,
+ "testsTotal": 8,
+ "testsPassed": 1,
+ "testsFailed": 7,
+ "totalDurationMs": 147057,
+ "totalTokens": 7799,
+ "avgTokPerSec": 56.209406465865904,
+ "promptChars": 11207,
+ "promptTokensEst": 2802,
+ "score": 28,
+ "stars": "★★☆☆☆",
+ "error": null,
+ "round": 4
+ },
+ {
+ "model": "qwen3:14b",
+ "scenario": "blog",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 2,
+ "validationIssues": 0,
+ "fixRounds": 3,
+ "testsTotal": 12,
+ "testsPassed": 12,
+ "testsFailed": 0,
+ "totalDurationMs": 227508,
+ "totalTokens": 12026,
+ "avgTokPerSec": 58.52888492610325,
+ "promptChars": 11809,
+ "promptTokensEst": 2952,
+ "score": 80,
+ "stars": "★★★★☆",
+ "error": null,
+ "round": 4
+ },
+ {
+ "model": "qwen3:8b",
+ "scenario": "todo",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 2,
+ "validationIssues": 0,
+ "fixRounds": 3,
+ "testsTotal": 11,
+ "testsPassed": 11,
+ "testsFailed": 0,
+ "totalDurationMs": 131964,
+ "totalTokens": 11403,
+ "avgTokPerSec": 97.10963264920952,
+ "promptChars": 11786,
+ "promptTokensEst": 2947,
+ "score": 80,
+ "stars": "★★★★☆",
+ "error": null,
+ "round": 4
+ },
+ {
+ "model": "qwen3:8b",
+ "scenario": "users",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 6,
+ "testsPassed": 6,
+ "testsFailed": 0,
+ "totalDurationMs": 38820,
+ "totalTokens": 1826,
+ "avgTokPerSec": 101.07773707712924,
+ "promptChars": 10568,
+ "promptTokensEst": 2642,
+ "score": 100,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 4
+ },
+ {
+ "model": "qwen3:8b",
+ "scenario": "blog",
+ "reqOk": true,
+ "specOk": false,
+ "specEntities": 0,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 0,
+ "testsPassed": 0,
+ "testsFailed": 0,
+ "totalDurationMs": 0,
+ "totalTokens": 0,
+ "avgTokPerSec": 0,
+ "promptChars": 0,
+ "promptTokensEst": 0,
+ "score": 0,
+ "stars": "",
+ "error": "JSON-speksi epäonnistui",
+ "round": 4
+ },
+ {
+ "model": "qwen3-coder:30b",
+ "scenario": "todo",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 1,
+ "testsTotal": 6,
+ "testsPassed": 6,
+ "testsFailed": 0,
+ "totalDurationMs": 39797,
+ "totalTokens": 3776,
+ "avgTokPerSec": 120.91801837211113,
+ "promptChars": 11435,
+ "promptTokensEst": 2859,
+ "score": 90,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 5
+ },
+ {
+ "model": "qwen3-coder:30b",
+ "scenario": "users",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 3,
+ "testsTotal": 9,
+ "testsPassed": 8,
+ "testsFailed": 1,
+ "totalDurationMs": 87836,
+ "totalTokens": 9343,
+ "avgTokPerSec": 119.28783662683314,
+ "promptChars": 10718,
+ "promptTokensEst": 2680,
+ "score": 73,
+ "stars": "★★★★☆",
+ "error": null,
+ "round": 5
+ },
+ {
+ "model": "qwen3-coder:30b",
+ "scenario": "blog",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 2,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 10,
+ "testsPassed": 10,
+ "testsFailed": 0,
+ "totalDurationMs": 36644,
+ "totalTokens": 3897,
+ "avgTokPerSec": 122.28607796191666,
+ "promptChars": 12598,
+ "promptTokensEst": 3150,
+ "score": 100,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 5
+ },
+ {
+ "model": "qwen3:14b",
+ "scenario": "todo",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 1,
+ "testsTotal": 6,
+ "testsPassed": 6,
+ "testsFailed": 0,
+ "totalDurationMs": 127532,
+ "totalTokens": 3919,
+ "avgTokPerSec": 34.13133325491828,
+ "promptChars": 11352,
+ "promptTokensEst": 2838,
+ "score": 90,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 5
+ },
+ {
+ "model": "qwen3:14b",
+ "scenario": "users",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 3,
+ "testsTotal": 8,
+ "testsPassed": 6,
+ "testsFailed": 2,
+ "totalDurationMs": 217365,
+ "totalTokens": 7764,
+ "avgTokPerSec": 38.67613170588518,
+ "promptChars": 10834,
+ "promptTokensEst": 2709,
+ "score": 65,
+ "stars": "★★★☆☆",
+ "error": null,
+ "round": 5
+ },
+ {
+ "model": "qwen3:14b",
+ "scenario": "blog",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 2,
+ "validationIssues": 0,
+ "fixRounds": 3,
+ "testsTotal": 14,
+ "testsPassed": 7,
+ "testsFailed": 7,
+ "totalDurationMs": 248311,
+ "totalTokens": 13443,
+ "avgTokPerSec": 58.05680015263308,
+ "promptChars": 12219,
+ "promptTokensEst": 3055,
+ "score": 50,
+ "stars": "★★★☆☆",
+ "error": null,
+ "round": 5
+ },
+ {
+ "model": "qwen3:8b",
+ "scenario": "todo",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 6,
+ "testsPassed": 6,
+ "testsFailed": 0,
+ "totalDurationMs": 38326,
+ "totalTokens": 2079,
+ "avgTokPerSec": 100.89778087504016,
+ "promptChars": 10908,
+ "promptTokensEst": 2727,
+ "score": 100,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 5
+ },
+ {
+ "model": "qwen3:8b",
+ "scenario": "users",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 6,
+ "testsPassed": 6,
+ "testsFailed": 0,
+ "totalDurationMs": 60823,
+ "totalTokens": 1772,
+ "avgTokPerSec": 96.76383996716295,
+ "promptChars": 10378,
+ "promptTokensEst": 2595,
+ "score": 100,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 5
+ },
+ {
+ "model": "qwen3:8b",
+ "scenario": "blog",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 2,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 11,
+ "testsPassed": 11,
+ "testsFailed": 0,
+ "totalDurationMs": 81654,
+ "totalTokens": 3458,
+ "avgTokPerSec": 95.65675360193613,
+ "promptChars": 11914,
+ "promptTokensEst": 2979,
+ "score": 100,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 5
+ }
+]
\ No newline at end of file
diff --git a/kipina-codebench/results/2026-04-14T10-03.html b/kipina-codebench/results/2026-04-14T10-03.html
new file mode 100644
index 0000000..b7b16cf
--- /dev/null
+++ b/kipina-codebench/results/2026-04-14T10-03.html
@@ -0,0 +1,183 @@
+
+
+
+
+
+Kipina Model Benchmark
+
+
+
+
+Kipina Model Benchmark
+
+
+
+
+Mallikohtainen yhteenveto
+
+
+Kaikki tulokset
+
+
+
+
+
diff --git a/kipina-codebench/results/2026-04-14T10-03.json b/kipina-codebench/results/2026-04-14T10-03.json
new file mode 100644
index 0000000..0637a08
--- /dev/null
+++ b/kipina-codebench/results/2026-04-14T10-03.json
@@ -0,0 +1 @@
+[]
\ No newline at end of file
diff --git a/kipina-codebench/results/2026-04-14T10-31.html b/kipina-codebench/results/2026-04-14T10-31.html
new file mode 100644
index 0000000..815a791
--- /dev/null
+++ b/kipina-codebench/results/2026-04-14T10-31.html
@@ -0,0 +1,183 @@
+
+
+
+
+
+Kipina Model Benchmark
+
+
+
+
+Kipina Model Benchmark
+
+
+
+
+Mallikohtainen yhteenveto
+
+
+Kaikki tulokset
+
+
+
+
+
diff --git a/kipina-codebench/results/2026-04-14T10-31.json b/kipina-codebench/results/2026-04-14T10-31.json
new file mode 100644
index 0000000..3a904cf
--- /dev/null
+++ b/kipina-codebench/results/2026-04-14T10-31.json
@@ -0,0 +1,317 @@
+[
+ {
+ "model": "qwen3:8b",
+ "scenario": "todo",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 6,
+ "testsPassed": 6,
+ "testsFailed": 0,
+ "totalDurationMs": 97527,
+ "totalTokens": 2228,
+ "avgTokPerSec": 100.69171830800946,
+ "promptChars": 11566,
+ "promptTokensEst": 2892,
+ "score": 100,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 1
+ },
+ {
+ "model": "qwen3:8b",
+ "scenario": "users",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 7,
+ "testsPassed": 7,
+ "testsFailed": 0,
+ "totalDurationMs": 39549,
+ "totalTokens": 1960,
+ "avgTokPerSec": 100.98265593129491,
+ "promptChars": 11073,
+ "promptTokensEst": 2768,
+ "score": 100,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 1
+ },
+ {
+ "model": "qwen3:8b",
+ "scenario": "blog",
+ "reqOk": true,
+ "specOk": false,
+ "specEntities": 0,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 0,
+ "testsPassed": 0,
+ "testsFailed": 0,
+ "totalDurationMs": 0,
+ "totalTokens": 0,
+ "avgTokPerSec": 0,
+ "promptChars": 0,
+ "promptTokensEst": 0,
+ "score": 0,
+ "stars": "",
+ "error": "JSON-speksi epäonnistui",
+ "round": 1
+ },
+ {
+ "model": "qwen3:8b",
+ "scenario": "todo",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 2,
+ "validationIssues": 0,
+ "fixRounds": 3,
+ "testsTotal": 0,
+ "testsPassed": 0,
+ "testsFailed": 0,
+ "totalDurationMs": 131339,
+ "totalTokens": 11518,
+ "avgTokPerSec": 96.52358107464266,
+ "promptChars": 12388,
+ "promptTokensEst": 3097,
+ "score": 0,
+ "stars": "☆☆☆☆☆",
+ "error": "Testit kaatuivat",
+ "round": 2
+ },
+ {
+ "model": "qwen3:8b",
+ "scenario": "users",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 6,
+ "testsPassed": 6,
+ "testsFailed": 0,
+ "totalDurationMs": 20658,
+ "totalTokens": 1808,
+ "avgTokPerSec": 101.0081173861862,
+ "promptChars": 11057,
+ "promptTokensEst": 2764,
+ "score": 100,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 2
+ },
+ {
+ "model": "qwen3:8b",
+ "scenario": "blog",
+ "reqOk": true,
+ "specOk": false,
+ "specEntities": 0,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 0,
+ "testsPassed": 0,
+ "testsFailed": 0,
+ "totalDurationMs": 0,
+ "totalTokens": 0,
+ "avgTokPerSec": 0,
+ "promptChars": 0,
+ "promptTokensEst": 0,
+ "score": 0,
+ "stars": "",
+ "error": "JSON-speksi epäonnistui",
+ "round": 2
+ },
+ {
+ "model": "qwen3:8b",
+ "scenario": "todo",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 2,
+ "validationIssues": 1,
+ "fixRounds": 5,
+ "testsTotal": 0,
+ "testsPassed": 0,
+ "testsFailed": 0,
+ "totalDurationMs": 320031,
+ "totalTokens": 11985,
+ "avgTokPerSec": 54.915025374575386,
+ "promptChars": 12517,
+ "promptTokensEst": 3129,
+ "score": 0,
+ "stars": "☆☆☆☆☆",
+ "error": "Testit kaatuivat",
+ "round": 3
+ },
+ {
+ "model": "qwen3:8b",
+ "scenario": "users",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 7,
+ "testsPassed": 7,
+ "testsFailed": 0,
+ "totalDurationMs": 28654,
+ "totalTokens": 1877,
+ "avgTokPerSec": 100.70920643946336,
+ "promptChars": 10747,
+ "promptTokensEst": 2687,
+ "score": 100,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 3
+ },
+ {
+ "model": "qwen3:8b",
+ "scenario": "blog",
+ "reqOk": true,
+ "specOk": false,
+ "specEntities": 0,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 0,
+ "testsPassed": 0,
+ "testsFailed": 0,
+ "totalDurationMs": 0,
+ "totalTokens": 0,
+ "avgTokPerSec": 0,
+ "promptChars": 0,
+ "promptTokensEst": 0,
+ "score": 0,
+ "stars": "",
+ "error": "JSON-speksi epäonnistui",
+ "round": 3
+ },
+ {
+ "model": "qwen3:8b",
+ "scenario": "todo",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 2,
+ "validationIssues": 0,
+ "fixRounds": 1,
+ "testsTotal": 12,
+ "testsPassed": 12,
+ "testsFailed": 0,
+ "totalDurationMs": 67943,
+ "totalTokens": 6002,
+ "avgTokPerSec": 98.29436788902672,
+ "promptChars": 12389,
+ "promptTokensEst": 3097,
+ "score": 90,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 4
+ },
+ {
+ "model": "qwen3:8b",
+ "scenario": "users",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 6,
+ "testsPassed": 6,
+ "testsFailed": 0,
+ "totalDurationMs": 20203,
+ "totalTokens": 1774,
+ "avgTokPerSec": 100.9066297884274,
+ "promptChars": 10905,
+ "promptTokensEst": 2726,
+ "score": 100,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 4
+ },
+ {
+ "model": "qwen3:8b",
+ "scenario": "blog",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 2,
+ "validationIssues": 0,
+ "fixRounds": 3,
+ "testsTotal": 13,
+ "testsPassed": 12,
+ "testsFailed": 1,
+ "totalDurationMs": 148491,
+ "totalTokens": 12747,
+ "avgTokPerSec": 95.18237885727869,
+ "promptChars": 12476,
+ "promptTokensEst": 3119,
+ "score": 75,
+ "stars": "★★★★☆",
+ "error": null,
+ "round": 4
+ },
+ {
+ "model": "qwen3:8b",
+ "scenario": "todo",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 0,
+ "testsTotal": 6,
+ "testsPassed": 6,
+ "testsFailed": 0,
+ "totalDurationMs": 23830,
+ "totalTokens": 2102,
+ "avgTokPerSec": 100.641489789061,
+ "promptChars": 11404,
+ "promptTokensEst": 2851,
+ "score": 100,
+ "stars": "★★★★★",
+ "error": null,
+ "round": 5
+ },
+ {
+ "model": "qwen3:8b",
+ "scenario": "users",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 1,
+ "validationIssues": 0,
+ "fixRounds": 3,
+ "testsTotal": 8,
+ "testsPassed": 6,
+ "testsFailed": 2,
+ "totalDurationMs": 122453,
+ "totalTokens": 7285,
+ "avgTokPerSec": 94.12482830400619,
+ "promptChars": 11400,
+ "promptTokensEst": 2850,
+ "score": 65,
+ "stars": "★★★☆☆",
+ "error": null,
+ "round": 5
+ },
+ {
+ "model": "qwen3:8b",
+ "scenario": "blog",
+ "reqOk": true,
+ "specOk": true,
+ "specEntities": 2,
+ "validationIssues": 0,
+ "fixRounds": 3,
+ "testsTotal": 11,
+ "testsPassed": 10,
+ "testsFailed": 1,
+ "totalDurationMs": 147125,
+ "totalTokens": 9893,
+ "avgTokPerSec": 97.37021605085566,
+ "promptChars": 12455,
+ "promptTokensEst": 3114,
+ "score": 75,
+ "stars": "★★★★☆",
+ "error": null,
+ "round": 5
+ }
+]
\ No newline at end of file