Files
agentic-studio/kipina-codebench/results/2026-04-14T08-18.json
jaakko 6a40ca5730 CodeBench: golden example markdown-muodossa (koodi + selitykset)
todo.md yhdistää koodin ja annotaatiot: miksi pattern on valittu,
mitä EI saa tehdä. 1567 tokenia (vs raaka 1340, compact 335).
Benchmark lataa .md-version oletuksena, fallback erillisiin tiedostoihin.
2026-04-14 12:38:25 +03:00

947 lines
21 KiB
JSON

[
{
"model": "qwen3:14b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 9,
"testsPassed": 6,
"testsFailed": 3,
"totalDurationMs": 33892,
"totalTokens": 2675,
"avgTokPerSec": 88.07409036121237,
"promptChars": 9688,
"promptTokensEst": 2422,
"score": 80,
"stars": "★★★★☆",
"error": null,
"round": 1
},
{
"model": "qwen3:14b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 8,
"testsPassed": 6,
"testsFailed": 2,
"totalDurationMs": 30647,
"totalTokens": 2549,
"avgTokPerSec": 88.4488185974085,
"promptChars": 9594,
"promptTokensEst": 2399,
"score": 85,
"stars": "★★★★☆",
"error": null,
"round": 1
},
{
"model": "qwen3:14b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 13,
"testsPassed": 6,
"testsFailed": 7,
"totalDurationMs": 44371,
"totalTokens": 3678,
"avgTokPerSec": 88.172616246191,
"promptChars": 10432,
"promptTokensEst": 2608,
"score": 68,
"stars": "★★★☆☆",
"error": null,
"round": 1
},
{
"model": "qwen3:8b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 7,
"testsPassed": 6,
"testsFailed": 1,
"totalDurationMs": 18385,
"totalTokens": 2375,
"avgTokPerSec": 147.62230806597154,
"promptChars": 9478,
"promptTokensEst": 2370,
"score": 91,
"stars": "★★★★★",
"error": null,
"round": 1
},
{
"model": "qwen3:8b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 7,
"testsPassed": 7,
"testsFailed": 0,
"totalDurationMs": 13968,
"totalTokens": 1904,
"avgTokPerSec": 148.3084817167518,
"promptChars": 8837,
"promptTokensEst": 2209,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 1
},
{
"model": "qwen3:8b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 0,
"testsPassed": 0,
"testsFailed": 0,
"totalDurationMs": 25642,
"totalTokens": 3476,
"avgTokPerSec": 146.49556892944076,
"promptChars": 10734,
"promptTokensEst": 2684,
"score": 0,
"stars": "☆☆☆☆☆",
"error": "Testit kaatuivat",
"round": 1
},
{
"model": "qwen3-coder:30b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 8,
"testsPassed": 6,
"testsFailed": 2,
"totalDurationMs": 19982,
"totalTokens": 2937,
"avgTokPerSec": 191.2786317674431,
"promptChars": 10281,
"promptTokensEst": 2570,
"score": 85,
"stars": "★★★★☆",
"error": null,
"round": 1
},
{
"model": "qwen3-coder:30b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 7,
"testsPassed": 7,
"testsFailed": 0,
"totalDurationMs": 17114,
"totalTokens": 2903,
"avgTokPerSec": 190.51221206765385,
"promptChars": 9654,
"promptTokensEst": 2414,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 1
},
{
"model": "qwen3-coder:30b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 11,
"testsPassed": 11,
"testsFailed": 0,
"totalDurationMs": 22352,
"totalTokens": 3776,
"avgTokPerSec": 190.56628728306987,
"promptChars": 11134,
"promptTokensEst": 2784,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 1
},
{
"model": "qwen3:14b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 8,
"testsPassed": 6,
"testsFailed": 2,
"totalDurationMs": 31217,
"totalTokens": 2463,
"avgTokPerSec": 88.6684646675098,
"promptChars": 9598,
"promptTokensEst": 2400,
"score": 85,
"stars": "★★★★☆",
"error": null,
"round": 2
},
{
"model": "qwen3:14b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 7,
"testsPassed": 7,
"testsFailed": 0,
"totalDurationMs": 27520,
"totalTokens": 2288,
"avgTokPerSec": 88.64765360012593,
"promptChars": 9612,
"promptTokensEst": 2403,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 2
},
{
"model": "qwen3:14b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 12,
"testsPassed": 3,
"testsFailed": 9,
"totalDurationMs": 41874,
"totalTokens": 3474,
"avgTokPerSec": 88.22266853318554,
"promptChars": 10408,
"promptTokensEst": 2602,
"score": 55,
"stars": "★★★☆☆",
"error": null,
"round": 2
},
{
"model": "qwen3:8b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 11,
"testsPassed": 11,
"testsFailed": 0,
"totalDurationMs": 24781,
"totalTokens": 3240,
"avgTokPerSec": 146.89167309934365,
"promptChars": 10179,
"promptTokensEst": 2545,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 2
},
{
"model": "qwen3:8b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 9,
"testsPassed": 6,
"testsFailed": 3,
"totalDurationMs": 19148,
"totalTokens": 2605,
"avgTokPerSec": 147.55250620481297,
"promptChars": 9634,
"promptTokensEst": 2409,
"score": 80,
"stars": "★★★★☆",
"error": null,
"round": 2
},
{
"model": "qwen3:8b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 11,
"testsPassed": 11,
"testsFailed": 0,
"totalDurationMs": 23816,
"totalTokens": 3232,
"avgTokPerSec": 147.25857324533817,
"promptChars": 9226,
"promptTokensEst": 2307,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 2
},
{
"model": "qwen3-coder:30b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 16639,
"totalTokens": 2369,
"avgTokPerSec": 191.61273045157245,
"promptChars": 10048,
"promptTokensEst": 2512,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 2
},
{
"model": "qwen3-coder:30b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 9,
"testsPassed": 8,
"testsFailed": 1,
"totalDurationMs": 18588,
"totalTokens": 3163,
"avgTokPerSec": 190.86975006725547,
"promptChars": 10048,
"promptTokensEst": 2512,
"score": 93,
"stars": "★★★★★",
"error": null,
"round": 2
},
{
"model": "qwen3-coder:30b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 10,
"testsPassed": 10,
"testsFailed": 0,
"totalDurationMs": 22677,
"totalTokens": 3828,
"avgTokPerSec": 190.15611016906482,
"promptChars": 11090,
"promptTokensEst": 2773,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 2
},
{
"model": "qwen3:14b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 26449,
"totalTokens": 2063,
"avgTokPerSec": 88.77498453063184,
"promptChars": 9608,
"promptTokensEst": 2402,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 3
},
{
"model": "qwen3:14b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 7,
"testsPassed": 7,
"testsFailed": 0,
"totalDurationMs": 27510,
"totalTokens": 2289,
"avgTokPerSec": 88.74699253414485,
"promptChars": 9418,
"promptTokensEst": 2355,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 3
},
{
"model": "qwen3:14b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 12,
"testsPassed": 3,
"testsFailed": 9,
"totalDurationMs": 45105,
"totalTokens": 3738,
"avgTokPerSec": 88.04788102995212,
"promptChars": 10564,
"promptTokensEst": 2641,
"score": 55,
"stars": "★★★☆☆",
"error": null,
"round": 3
},
{
"model": "qwen3:8b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 8,
"testsPassed": 7,
"testsFailed": 1,
"totalDurationMs": 19204,
"totalTokens": 2480,
"avgTokPerSec": 147.91758782382294,
"promptChars": 9391,
"promptTokensEst": 2348,
"score": 93,
"stars": "★★★★★",
"error": null,
"round": 3
},
{
"model": "qwen3:8b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 12990,
"totalTokens": 1769,
"avgTokPerSec": 148.2616673700717,
"promptChars": 8898,
"promptTokensEst": 2225,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 3
},
{
"model": "qwen3:8b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 12,
"testsPassed": 10,
"testsFailed": 2,
"totalDurationMs": 25831,
"totalTokens": 3500,
"avgTokPerSec": 146.86924785880186,
"promptChars": 9465,
"promptTokensEst": 2366,
"score": 90,
"stars": "★★★★★",
"error": null,
"round": 3
},
{
"model": "qwen3-coder:30b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 19453,
"totalTokens": 2845,
"avgTokPerSec": 191.37382231956113,
"promptChars": 10157,
"promptTokensEst": 2539,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 3
},
{
"model": "qwen3-coder:30b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 9,
"testsPassed": 9,
"testsFailed": 0,
"totalDurationMs": 21570,
"totalTokens": 3529,
"avgTokPerSec": 190.65454623497536,
"promptChars": 9732,
"promptTokensEst": 2433,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 3
},
{
"model": "qwen3-coder:30b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 11,
"testsPassed": 11,
"testsFailed": 0,
"totalDurationMs": 25537,
"totalTokens": 4300,
"avgTokPerSec": 189.94521619124598,
"promptChars": 11127,
"promptTokensEst": 2782,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 3
},
{
"model": "qwen3:14b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 9,
"testsPassed": 7,
"testsFailed": 2,
"totalDurationMs": 31923,
"totalTokens": 2522,
"avgTokPerSec": 88.62182881661799,
"promptChars": 9700,
"promptTokensEst": 2425,
"score": 87,
"stars": "★★★★☆",
"error": null,
"round": 4
},
{
"model": "qwen3:14b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 7,
"testsPassed": 7,
"testsFailed": 0,
"totalDurationMs": 26000,
"totalTokens": 2163,
"avgTokPerSec": 88.86878707672254,
"promptChars": 9288,
"promptTokensEst": 2322,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 4
},
{
"model": "qwen3:14b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 10,
"testsPassed": 10,
"testsFailed": 0,
"totalDurationMs": 43275,
"totalTokens": 3588,
"avgTokPerSec": 88.24995936347965,
"promptChars": 10173,
"promptTokensEst": 2543,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 4
},
{
"model": "qwen3:8b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 14,
"testsPassed": 0,
"testsFailed": 14,
"totalDurationMs": 30045,
"totalTokens": 3913,
"avgTokPerSec": 146.51683619371713,
"promptChars": 10334,
"promptTokensEst": 2584,
"score": 40,
"stars": "★★☆☆☆",
"error": null,
"round": 4
},
{
"model": "qwen3:8b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 9,
"testsPassed": 5,
"testsFailed": 4,
"totalDurationMs": 17076,
"totalTokens": 2321,
"avgTokPerSec": 147.99547121069506,
"promptChars": 9451,
"promptTokensEst": 2363,
"score": 73,
"stars": "★★★★☆",
"error": null,
"round": 4
},
{
"model": "qwen3:8b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 11,
"testsPassed": 11,
"testsFailed": 0,
"totalDurationMs": 23890,
"totalTokens": 3243,
"avgTokPerSec": 147.20125507974117,
"promptChars": 9217,
"promptTokensEst": 2304,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 4
},
{
"model": "qwen3-coder:30b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 8,
"testsPassed": 6,
"testsFailed": 2,
"totalDurationMs": 21812,
"totalTokens": 3246,
"avgTokPerSec": 191.07801335688654,
"promptChars": 10249,
"promptTokensEst": 2562,
"score": 85,
"stars": "★★★★☆",
"error": null,
"round": 4
},
{
"model": "qwen3-coder:30b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 9,
"testsPassed": 8,
"testsFailed": 1,
"totalDurationMs": 20325,
"totalTokens": 3441,
"avgTokPerSec": 190.10241840094508,
"promptChars": 9930,
"promptTokensEst": 2483,
"score": 93,
"stars": "★★★★★",
"error": null,
"round": 4
},
{
"model": "qwen3-coder:30b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 12,
"testsPassed": 12,
"testsFailed": 0,
"totalDurationMs": 26087,
"totalTokens": 4387,
"avgTokPerSec": 189.8005689388054,
"promptChars": 11109,
"promptTokensEst": 2777,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 4
},
{
"model": "qwen3:14b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 7,
"testsPassed": 7,
"testsFailed": 0,
"totalDurationMs": 30287,
"totalTokens": 2388,
"avgTokPerSec": 88.72243320918638,
"promptChars": 9695,
"promptTokensEst": 2424,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 5
},
{
"model": "qwen3:14b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 9,
"testsPassed": 6,
"testsFailed": 3,
"totalDurationMs": 31212,
"totalTokens": 2601,
"avgTokPerSec": 88.71289036919063,
"promptChars": 9619,
"promptTokensEst": 2405,
"score": 80,
"stars": "★★★★☆",
"error": null,
"round": 5
},
{
"model": "qwen3:14b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 15,
"testsPassed": 3,
"testsFailed": 12,
"totalDurationMs": 50939,
"totalTokens": 4217,
"avgTokPerSec": 88.06125722020734,
"promptChars": 10743,
"promptTokensEst": 2686,
"score": 52,
"stars": "★★★☆☆",
"error": null,
"round": 5
},
{
"model": "qwen3:8b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 7,
"testsPassed": 6,
"testsFailed": 1,
"totalDurationMs": 17913,
"totalTokens": 2310,
"avgTokPerSec": 148.0291268001691,
"promptChars": 9357,
"promptTokensEst": 2339,
"score": 91,
"stars": "★★★★★",
"error": null,
"round": 5
},
{
"model": "qwen3:8b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 8,
"testsPassed": 8,
"testsFailed": 0,
"totalDurationMs": 13948,
"totalTokens": 1898,
"avgTokPerSec": 148.37907379944423,
"promptChars": 8725,
"promptTokensEst": 2181,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 5
},
{
"model": "qwen3:8b",
"scenario": "blog",
"reqOk": true,
"specOk": false,
"specEntities": 0,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 0,
"testsPassed": 0,
"testsFailed": 0,
"totalDurationMs": 0,
"totalTokens": 0,
"avgTokPerSec": 0,
"promptChars": 0,
"promptTokensEst": 0,
"score": 0,
"stars": "",
"error": "JSON-speksi epäonnistui",
"round": 5
},
{
"model": "qwen3-coder:30b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 1,
"testsFailed": 5,
"totalDurationMs": 15229,
"totalTokens": 2119,
"avgTokPerSec": 192.33007410215646,
"promptChars": 9827,
"promptTokensEst": 2457,
"score": 50,
"stars": "★★★☆☆",
"error": null,
"round": 5
},
{
"model": "qwen3-coder:30b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 18223,
"totalTokens": 3093,
"avgTokPerSec": 190.71372054282037,
"promptChars": 9641,
"promptTokensEst": 2410,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 5
},
{
"model": "qwen3-coder:30b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 10,
"testsPassed": 1,
"testsFailed": 9,
"totalDurationMs": 21215,
"totalTokens": 3589,
"avgTokPerSec": 190.49493540345176,
"promptChars": 11180,
"promptTokensEst": 2795,
"score": 46,
"stars": "★★☆☆☆",
"error": null,
"round": 5
}
]