Files
agentic-studio/kipina-codebench/results/2026-04-14T08-05.json
jaakko 6a40ca5730 CodeBench: golden example markdown-muodossa (koodi + selitykset)
todo.md yhdistää koodin ja annotaatiot: miksi pattern on valittu,
mitä EI saa tehdä. 1567 tokenia (vs raaka 1340, compact 335).
Benchmark lataa .md-version oletuksena, fallback erillisiin tiedostoihin.
2026-04-14 12:38:25 +03:00

947 lines
20 KiB
JSON

[
{
"model": "qwen3-coder:30b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 1,
"testsFailed": 5,
"totalDurationMs": 30801,
"totalTokens": 2333,
"avgTokPerSec": 122.77922150989748,
"promptChars": 10015,
"promptTokensEst": 2504,
"score": 50,
"stars": "★★★☆☆",
"error": null,
"round": 1
},
{
"model": "qwen3-coder:30b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 7,
"testsPassed": 6,
"testsFailed": 1,
"totalDurationMs": 25495,
"totalTokens": 2714,
"avgTokPerSec": 122.70970007652487,
"promptChars": 9891,
"promptTokensEst": 2473,
"score": 91,
"stars": "★★★★★",
"error": null,
"round": 1
},
{
"model": "qwen3-coder:30b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 11,
"testsPassed": 10,
"testsFailed": 1,
"totalDurationMs": 37153,
"totalTokens": 3979,
"avgTokPerSec": 121.9183958236036,
"promptChars": 11158,
"promptTokensEst": 2790,
"score": 95,
"stars": "★★★★★",
"error": null,
"round": 1
},
{
"model": "qwen3:14b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 7,
"testsPassed": 6,
"testsFailed": 1,
"totalDurationMs": 43456,
"totalTokens": 2411,
"avgTokPerSec": 60.89226084568145,
"promptChars": 9831,
"promptTokensEst": 2458,
"score": 91,
"stars": "★★★★★",
"error": null,
"round": 1
},
{
"model": "qwen3:14b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 8,
"testsPassed": 8,
"testsFailed": 0,
"totalDurationMs": 40376,
"totalTokens": 2237,
"avgTokPerSec": 61.028627032662456,
"promptChars": 9343,
"promptTokensEst": 2336,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 1
},
{
"model": "qwen3:14b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 12,
"testsPassed": 2,
"testsFailed": 10,
"totalDurationMs": 68620,
"totalTokens": 3796,
"avgTokPerSec": 60.47793268944476,
"promptChars": 10497,
"promptTokensEst": 2624,
"score": 50,
"stars": "★★★☆☆",
"error": null,
"round": 1
},
{
"model": "qwen3:8b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 7,
"testsPassed": 7,
"testsFailed": 0,
"totalDurationMs": 25235,
"totalTokens": 2269,
"avgTokPerSec": 101.24212769079884,
"promptChars": 9294,
"promptTokensEst": 2324,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 1
},
{
"model": "qwen3:8b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 8,
"testsPassed": 6,
"testsFailed": 2,
"totalDurationMs": 21720,
"totalTokens": 1942,
"avgTokPerSec": 101.65074583709965,
"promptChars": 9020,
"promptTokensEst": 2255,
"score": 85,
"stars": "★★★★☆",
"error": null,
"round": 1
},
{
"model": "qwen3:8b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 11,
"testsPassed": 10,
"testsFailed": 1,
"totalDurationMs": 39006,
"totalTokens": 3509,
"avgTokPerSec": 100.43593706181406,
"promptChars": 10372,
"promptTokensEst": 2593,
"score": 95,
"stars": "★★★★★",
"error": null,
"round": 1
},
{
"model": "qwen3-coder:30b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 21989,
"totalTokens": 2339,
"avgTokPerSec": 122.8454095677367,
"promptChars": 10052,
"promptTokensEst": 2513,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 2
},
{
"model": "qwen3-coder:30b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 23997,
"totalTokens": 2551,
"avgTokPerSec": 122.23722733560855,
"promptChars": 9973,
"promptTokensEst": 2493,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 2
},
{
"model": "qwen3-coder:30b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 8,
"testsPassed": 8,
"testsFailed": 0,
"totalDurationMs": 30169,
"totalTokens": 3249,
"avgTokPerSec": 123.04696524796096,
"promptChars": 11097,
"promptTokensEst": 2774,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 2
},
{
"model": "qwen3:14b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 9,
"testsPassed": 6,
"testsFailed": 3,
"totalDurationMs": 47091,
"totalTokens": 2602,
"avgTokPerSec": 60.962687726457375,
"promptChars": 9633,
"promptTokensEst": 2408,
"score": 80,
"stars": "★★★★☆",
"error": null,
"round": 2
},
{
"model": "qwen3:14b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 7,
"testsPassed": 7,
"testsFailed": 0,
"totalDurationMs": 41747,
"totalTokens": 2313,
"avgTokPerSec": 60.949025583617605,
"promptChars": 9373,
"promptTokensEst": 2343,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 2
},
{
"model": "qwen3:14b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 12,
"testsPassed": 2,
"testsFailed": 10,
"totalDurationMs": 66888,
"totalTokens": 3699,
"avgTokPerSec": 60.49540514685331,
"promptChars": 10323,
"promptTokensEst": 2581,
"score": 50,
"stars": "★★★☆☆",
"error": null,
"round": 2
},
{
"model": "qwen3:8b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 8,
"testsPassed": 7,
"testsFailed": 1,
"totalDurationMs": 27036,
"totalTokens": 2434,
"avgTokPerSec": 101.01399069228444,
"promptChars": 9513,
"promptTokensEst": 2378,
"score": 93,
"stars": "★★★★★",
"error": null,
"round": 2
},
{
"model": "qwen3:8b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 7,
"testsPassed": 6,
"testsFailed": 1,
"totalDurationMs": 20927,
"totalTokens": 1872,
"avgTokPerSec": 101.45096098956486,
"promptChars": 8881,
"promptTokensEst": 2220,
"score": 91,
"stars": "★★★★★",
"error": null,
"round": 2
},
{
"model": "qwen3:8b",
"scenario": "blog",
"reqOk": true,
"specOk": false,
"specEntities": 0,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 0,
"testsPassed": 0,
"testsFailed": 0,
"totalDurationMs": 0,
"totalTokens": 0,
"avgTokPerSec": 0,
"promptChars": 0,
"promptTokensEst": 0,
"score": 0,
"stars": "",
"error": "JSON-speksi epäonnistui",
"round": 2
},
{
"model": "qwen3-coder:30b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 8,
"testsPassed": 6,
"testsFailed": 2,
"totalDurationMs": 26919,
"totalTokens": 2889,
"avgTokPerSec": 123.63666629145064,
"promptChars": 10162,
"promptTokensEst": 2541,
"score": 85,
"stars": "★★★★☆",
"error": null,
"round": 3
},
{
"model": "qwen3-coder:30b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 8,
"testsPassed": 8,
"testsFailed": 0,
"totalDurationMs": 27592,
"totalTokens": 2946,
"avgTokPerSec": 122.33273400152825,
"promptChars": 9469,
"promptTokensEst": 2367,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 3
},
{
"model": "qwen3-coder:30b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 11,
"testsPassed": 11,
"testsFailed": 0,
"totalDurationMs": 35734,
"totalTokens": 3827,
"avgTokPerSec": 122.65156559717951,
"promptChars": 11086,
"promptTokensEst": 2772,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 3
},
{
"model": "qwen3:14b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 9,
"testsPassed": 6,
"testsFailed": 3,
"totalDurationMs": 50372,
"totalTokens": 2795,
"avgTokPerSec": 60.91611850918806,
"promptChars": 9758,
"promptTokensEst": 2440,
"score": 80,
"stars": "★★★★☆",
"error": null,
"round": 3
},
{
"model": "qwen3:14b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 1,
"testsFailed": 5,
"totalDurationMs": 38716,
"totalTokens": 2144,
"avgTokPerSec": 61.0412890406478,
"promptChars": 9415,
"promptTokensEst": 2354,
"score": 50,
"stars": "★★★☆☆",
"error": null,
"round": 3
},
{
"model": "qwen3:14b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 14,
"testsPassed": 7,
"testsFailed": 7,
"totalDurationMs": 74882,
"totalTokens": 4130,
"avgTokPerSec": 60.32640855026445,
"promptChars": 10506,
"promptTokensEst": 2627,
"score": 70,
"stars": "★★★★☆",
"error": null,
"round": 3
},
{
"model": "qwen3:8b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 3,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 0,
"testsPassed": 0,
"testsFailed": 0,
"totalDurationMs": 35913,
"totalTokens": 3218,
"avgTokPerSec": 100.38516205100154,
"promptChars": 11338,
"promptTokensEst": 2835,
"score": 0,
"stars": "☆☆☆☆☆",
"error": "Testit kaatuivat",
"round": 3
},
{
"model": "qwen3:8b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 7,
"testsPassed": 7,
"testsFailed": 0,
"totalDurationMs": 20974,
"totalTokens": 1880,
"avgTokPerSec": 101.52450928280543,
"promptChars": 8803,
"promptTokensEst": 2201,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 3
},
{
"model": "qwen3:8b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 11,
"testsPassed": 9,
"testsFailed": 2,
"totalDurationMs": 36005,
"totalTokens": 3243,
"avgTokPerSec": 100.44301406462307,
"promptChars": 10414,
"promptTokensEst": 2604,
"score": 89,
"stars": "★★★★☆",
"error": null,
"round": 3
},
{
"model": "qwen3-coder:30b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 7,
"testsPassed": 1,
"testsFailed": 6,
"totalDurationMs": 23071,
"totalTokens": 2469,
"avgTokPerSec": 124.09643322620661,
"promptChars": 9960,
"promptTokensEst": 2490,
"score": 49,
"stars": "★★☆☆☆",
"error": null,
"round": 4
},
{
"model": "qwen3-coder:30b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 8,
"testsPassed": 2,
"testsFailed": 6,
"totalDurationMs": 27062,
"totalTokens": 2907,
"avgTokPerSec": 123.35530975346687,
"promptChars": 9558,
"promptTokensEst": 2390,
"score": 55,
"stars": "★★★☆☆",
"error": null,
"round": 4
},
{
"model": "qwen3-coder:30b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 9,
"testsPassed": 9,
"testsFailed": 0,
"totalDurationMs": 29395,
"totalTokens": 3156,
"avgTokPerSec": 123.22575073561812,
"promptChars": 10574,
"promptTokensEst": 2644,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 4
},
{
"model": "qwen3:14b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 39590,
"totalTokens": 2198,
"avgTokPerSec": 61.051945510465806,
"promptChars": 9664,
"promptTokensEst": 2416,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 4
},
{
"model": "qwen3:14b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 1,
"testsFailed": 5,
"totalDurationMs": 36950,
"totalTokens": 2042,
"avgTokPerSec": 61.01436784429489,
"promptChars": 9225,
"promptTokensEst": 2306,
"score": 50,
"stars": "★★★☆☆",
"error": null,
"round": 4
},
{
"model": "qwen3:14b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 14,
"testsPassed": 2,
"testsFailed": 12,
"totalDurationMs": 80600,
"totalTokens": 4437,
"avgTokPerSec": 60.29371170543078,
"promptChars": 10688,
"promptTokensEst": 2672,
"score": 49,
"stars": "★★☆☆☆",
"error": null,
"round": 4
},
{
"model": "qwen3:8b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 0,
"testsPassed": 0,
"testsFailed": 0,
"totalDurationMs": 29125,
"totalTokens": 2619,
"avgTokPerSec": 100.90587777586212,
"promptChars": 9899,
"promptTokensEst": 2475,
"score": 0,
"stars": "☆☆☆☆☆",
"error": "Testit kaatuivat",
"round": 4
},
{
"model": "qwen3:8b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 8,
"testsPassed": 8,
"testsFailed": 0,
"totalDurationMs": 21847,
"totalTokens": 1957,
"avgTokPerSec": 101.44111070734304,
"promptChars": 8946,
"promptTokensEst": 2237,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 4
},
{
"model": "qwen3:8b",
"scenario": "blog",
"reqOk": true,
"specOk": false,
"specEntities": 0,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 0,
"testsPassed": 0,
"testsFailed": 0,
"totalDurationMs": 0,
"totalTokens": 0,
"avgTokPerSec": 0,
"promptChars": 0,
"promptTokensEst": 0,
"score": 0,
"stars": "",
"error": "JSON-speksi epäonnistui",
"round": 4
},
{
"model": "qwen3-coder:30b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 1,
"testsFailed": 5,
"totalDurationMs": 21127,
"totalTokens": 2245,
"avgTokPerSec": 124.22714049663371,
"promptChars": 9972,
"promptTokensEst": 2493,
"score": 50,
"stars": "★★★☆☆",
"error": null,
"round": 5
},
{
"model": "qwen3-coder:30b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 9,
"testsPassed": 7,
"testsFailed": 2,
"totalDurationMs": 30281,
"totalTokens": 3079,
"avgTokPerSec": 123.00254714651271,
"promptChars": 9562,
"promptTokensEst": 2391,
"score": 87,
"stars": "★★★★☆",
"error": null,
"round": 5
},
{
"model": "qwen3-coder:30b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 12,
"testsPassed": 12,
"testsFailed": 0,
"totalDurationMs": 39630,
"totalTokens": 4274,
"avgTokPerSec": 123.08303937451802,
"promptChars": 11119,
"promptTokensEst": 2780,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 5
},
{
"model": "qwen3:14b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 38032,
"totalTokens": 2104,
"avgTokPerSec": 61.05445464163662,
"promptChars": 9455,
"promptTokensEst": 2364,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 5
},
{
"model": "qwen3:14b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 0,
"testsPassed": 0,
"testsFailed": 0,
"totalDurationMs": 39620,
"totalTokens": 2193,
"avgTokPerSec": 61.04565233675101,
"promptChars": 9481,
"promptTokensEst": 2370,
"score": 0,
"stars": "☆☆☆☆☆",
"error": "Testit kaatuivat",
"round": 5
},
{
"model": "qwen3:14b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 9,
"testsPassed": 7,
"testsFailed": 2,
"totalDurationMs": 63579,
"totalTokens": 3520,
"avgTokPerSec": 60.51513453009977,
"promptChars": 10493,
"promptTokensEst": 2623,
"score": 87,
"stars": "★★★★☆",
"error": null,
"round": 5
},
{
"model": "qwen3:8b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 9,
"testsPassed": 6,
"testsFailed": 3,
"totalDurationMs": 30845,
"totalTokens": 2777,
"avgTokPerSec": 100.79046137130972,
"promptChars": 9507,
"promptTokensEst": 2377,
"score": 80,
"stars": "★★★★☆",
"error": null,
"round": 5
},
{
"model": "qwen3:8b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 8,
"testsPassed": 6,
"testsFailed": 2,
"totalDurationMs": 21413,
"totalTokens": 1914,
"avgTokPerSec": 101.25525436264132,
"promptChars": 8804,
"promptTokensEst": 2201,
"score": 85,
"stars": "★★★★☆",
"error": null,
"round": 5
},
{
"model": "qwen3:8b",
"scenario": "blog",
"reqOk": true,
"specOk": false,
"specEntities": 0,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 0,
"testsPassed": 0,
"testsFailed": 0,
"totalDurationMs": 0,
"totalTokens": 0,
"avgTokPerSec": 0,
"promptChars": 0,
"promptTokensEst": 0,
"score": 0,
"stars": "",
"error": "JSON-speksi epäonnistui",
"round": 5
}
]