CodeBench: mallikohtaiset promptiprofiilit (profiles.json)

- profiles.json: malli → profiili → prompti -mappaus
- code-small.md: tiivistetty prompti pienille malleille (8b, 4b)
- benchmark valitsee automaattisesti oikean promptin mallin perusteella
- qwen3-coder:30b → code.md (large), qwen3:8b → code-small.md (small)
This commit is contained in:
2026-04-14 13:54:26 +03:00
parent e54c1b057c
commit 92964e322f
15 changed files with 2597 additions and 1 deletions

View File

@@ -0,0 +1,947 @@
[
{
"model": "qwen3-coder:30b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 25444,
"totalTokens": 2661,
"avgTokPerSec": 122.06801173056196,
"promptChars": 11849,
"promptTokensEst": 2962,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 1
},
{
"model": "qwen3-coder:30b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 24447,
"totalTokens": 2537,
"avgTokPerSec": 121.11837170891442,
"promptChars": 11045,
"promptTokensEst": 2761,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 1
},
{
"model": "qwen3-coder:30b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 11,
"testsPassed": 11,
"testsFailed": 0,
"totalDurationMs": 38071,
"totalTokens": 3965,
"avgTokPerSec": 120.37309655579647,
"promptChars": 12702,
"promptTokensEst": 3176,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 1
},
{
"model": "qwen3:14b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 38459,
"totalTokens": 2106,
"avgTokPerSec": 60.889088461567745,
"promptChars": 10951,
"promptTokensEst": 2738,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 1
},
{
"model": "qwen3:14b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 35959,
"totalTokens": 1966,
"avgTokPerSec": 60.9684885562545,
"promptChars": 10698,
"promptTokensEst": 2675,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 1
},
{
"model": "qwen3:14b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 3,
"testsTotal": 13,
"testsPassed": 2,
"testsFailed": 11,
"totalDurationMs": 269370,
"totalTokens": 14361,
"avgTokPerSec": 57.79069860126629,
"promptChars": 11838,
"promptTokensEst": 2960,
"score": 29,
"stars": "★★☆☆☆",
"error": null,
"round": 1
},
{
"model": "qwen3:8b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 23199,
"totalTokens": 2054,
"avgTokPerSec": 101.09280595816365,
"promptChars": 10854,
"promptTokensEst": 2714,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 1
},
{
"model": "qwen3:8b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 3,
"testsTotal": 1,
"testsPassed": 0,
"testsFailed": 1,
"totalDurationMs": 72665,
"totalTokens": 6586,
"avgTokPerSec": 99.40636298490288,
"promptChars": 10157,
"promptTokensEst": 2539,
"score": 20,
"stars": "★☆☆☆☆",
"error": "Syntaksivirhe",
"round": 1
},
{
"model": "qwen3:8b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 3,
"testsTotal": 0,
"testsPassed": 0,
"testsFailed": 0,
"totalDurationMs": 136309,
"totalTokens": 12036,
"avgTokPerSec": 97.02525169408467,
"promptChars": 10823,
"promptTokensEst": 2706,
"score": 0,
"stars": "☆☆☆☆☆",
"error": "Testit kaatuivat",
"round": 1
},
{
"model": "qwen3-coder:30b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 28177,
"totalTokens": 2946,
"avgTokPerSec": 121.23541038097,
"promptChars": 11836,
"promptTokensEst": 2959,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 2
},
{
"model": "qwen3-coder:30b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 8,
"testsPassed": 8,
"testsFailed": 0,
"totalDurationMs": 22631,
"totalTokens": 2352,
"avgTokPerSec": 121.93930190168658,
"promptChars": 10440,
"promptTokensEst": 2610,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 2
},
{
"model": "qwen3-coder:30b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 12,
"testsPassed": 12,
"testsFailed": 0,
"totalDurationMs": 40394,
"totalTokens": 4225,
"avgTokPerSec": 120.84107397324551,
"promptChars": 12362,
"promptTokensEst": 3091,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 2
},
{
"model": "qwen3:14b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 7,
"testsPassed": 7,
"testsFailed": 0,
"totalDurationMs": 46081,
"totalTokens": 2542,
"avgTokPerSec": 60.93046828700026,
"promptChars": 11412,
"promptTokensEst": 2853,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 2
},
{
"model": "qwen3:14b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 7,
"testsPassed": 7,
"testsFailed": 0,
"totalDurationMs": 41323,
"totalTokens": 2272,
"avgTokPerSec": 60.99406174164295,
"promptChars": 10884,
"promptTokensEst": 2721,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 2
},
{
"model": "qwen3:14b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 3,
"testsTotal": 14,
"testsPassed": 2,
"testsFailed": 12,
"totalDurationMs": 262591,
"totalTokens": 14129,
"avgTokPerSec": 57.91340837830759,
"promptChars": 12143,
"promptTokensEst": 3036,
"score": 29,
"stars": "★★☆☆☆",
"error": null,
"round": 2
},
{
"model": "qwen3:8b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 24007,
"totalTokens": 2137,
"avgTokPerSec": 101.05982103292858,
"promptChars": 10756,
"promptTokensEst": 2689,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 2
},
{
"model": "qwen3:8b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 3,
"testsTotal": 7,
"testsPassed": 6,
"testsFailed": 1,
"totalDurationMs": 68739,
"totalTokens": 6199,
"avgTokPerSec": 98.9825675198183,
"promptChars": 10313,
"promptTokensEst": 2578,
"score": 71,
"stars": "★★★★☆",
"error": null,
"round": 2
},
{
"model": "qwen3:8b",
"scenario": "blog",
"reqOk": true,
"specOk": false,
"specEntities": 0,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 0,
"testsPassed": 0,
"testsFailed": 0,
"totalDurationMs": 0,
"totalTokens": 0,
"avgTokPerSec": 0,
"promptChars": 0,
"promptTokensEst": 0,
"score": 0,
"stars": "",
"error": "JSON-speksi epäonnistui",
"round": 2
},
{
"model": "qwen3-coder:30b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 23472,
"totalTokens": 2427,
"avgTokPerSec": 120.85293828875076,
"promptChars": 11663,
"promptTokensEst": 2916,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 3
},
{
"model": "qwen3-coder:30b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 8,
"testsPassed": 8,
"testsFailed": 0,
"totalDurationMs": 25864,
"totalTokens": 2671,
"avgTokPerSec": 120.6883137195962,
"promptChars": 11148,
"promptTokensEst": 2787,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 3
},
{
"model": "qwen3-coder:30b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 12,
"testsPassed": 12,
"testsFailed": 0,
"totalDurationMs": 41074,
"totalTokens": 4275,
"avgTokPerSec": 120.33351485161673,
"promptChars": 12664,
"promptTokensEst": 3166,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 3
},
{
"model": "qwen3:14b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 7,
"testsPassed": 7,
"testsFailed": 0,
"totalDurationMs": 40457,
"totalTokens": 2229,
"avgTokPerSec": 61.093615619948345,
"promptChars": 10905,
"promptTokensEst": 2726,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 3
},
{
"model": "qwen3:14b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 1,
"testsTotal": 7,
"testsPassed": 7,
"testsFailed": 0,
"totalDurationMs": 77506,
"totalTokens": 4268,
"avgTokPerSec": 60.19655522627278,
"promptChars": 11135,
"promptTokensEst": 2784,
"score": 90,
"stars": "★★★★★",
"error": null,
"round": 3
},
{
"model": "qwen3:14b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 12,
"testsPassed": 12,
"testsFailed": 0,
"totalDurationMs": 74791,
"totalTokens": 3590,
"avgTokPerSec": 60.549298891176214,
"promptChars": 11653,
"promptTokensEst": 2913,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 3
},
{
"model": "qwen3:8b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 26402,
"totalTokens": 2358,
"avgTokPerSec": 100.76936895480246,
"promptChars": 11243,
"promptTokensEst": 2811,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 3
},
{
"model": "qwen3:8b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 20751,
"totalTokens": 1837,
"avgTokPerSec": 101.05480893032836,
"promptChars": 10553,
"promptTokensEst": 2638,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 3
},
{
"model": "qwen3:8b",
"scenario": "blog",
"reqOk": true,
"specOk": false,
"specEntities": 0,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 0,
"testsPassed": 0,
"testsFailed": 0,
"totalDurationMs": 0,
"totalTokens": 0,
"avgTokPerSec": 0,
"promptChars": 0,
"promptTokensEst": 0,
"score": 0,
"stars": "",
"error": "JSON-speksi epäonnistui",
"round": 3
},
{
"model": "qwen3-coder:30b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 22098,
"totalTokens": 2283,
"avgTokPerSec": 121.81254413612446,
"promptChars": 11503,
"promptTokensEst": 2876,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 4
},
{
"model": "qwen3-coder:30b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 2,
"testsTotal": 8,
"testsPassed": 8,
"testsFailed": 0,
"totalDurationMs": 65403,
"totalTokens": 6779,
"avgTokPerSec": 118.13288294758586,
"promptChars": 10939,
"promptTokensEst": 2735,
"score": 80,
"stars": "★★★★☆",
"error": null,
"round": 4
},
{
"model": "qwen3-coder:30b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 10,
"testsPassed": 10,
"testsFailed": 0,
"totalDurationMs": 36044,
"totalTokens": 3748,
"avgTokPerSec": 120.14822967005487,
"promptChars": 12639,
"promptTokensEst": 3160,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 4
},
{
"model": "qwen3:14b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 38501,
"totalTokens": 2113,
"avgTokPerSec": 61.01814139430428,
"promptChars": 10929,
"promptTokensEst": 2732,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 4
},
{
"model": "qwen3:14b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 3,
"testsTotal": 8,
"testsPassed": 1,
"testsFailed": 7,
"totalDurationMs": 147057,
"totalTokens": 7799,
"avgTokPerSec": 56.209406465865904,
"promptChars": 11207,
"promptTokensEst": 2802,
"score": 28,
"stars": "★★☆☆☆",
"error": null,
"round": 4
},
{
"model": "qwen3:14b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 3,
"testsTotal": 12,
"testsPassed": 12,
"testsFailed": 0,
"totalDurationMs": 227508,
"totalTokens": 12026,
"avgTokPerSec": 58.52888492610325,
"promptChars": 11809,
"promptTokensEst": 2952,
"score": 80,
"stars": "★★★★☆",
"error": null,
"round": 4
},
{
"model": "qwen3:8b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 3,
"testsTotal": 11,
"testsPassed": 11,
"testsFailed": 0,
"totalDurationMs": 131964,
"totalTokens": 11403,
"avgTokPerSec": 97.10963264920952,
"promptChars": 11786,
"promptTokensEst": 2947,
"score": 80,
"stars": "★★★★☆",
"error": null,
"round": 4
},
{
"model": "qwen3:8b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 38820,
"totalTokens": 1826,
"avgTokPerSec": 101.07773707712924,
"promptChars": 10568,
"promptTokensEst": 2642,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 4
},
{
"model": "qwen3:8b",
"scenario": "blog",
"reqOk": true,
"specOk": false,
"specEntities": 0,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 0,
"testsPassed": 0,
"testsFailed": 0,
"totalDurationMs": 0,
"totalTokens": 0,
"avgTokPerSec": 0,
"promptChars": 0,
"promptTokensEst": 0,
"score": 0,
"stars": "",
"error": "JSON-speksi epäonnistui",
"round": 4
},
{
"model": "qwen3-coder:30b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 1,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 39797,
"totalTokens": 3776,
"avgTokPerSec": 120.91801837211113,
"promptChars": 11435,
"promptTokensEst": 2859,
"score": 90,
"stars": "★★★★★",
"error": null,
"round": 5
},
{
"model": "qwen3-coder:30b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 3,
"testsTotal": 9,
"testsPassed": 8,
"testsFailed": 1,
"totalDurationMs": 87836,
"totalTokens": 9343,
"avgTokPerSec": 119.28783662683314,
"promptChars": 10718,
"promptTokensEst": 2680,
"score": 73,
"stars": "★★★★☆",
"error": null,
"round": 5
},
{
"model": "qwen3-coder:30b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 10,
"testsPassed": 10,
"testsFailed": 0,
"totalDurationMs": 36644,
"totalTokens": 3897,
"avgTokPerSec": 122.28607796191666,
"promptChars": 12598,
"promptTokensEst": 3150,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 5
},
{
"model": "qwen3:14b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 1,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 127532,
"totalTokens": 3919,
"avgTokPerSec": 34.13133325491828,
"promptChars": 11352,
"promptTokensEst": 2838,
"score": 90,
"stars": "★★★★★",
"error": null,
"round": 5
},
{
"model": "qwen3:14b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 3,
"testsTotal": 8,
"testsPassed": 6,
"testsFailed": 2,
"totalDurationMs": 217365,
"totalTokens": 7764,
"avgTokPerSec": 38.67613170588518,
"promptChars": 10834,
"promptTokensEst": 2709,
"score": 65,
"stars": "★★★☆☆",
"error": null,
"round": 5
},
{
"model": "qwen3:14b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 3,
"testsTotal": 14,
"testsPassed": 7,
"testsFailed": 7,
"totalDurationMs": 248311,
"totalTokens": 13443,
"avgTokPerSec": 58.05680015263308,
"promptChars": 12219,
"promptTokensEst": 3055,
"score": 50,
"stars": "★★★☆☆",
"error": null,
"round": 5
},
{
"model": "qwen3:8b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 38326,
"totalTokens": 2079,
"avgTokPerSec": 100.89778087504016,
"promptChars": 10908,
"promptTokensEst": 2727,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 5
},
{
"model": "qwen3:8b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 60823,
"totalTokens": 1772,
"avgTokPerSec": 96.76383996716295,
"promptChars": 10378,
"promptTokensEst": 2595,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 5
},
{
"model": "qwen3:8b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 11,
"testsPassed": 11,
"testsFailed": 0,
"totalDurationMs": 81654,
"totalTokens": 3458,
"avgTokPerSec": 95.65675360193613,
"promptChars": 11914,
"promptTokensEst": 2979,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 5
}
]