todo.md yhdistää koodin ja annotaatiot: miksi pattern on valittu, mitä EI saa tehdä. 1567 tokenia (vs raaka 1340, compact 335). Benchmark lataa .md-version oletuksena, fallback erillisiin tiedostoihin.
947 lines
20 KiB
JSON
947 lines
20 KiB
JSON
[
|
|
{
|
|
"model": "qwen3-coder:30b",
|
|
"scenario": "todo",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 1,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 6,
|
|
"testsPassed": 1,
|
|
"testsFailed": 5,
|
|
"totalDurationMs": 30801,
|
|
"totalTokens": 2333,
|
|
"avgTokPerSec": 122.77922150989748,
|
|
"promptChars": 10015,
|
|
"promptTokensEst": 2504,
|
|
"score": 50,
|
|
"stars": "★★★☆☆",
|
|
"error": null,
|
|
"round": 1
|
|
},
|
|
{
|
|
"model": "qwen3-coder:30b",
|
|
"scenario": "users",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 1,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 7,
|
|
"testsPassed": 6,
|
|
"testsFailed": 1,
|
|
"totalDurationMs": 25495,
|
|
"totalTokens": 2714,
|
|
"avgTokPerSec": 122.70970007652487,
|
|
"promptChars": 9891,
|
|
"promptTokensEst": 2473,
|
|
"score": 91,
|
|
"stars": "★★★★★",
|
|
"error": null,
|
|
"round": 1
|
|
},
|
|
{
|
|
"model": "qwen3-coder:30b",
|
|
"scenario": "blog",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 2,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 11,
|
|
"testsPassed": 10,
|
|
"testsFailed": 1,
|
|
"totalDurationMs": 37153,
|
|
"totalTokens": 3979,
|
|
"avgTokPerSec": 121.9183958236036,
|
|
"promptChars": 11158,
|
|
"promptTokensEst": 2790,
|
|
"score": 95,
|
|
"stars": "★★★★★",
|
|
"error": null,
|
|
"round": 1
|
|
},
|
|
{
|
|
"model": "qwen3:14b",
|
|
"scenario": "todo",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 1,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 7,
|
|
"testsPassed": 6,
|
|
"testsFailed": 1,
|
|
"totalDurationMs": 43456,
|
|
"totalTokens": 2411,
|
|
"avgTokPerSec": 60.89226084568145,
|
|
"promptChars": 9831,
|
|
"promptTokensEst": 2458,
|
|
"score": 91,
|
|
"stars": "★★★★★",
|
|
"error": null,
|
|
"round": 1
|
|
},
|
|
{
|
|
"model": "qwen3:14b",
|
|
"scenario": "users",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 1,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 8,
|
|
"testsPassed": 8,
|
|
"testsFailed": 0,
|
|
"totalDurationMs": 40376,
|
|
"totalTokens": 2237,
|
|
"avgTokPerSec": 61.028627032662456,
|
|
"promptChars": 9343,
|
|
"promptTokensEst": 2336,
|
|
"score": 100,
|
|
"stars": "★★★★★",
|
|
"error": null,
|
|
"round": 1
|
|
},
|
|
{
|
|
"model": "qwen3:14b",
|
|
"scenario": "blog",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 2,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 12,
|
|
"testsPassed": 2,
|
|
"testsFailed": 10,
|
|
"totalDurationMs": 68620,
|
|
"totalTokens": 3796,
|
|
"avgTokPerSec": 60.47793268944476,
|
|
"promptChars": 10497,
|
|
"promptTokensEst": 2624,
|
|
"score": 50,
|
|
"stars": "★★★☆☆",
|
|
"error": null,
|
|
"round": 1
|
|
},
|
|
{
|
|
"model": "qwen3:8b",
|
|
"scenario": "todo",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 1,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 7,
|
|
"testsPassed": 7,
|
|
"testsFailed": 0,
|
|
"totalDurationMs": 25235,
|
|
"totalTokens": 2269,
|
|
"avgTokPerSec": 101.24212769079884,
|
|
"promptChars": 9294,
|
|
"promptTokensEst": 2324,
|
|
"score": 100,
|
|
"stars": "★★★★★",
|
|
"error": null,
|
|
"round": 1
|
|
},
|
|
{
|
|
"model": "qwen3:8b",
|
|
"scenario": "users",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 1,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 8,
|
|
"testsPassed": 6,
|
|
"testsFailed": 2,
|
|
"totalDurationMs": 21720,
|
|
"totalTokens": 1942,
|
|
"avgTokPerSec": 101.65074583709965,
|
|
"promptChars": 9020,
|
|
"promptTokensEst": 2255,
|
|
"score": 85,
|
|
"stars": "★★★★☆",
|
|
"error": null,
|
|
"round": 1
|
|
},
|
|
{
|
|
"model": "qwen3:8b",
|
|
"scenario": "blog",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 2,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 11,
|
|
"testsPassed": 10,
|
|
"testsFailed": 1,
|
|
"totalDurationMs": 39006,
|
|
"totalTokens": 3509,
|
|
"avgTokPerSec": 100.43593706181406,
|
|
"promptChars": 10372,
|
|
"promptTokensEst": 2593,
|
|
"score": 95,
|
|
"stars": "★★★★★",
|
|
"error": null,
|
|
"round": 1
|
|
},
|
|
{
|
|
"model": "qwen3-coder:30b",
|
|
"scenario": "todo",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 1,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 6,
|
|
"testsPassed": 6,
|
|
"testsFailed": 0,
|
|
"totalDurationMs": 21989,
|
|
"totalTokens": 2339,
|
|
"avgTokPerSec": 122.8454095677367,
|
|
"promptChars": 10052,
|
|
"promptTokensEst": 2513,
|
|
"score": 100,
|
|
"stars": "★★★★★",
|
|
"error": null,
|
|
"round": 2
|
|
},
|
|
{
|
|
"model": "qwen3-coder:30b",
|
|
"scenario": "users",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 1,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 6,
|
|
"testsPassed": 6,
|
|
"testsFailed": 0,
|
|
"totalDurationMs": 23997,
|
|
"totalTokens": 2551,
|
|
"avgTokPerSec": 122.23722733560855,
|
|
"promptChars": 9973,
|
|
"promptTokensEst": 2493,
|
|
"score": 100,
|
|
"stars": "★★★★★",
|
|
"error": null,
|
|
"round": 2
|
|
},
|
|
{
|
|
"model": "qwen3-coder:30b",
|
|
"scenario": "blog",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 2,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 8,
|
|
"testsPassed": 8,
|
|
"testsFailed": 0,
|
|
"totalDurationMs": 30169,
|
|
"totalTokens": 3249,
|
|
"avgTokPerSec": 123.04696524796096,
|
|
"promptChars": 11097,
|
|
"promptTokensEst": 2774,
|
|
"score": 100,
|
|
"stars": "★★★★★",
|
|
"error": null,
|
|
"round": 2
|
|
},
|
|
{
|
|
"model": "qwen3:14b",
|
|
"scenario": "todo",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 1,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 9,
|
|
"testsPassed": 6,
|
|
"testsFailed": 3,
|
|
"totalDurationMs": 47091,
|
|
"totalTokens": 2602,
|
|
"avgTokPerSec": 60.962687726457375,
|
|
"promptChars": 9633,
|
|
"promptTokensEst": 2408,
|
|
"score": 80,
|
|
"stars": "★★★★☆",
|
|
"error": null,
|
|
"round": 2
|
|
},
|
|
{
|
|
"model": "qwen3:14b",
|
|
"scenario": "users",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 1,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 7,
|
|
"testsPassed": 7,
|
|
"testsFailed": 0,
|
|
"totalDurationMs": 41747,
|
|
"totalTokens": 2313,
|
|
"avgTokPerSec": 60.949025583617605,
|
|
"promptChars": 9373,
|
|
"promptTokensEst": 2343,
|
|
"score": 100,
|
|
"stars": "★★★★★",
|
|
"error": null,
|
|
"round": 2
|
|
},
|
|
{
|
|
"model": "qwen3:14b",
|
|
"scenario": "blog",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 2,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 12,
|
|
"testsPassed": 2,
|
|
"testsFailed": 10,
|
|
"totalDurationMs": 66888,
|
|
"totalTokens": 3699,
|
|
"avgTokPerSec": 60.49540514685331,
|
|
"promptChars": 10323,
|
|
"promptTokensEst": 2581,
|
|
"score": 50,
|
|
"stars": "★★★☆☆",
|
|
"error": null,
|
|
"round": 2
|
|
},
|
|
{
|
|
"model": "qwen3:8b",
|
|
"scenario": "todo",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 1,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 8,
|
|
"testsPassed": 7,
|
|
"testsFailed": 1,
|
|
"totalDurationMs": 27036,
|
|
"totalTokens": 2434,
|
|
"avgTokPerSec": 101.01399069228444,
|
|
"promptChars": 9513,
|
|
"promptTokensEst": 2378,
|
|
"score": 93,
|
|
"stars": "★★★★★",
|
|
"error": null,
|
|
"round": 2
|
|
},
|
|
{
|
|
"model": "qwen3:8b",
|
|
"scenario": "users",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 1,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 7,
|
|
"testsPassed": 6,
|
|
"testsFailed": 1,
|
|
"totalDurationMs": 20927,
|
|
"totalTokens": 1872,
|
|
"avgTokPerSec": 101.45096098956486,
|
|
"promptChars": 8881,
|
|
"promptTokensEst": 2220,
|
|
"score": 91,
|
|
"stars": "★★★★★",
|
|
"error": null,
|
|
"round": 2
|
|
},
|
|
{
|
|
"model": "qwen3:8b",
|
|
"scenario": "blog",
|
|
"reqOk": true,
|
|
"specOk": false,
|
|
"specEntities": 0,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 0,
|
|
"testsPassed": 0,
|
|
"testsFailed": 0,
|
|
"totalDurationMs": 0,
|
|
"totalTokens": 0,
|
|
"avgTokPerSec": 0,
|
|
"promptChars": 0,
|
|
"promptTokensEst": 0,
|
|
"score": 0,
|
|
"stars": "",
|
|
"error": "JSON-speksi epäonnistui",
|
|
"round": 2
|
|
},
|
|
{
|
|
"model": "qwen3-coder:30b",
|
|
"scenario": "todo",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 1,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 8,
|
|
"testsPassed": 6,
|
|
"testsFailed": 2,
|
|
"totalDurationMs": 26919,
|
|
"totalTokens": 2889,
|
|
"avgTokPerSec": 123.63666629145064,
|
|
"promptChars": 10162,
|
|
"promptTokensEst": 2541,
|
|
"score": 85,
|
|
"stars": "★★★★☆",
|
|
"error": null,
|
|
"round": 3
|
|
},
|
|
{
|
|
"model": "qwen3-coder:30b",
|
|
"scenario": "users",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 1,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 8,
|
|
"testsPassed": 8,
|
|
"testsFailed": 0,
|
|
"totalDurationMs": 27592,
|
|
"totalTokens": 2946,
|
|
"avgTokPerSec": 122.33273400152825,
|
|
"promptChars": 9469,
|
|
"promptTokensEst": 2367,
|
|
"score": 100,
|
|
"stars": "★★★★★",
|
|
"error": null,
|
|
"round": 3
|
|
},
|
|
{
|
|
"model": "qwen3-coder:30b",
|
|
"scenario": "blog",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 2,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 11,
|
|
"testsPassed": 11,
|
|
"testsFailed": 0,
|
|
"totalDurationMs": 35734,
|
|
"totalTokens": 3827,
|
|
"avgTokPerSec": 122.65156559717951,
|
|
"promptChars": 11086,
|
|
"promptTokensEst": 2772,
|
|
"score": 100,
|
|
"stars": "★★★★★",
|
|
"error": null,
|
|
"round": 3
|
|
},
|
|
{
|
|
"model": "qwen3:14b",
|
|
"scenario": "todo",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 1,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 9,
|
|
"testsPassed": 6,
|
|
"testsFailed": 3,
|
|
"totalDurationMs": 50372,
|
|
"totalTokens": 2795,
|
|
"avgTokPerSec": 60.91611850918806,
|
|
"promptChars": 9758,
|
|
"promptTokensEst": 2440,
|
|
"score": 80,
|
|
"stars": "★★★★☆",
|
|
"error": null,
|
|
"round": 3
|
|
},
|
|
{
|
|
"model": "qwen3:14b",
|
|
"scenario": "users",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 1,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 6,
|
|
"testsPassed": 1,
|
|
"testsFailed": 5,
|
|
"totalDurationMs": 38716,
|
|
"totalTokens": 2144,
|
|
"avgTokPerSec": 61.0412890406478,
|
|
"promptChars": 9415,
|
|
"promptTokensEst": 2354,
|
|
"score": 50,
|
|
"stars": "★★★☆☆",
|
|
"error": null,
|
|
"round": 3
|
|
},
|
|
{
|
|
"model": "qwen3:14b",
|
|
"scenario": "blog",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 2,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 14,
|
|
"testsPassed": 7,
|
|
"testsFailed": 7,
|
|
"totalDurationMs": 74882,
|
|
"totalTokens": 4130,
|
|
"avgTokPerSec": 60.32640855026445,
|
|
"promptChars": 10506,
|
|
"promptTokensEst": 2627,
|
|
"score": 70,
|
|
"stars": "★★★★☆",
|
|
"error": null,
|
|
"round": 3
|
|
},
|
|
{
|
|
"model": "qwen3:8b",
|
|
"scenario": "todo",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 3,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 0,
|
|
"testsPassed": 0,
|
|
"testsFailed": 0,
|
|
"totalDurationMs": 35913,
|
|
"totalTokens": 3218,
|
|
"avgTokPerSec": 100.38516205100154,
|
|
"promptChars": 11338,
|
|
"promptTokensEst": 2835,
|
|
"score": 0,
|
|
"stars": "☆☆☆☆☆",
|
|
"error": "Testit kaatuivat",
|
|
"round": 3
|
|
},
|
|
{
|
|
"model": "qwen3:8b",
|
|
"scenario": "users",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 1,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 7,
|
|
"testsPassed": 7,
|
|
"testsFailed": 0,
|
|
"totalDurationMs": 20974,
|
|
"totalTokens": 1880,
|
|
"avgTokPerSec": 101.52450928280543,
|
|
"promptChars": 8803,
|
|
"promptTokensEst": 2201,
|
|
"score": 100,
|
|
"stars": "★★★★★",
|
|
"error": null,
|
|
"round": 3
|
|
},
|
|
{
|
|
"model": "qwen3:8b",
|
|
"scenario": "blog",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 2,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 11,
|
|
"testsPassed": 9,
|
|
"testsFailed": 2,
|
|
"totalDurationMs": 36005,
|
|
"totalTokens": 3243,
|
|
"avgTokPerSec": 100.44301406462307,
|
|
"promptChars": 10414,
|
|
"promptTokensEst": 2604,
|
|
"score": 89,
|
|
"stars": "★★★★☆",
|
|
"error": null,
|
|
"round": 3
|
|
},
|
|
{
|
|
"model": "qwen3-coder:30b",
|
|
"scenario": "todo",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 1,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 7,
|
|
"testsPassed": 1,
|
|
"testsFailed": 6,
|
|
"totalDurationMs": 23071,
|
|
"totalTokens": 2469,
|
|
"avgTokPerSec": 124.09643322620661,
|
|
"promptChars": 9960,
|
|
"promptTokensEst": 2490,
|
|
"score": 49,
|
|
"stars": "★★☆☆☆",
|
|
"error": null,
|
|
"round": 4
|
|
},
|
|
{
|
|
"model": "qwen3-coder:30b",
|
|
"scenario": "users",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 1,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 8,
|
|
"testsPassed": 2,
|
|
"testsFailed": 6,
|
|
"totalDurationMs": 27062,
|
|
"totalTokens": 2907,
|
|
"avgTokPerSec": 123.35530975346687,
|
|
"promptChars": 9558,
|
|
"promptTokensEst": 2390,
|
|
"score": 55,
|
|
"stars": "★★★☆☆",
|
|
"error": null,
|
|
"round": 4
|
|
},
|
|
{
|
|
"model": "qwen3-coder:30b",
|
|
"scenario": "blog",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 2,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 9,
|
|
"testsPassed": 9,
|
|
"testsFailed": 0,
|
|
"totalDurationMs": 29395,
|
|
"totalTokens": 3156,
|
|
"avgTokPerSec": 123.22575073561812,
|
|
"promptChars": 10574,
|
|
"promptTokensEst": 2644,
|
|
"score": 100,
|
|
"stars": "★★★★★",
|
|
"error": null,
|
|
"round": 4
|
|
},
|
|
{
|
|
"model": "qwen3:14b",
|
|
"scenario": "todo",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 1,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 6,
|
|
"testsPassed": 6,
|
|
"testsFailed": 0,
|
|
"totalDurationMs": 39590,
|
|
"totalTokens": 2198,
|
|
"avgTokPerSec": 61.051945510465806,
|
|
"promptChars": 9664,
|
|
"promptTokensEst": 2416,
|
|
"score": 100,
|
|
"stars": "★★★★★",
|
|
"error": null,
|
|
"round": 4
|
|
},
|
|
{
|
|
"model": "qwen3:14b",
|
|
"scenario": "users",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 1,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 6,
|
|
"testsPassed": 1,
|
|
"testsFailed": 5,
|
|
"totalDurationMs": 36950,
|
|
"totalTokens": 2042,
|
|
"avgTokPerSec": 61.01436784429489,
|
|
"promptChars": 9225,
|
|
"promptTokensEst": 2306,
|
|
"score": 50,
|
|
"stars": "★★★☆☆",
|
|
"error": null,
|
|
"round": 4
|
|
},
|
|
{
|
|
"model": "qwen3:14b",
|
|
"scenario": "blog",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 2,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 14,
|
|
"testsPassed": 2,
|
|
"testsFailed": 12,
|
|
"totalDurationMs": 80600,
|
|
"totalTokens": 4437,
|
|
"avgTokPerSec": 60.29371170543078,
|
|
"promptChars": 10688,
|
|
"promptTokensEst": 2672,
|
|
"score": 49,
|
|
"stars": "★★☆☆☆",
|
|
"error": null,
|
|
"round": 4
|
|
},
|
|
{
|
|
"model": "qwen3:8b",
|
|
"scenario": "todo",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 1,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 0,
|
|
"testsPassed": 0,
|
|
"testsFailed": 0,
|
|
"totalDurationMs": 29125,
|
|
"totalTokens": 2619,
|
|
"avgTokPerSec": 100.90587777586212,
|
|
"promptChars": 9899,
|
|
"promptTokensEst": 2475,
|
|
"score": 0,
|
|
"stars": "☆☆☆☆☆",
|
|
"error": "Testit kaatuivat",
|
|
"round": 4
|
|
},
|
|
{
|
|
"model": "qwen3:8b",
|
|
"scenario": "users",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 1,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 8,
|
|
"testsPassed": 8,
|
|
"testsFailed": 0,
|
|
"totalDurationMs": 21847,
|
|
"totalTokens": 1957,
|
|
"avgTokPerSec": 101.44111070734304,
|
|
"promptChars": 8946,
|
|
"promptTokensEst": 2237,
|
|
"score": 100,
|
|
"stars": "★★★★★",
|
|
"error": null,
|
|
"round": 4
|
|
},
|
|
{
|
|
"model": "qwen3:8b",
|
|
"scenario": "blog",
|
|
"reqOk": true,
|
|
"specOk": false,
|
|
"specEntities": 0,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 0,
|
|
"testsPassed": 0,
|
|
"testsFailed": 0,
|
|
"totalDurationMs": 0,
|
|
"totalTokens": 0,
|
|
"avgTokPerSec": 0,
|
|
"promptChars": 0,
|
|
"promptTokensEst": 0,
|
|
"score": 0,
|
|
"stars": "",
|
|
"error": "JSON-speksi epäonnistui",
|
|
"round": 4
|
|
},
|
|
{
|
|
"model": "qwen3-coder:30b",
|
|
"scenario": "todo",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 1,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 6,
|
|
"testsPassed": 1,
|
|
"testsFailed": 5,
|
|
"totalDurationMs": 21127,
|
|
"totalTokens": 2245,
|
|
"avgTokPerSec": 124.22714049663371,
|
|
"promptChars": 9972,
|
|
"promptTokensEst": 2493,
|
|
"score": 50,
|
|
"stars": "★★★☆☆",
|
|
"error": null,
|
|
"round": 5
|
|
},
|
|
{
|
|
"model": "qwen3-coder:30b",
|
|
"scenario": "users",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 1,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 9,
|
|
"testsPassed": 7,
|
|
"testsFailed": 2,
|
|
"totalDurationMs": 30281,
|
|
"totalTokens": 3079,
|
|
"avgTokPerSec": 123.00254714651271,
|
|
"promptChars": 9562,
|
|
"promptTokensEst": 2391,
|
|
"score": 87,
|
|
"stars": "★★★★☆",
|
|
"error": null,
|
|
"round": 5
|
|
},
|
|
{
|
|
"model": "qwen3-coder:30b",
|
|
"scenario": "blog",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 2,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 12,
|
|
"testsPassed": 12,
|
|
"testsFailed": 0,
|
|
"totalDurationMs": 39630,
|
|
"totalTokens": 4274,
|
|
"avgTokPerSec": 123.08303937451802,
|
|
"promptChars": 11119,
|
|
"promptTokensEst": 2780,
|
|
"score": 100,
|
|
"stars": "★★★★★",
|
|
"error": null,
|
|
"round": 5
|
|
},
|
|
{
|
|
"model": "qwen3:14b",
|
|
"scenario": "todo",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 1,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 6,
|
|
"testsPassed": 6,
|
|
"testsFailed": 0,
|
|
"totalDurationMs": 38032,
|
|
"totalTokens": 2104,
|
|
"avgTokPerSec": 61.05445464163662,
|
|
"promptChars": 9455,
|
|
"promptTokensEst": 2364,
|
|
"score": 100,
|
|
"stars": "★★★★★",
|
|
"error": null,
|
|
"round": 5
|
|
},
|
|
{
|
|
"model": "qwen3:14b",
|
|
"scenario": "users",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 1,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 0,
|
|
"testsPassed": 0,
|
|
"testsFailed": 0,
|
|
"totalDurationMs": 39620,
|
|
"totalTokens": 2193,
|
|
"avgTokPerSec": 61.04565233675101,
|
|
"promptChars": 9481,
|
|
"promptTokensEst": 2370,
|
|
"score": 0,
|
|
"stars": "☆☆☆☆☆",
|
|
"error": "Testit kaatuivat",
|
|
"round": 5
|
|
},
|
|
{
|
|
"model": "qwen3:14b",
|
|
"scenario": "blog",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 2,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 9,
|
|
"testsPassed": 7,
|
|
"testsFailed": 2,
|
|
"totalDurationMs": 63579,
|
|
"totalTokens": 3520,
|
|
"avgTokPerSec": 60.51513453009977,
|
|
"promptChars": 10493,
|
|
"promptTokensEst": 2623,
|
|
"score": 87,
|
|
"stars": "★★★★☆",
|
|
"error": null,
|
|
"round": 5
|
|
},
|
|
{
|
|
"model": "qwen3:8b",
|
|
"scenario": "todo",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 1,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 9,
|
|
"testsPassed": 6,
|
|
"testsFailed": 3,
|
|
"totalDurationMs": 30845,
|
|
"totalTokens": 2777,
|
|
"avgTokPerSec": 100.79046137130972,
|
|
"promptChars": 9507,
|
|
"promptTokensEst": 2377,
|
|
"score": 80,
|
|
"stars": "★★★★☆",
|
|
"error": null,
|
|
"round": 5
|
|
},
|
|
{
|
|
"model": "qwen3:8b",
|
|
"scenario": "users",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 1,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 8,
|
|
"testsPassed": 6,
|
|
"testsFailed": 2,
|
|
"totalDurationMs": 21413,
|
|
"totalTokens": 1914,
|
|
"avgTokPerSec": 101.25525436264132,
|
|
"promptChars": 8804,
|
|
"promptTokensEst": 2201,
|
|
"score": 85,
|
|
"stars": "★★★★☆",
|
|
"error": null,
|
|
"round": 5
|
|
},
|
|
{
|
|
"model": "qwen3:8b",
|
|
"scenario": "blog",
|
|
"reqOk": true,
|
|
"specOk": false,
|
|
"specEntities": 0,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 0,
|
|
"testsPassed": 0,
|
|
"testsFailed": 0,
|
|
"totalDurationMs": 0,
|
|
"totalTokens": 0,
|
|
"avgTokPerSec": 0,
|
|
"promptChars": 0,
|
|
"promptTokensEst": 0,
|
|
"score": 0,
|
|
"stars": "",
|
|
"error": "JSON-speksi epäonnistui",
|
|
"round": 5
|
|
}
|
|
] |