Files
agentic-studio/kipina-codebench/results/2026-04-14T10-31.json
jaakko 92964e322f CodeBench: mallikohtaiset promptiprofiilit (profiles.json)
- profiles.json: malli → profiili → prompti -mappaus
- code-small.md: tiivistetty prompti pienille malleille (8b, 4b)
- benchmark valitsee automaattisesti oikean promptin mallin perusteella
- qwen3-coder:30b → code.md (large), qwen3:8b → code-small.md (small)
2026-04-14 13:54:26 +03:00

317 lines
6.8 KiB
JSON

[
{
"model": "qwen3:8b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 97527,
"totalTokens": 2228,
"avgTokPerSec": 100.69171830800946,
"promptChars": 11566,
"promptTokensEst": 2892,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 1
},
{
"model": "qwen3:8b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 7,
"testsPassed": 7,
"testsFailed": 0,
"totalDurationMs": 39549,
"totalTokens": 1960,
"avgTokPerSec": 100.98265593129491,
"promptChars": 11073,
"promptTokensEst": 2768,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 1
},
{
"model": "qwen3:8b",
"scenario": "blog",
"reqOk": true,
"specOk": false,
"specEntities": 0,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 0,
"testsPassed": 0,
"testsFailed": 0,
"totalDurationMs": 0,
"totalTokens": 0,
"avgTokPerSec": 0,
"promptChars": 0,
"promptTokensEst": 0,
"score": 0,
"stars": "",
"error": "JSON-speksi epäonnistui",
"round": 1
},
{
"model": "qwen3:8b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 3,
"testsTotal": 0,
"testsPassed": 0,
"testsFailed": 0,
"totalDurationMs": 131339,
"totalTokens": 11518,
"avgTokPerSec": 96.52358107464266,
"promptChars": 12388,
"promptTokensEst": 3097,
"score": 0,
"stars": "☆☆☆☆☆",
"error": "Testit kaatuivat",
"round": 2
},
{
"model": "qwen3:8b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 20658,
"totalTokens": 1808,
"avgTokPerSec": 101.0081173861862,
"promptChars": 11057,
"promptTokensEst": 2764,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 2
},
{
"model": "qwen3:8b",
"scenario": "blog",
"reqOk": true,
"specOk": false,
"specEntities": 0,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 0,
"testsPassed": 0,
"testsFailed": 0,
"totalDurationMs": 0,
"totalTokens": 0,
"avgTokPerSec": 0,
"promptChars": 0,
"promptTokensEst": 0,
"score": 0,
"stars": "",
"error": "JSON-speksi epäonnistui",
"round": 2
},
{
"model": "qwen3:8b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 1,
"fixRounds": 5,
"testsTotal": 0,
"testsPassed": 0,
"testsFailed": 0,
"totalDurationMs": 320031,
"totalTokens": 11985,
"avgTokPerSec": 54.915025374575386,
"promptChars": 12517,
"promptTokensEst": 3129,
"score": 0,
"stars": "☆☆☆☆☆",
"error": "Testit kaatuivat",
"round": 3
},
{
"model": "qwen3:8b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 7,
"testsPassed": 7,
"testsFailed": 0,
"totalDurationMs": 28654,
"totalTokens": 1877,
"avgTokPerSec": 100.70920643946336,
"promptChars": 10747,
"promptTokensEst": 2687,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 3
},
{
"model": "qwen3:8b",
"scenario": "blog",
"reqOk": true,
"specOk": false,
"specEntities": 0,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 0,
"testsPassed": 0,
"testsFailed": 0,
"totalDurationMs": 0,
"totalTokens": 0,
"avgTokPerSec": 0,
"promptChars": 0,
"promptTokensEst": 0,
"score": 0,
"stars": "",
"error": "JSON-speksi epäonnistui",
"round": 3
},
{
"model": "qwen3:8b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 1,
"testsTotal": 12,
"testsPassed": 12,
"testsFailed": 0,
"totalDurationMs": 67943,
"totalTokens": 6002,
"avgTokPerSec": 98.29436788902672,
"promptChars": 12389,
"promptTokensEst": 3097,
"score": 90,
"stars": "★★★★★",
"error": null,
"round": 4
},
{
"model": "qwen3:8b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 20203,
"totalTokens": 1774,
"avgTokPerSec": 100.9066297884274,
"promptChars": 10905,
"promptTokensEst": 2726,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 4
},
{
"model": "qwen3:8b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 3,
"testsTotal": 13,
"testsPassed": 12,
"testsFailed": 1,
"totalDurationMs": 148491,
"totalTokens": 12747,
"avgTokPerSec": 95.18237885727869,
"promptChars": 12476,
"promptTokensEst": 3119,
"score": 75,
"stars": "★★★★☆",
"error": null,
"round": 4
},
{
"model": "qwen3:8b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 23830,
"totalTokens": 2102,
"avgTokPerSec": 100.641489789061,
"promptChars": 11404,
"promptTokensEst": 2851,
"score": 100,
"stars": "★★★★★",
"error": null,
"round": 5
},
{
"model": "qwen3:8b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 3,
"testsTotal": 8,
"testsPassed": 6,
"testsFailed": 2,
"totalDurationMs": 122453,
"totalTokens": 7285,
"avgTokPerSec": 94.12482830400619,
"promptChars": 11400,
"promptTokensEst": 2850,
"score": 65,
"stars": "★★★☆☆",
"error": null,
"round": 5
},
{
"model": "qwen3:8b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 3,
"testsTotal": 11,
"testsPassed": 10,
"testsFailed": 1,
"totalDurationMs": 147125,
"totalTokens": 9893,
"avgTokPerSec": 97.37021605085566,
"promptChars": 12455,
"promptTokensEst": 3114,
"score": 75,
"stars": "★★★★☆",
"error": null,
"round": 5
}
]