CodeBench: mallikohtaiset promptiprofiilit (profiles.json)
- profiles.json: malli → profiili → prompti -mappaus - code-small.md: tiivistetty prompti pienille malleille (8b, 4b) - benchmark valitsee automaattisesti oikean promptin mallin perusteella - qwen3-coder:30b → code.md (large), qwen3:8b → code-small.md (small)
This commit is contained in:
317
kipina-codebench/results/2026-04-14T10-31.json
Normal file
317
kipina-codebench/results/2026-04-14T10-31.json
Normal file
@@ -0,0 +1,317 @@
|
||||
[
|
||||
{
|
||||
"model": "qwen3:8b",
|
||||
"scenario": "todo",
|
||||
"reqOk": true,
|
||||
"specOk": true,
|
||||
"specEntities": 1,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 0,
|
||||
"testsTotal": 6,
|
||||
"testsPassed": 6,
|
||||
"testsFailed": 0,
|
||||
"totalDurationMs": 97527,
|
||||
"totalTokens": 2228,
|
||||
"avgTokPerSec": 100.69171830800946,
|
||||
"promptChars": 11566,
|
||||
"promptTokensEst": 2892,
|
||||
"score": 100,
|
||||
"stars": "★★★★★",
|
||||
"error": null,
|
||||
"round": 1
|
||||
},
|
||||
{
|
||||
"model": "qwen3:8b",
|
||||
"scenario": "users",
|
||||
"reqOk": true,
|
||||
"specOk": true,
|
||||
"specEntities": 1,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 0,
|
||||
"testsTotal": 7,
|
||||
"testsPassed": 7,
|
||||
"testsFailed": 0,
|
||||
"totalDurationMs": 39549,
|
||||
"totalTokens": 1960,
|
||||
"avgTokPerSec": 100.98265593129491,
|
||||
"promptChars": 11073,
|
||||
"promptTokensEst": 2768,
|
||||
"score": 100,
|
||||
"stars": "★★★★★",
|
||||
"error": null,
|
||||
"round": 1
|
||||
},
|
||||
{
|
||||
"model": "qwen3:8b",
|
||||
"scenario": "blog",
|
||||
"reqOk": true,
|
||||
"specOk": false,
|
||||
"specEntities": 0,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 0,
|
||||
"testsTotal": 0,
|
||||
"testsPassed": 0,
|
||||
"testsFailed": 0,
|
||||
"totalDurationMs": 0,
|
||||
"totalTokens": 0,
|
||||
"avgTokPerSec": 0,
|
||||
"promptChars": 0,
|
||||
"promptTokensEst": 0,
|
||||
"score": 0,
|
||||
"stars": "",
|
||||
"error": "JSON-speksi epäonnistui",
|
||||
"round": 1
|
||||
},
|
||||
{
|
||||
"model": "qwen3:8b",
|
||||
"scenario": "todo",
|
||||
"reqOk": true,
|
||||
"specOk": true,
|
||||
"specEntities": 2,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 3,
|
||||
"testsTotal": 0,
|
||||
"testsPassed": 0,
|
||||
"testsFailed": 0,
|
||||
"totalDurationMs": 131339,
|
||||
"totalTokens": 11518,
|
||||
"avgTokPerSec": 96.52358107464266,
|
||||
"promptChars": 12388,
|
||||
"promptTokensEst": 3097,
|
||||
"score": 0,
|
||||
"stars": "☆☆☆☆☆",
|
||||
"error": "Testit kaatuivat",
|
||||
"round": 2
|
||||
},
|
||||
{
|
||||
"model": "qwen3:8b",
|
||||
"scenario": "users",
|
||||
"reqOk": true,
|
||||
"specOk": true,
|
||||
"specEntities": 1,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 0,
|
||||
"testsTotal": 6,
|
||||
"testsPassed": 6,
|
||||
"testsFailed": 0,
|
||||
"totalDurationMs": 20658,
|
||||
"totalTokens": 1808,
|
||||
"avgTokPerSec": 101.0081173861862,
|
||||
"promptChars": 11057,
|
||||
"promptTokensEst": 2764,
|
||||
"score": 100,
|
||||
"stars": "★★★★★",
|
||||
"error": null,
|
||||
"round": 2
|
||||
},
|
||||
{
|
||||
"model": "qwen3:8b",
|
||||
"scenario": "blog",
|
||||
"reqOk": true,
|
||||
"specOk": false,
|
||||
"specEntities": 0,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 0,
|
||||
"testsTotal": 0,
|
||||
"testsPassed": 0,
|
||||
"testsFailed": 0,
|
||||
"totalDurationMs": 0,
|
||||
"totalTokens": 0,
|
||||
"avgTokPerSec": 0,
|
||||
"promptChars": 0,
|
||||
"promptTokensEst": 0,
|
||||
"score": 0,
|
||||
"stars": "",
|
||||
"error": "JSON-speksi epäonnistui",
|
||||
"round": 2
|
||||
},
|
||||
{
|
||||
"model": "qwen3:8b",
|
||||
"scenario": "todo",
|
||||
"reqOk": true,
|
||||
"specOk": true,
|
||||
"specEntities": 2,
|
||||
"validationIssues": 1,
|
||||
"fixRounds": 5,
|
||||
"testsTotal": 0,
|
||||
"testsPassed": 0,
|
||||
"testsFailed": 0,
|
||||
"totalDurationMs": 320031,
|
||||
"totalTokens": 11985,
|
||||
"avgTokPerSec": 54.915025374575386,
|
||||
"promptChars": 12517,
|
||||
"promptTokensEst": 3129,
|
||||
"score": 0,
|
||||
"stars": "☆☆☆☆☆",
|
||||
"error": "Testit kaatuivat",
|
||||
"round": 3
|
||||
},
|
||||
{
|
||||
"model": "qwen3:8b",
|
||||
"scenario": "users",
|
||||
"reqOk": true,
|
||||
"specOk": true,
|
||||
"specEntities": 1,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 0,
|
||||
"testsTotal": 7,
|
||||
"testsPassed": 7,
|
||||
"testsFailed": 0,
|
||||
"totalDurationMs": 28654,
|
||||
"totalTokens": 1877,
|
||||
"avgTokPerSec": 100.70920643946336,
|
||||
"promptChars": 10747,
|
||||
"promptTokensEst": 2687,
|
||||
"score": 100,
|
||||
"stars": "★★★★★",
|
||||
"error": null,
|
||||
"round": 3
|
||||
},
|
||||
{
|
||||
"model": "qwen3:8b",
|
||||
"scenario": "blog",
|
||||
"reqOk": true,
|
||||
"specOk": false,
|
||||
"specEntities": 0,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 0,
|
||||
"testsTotal": 0,
|
||||
"testsPassed": 0,
|
||||
"testsFailed": 0,
|
||||
"totalDurationMs": 0,
|
||||
"totalTokens": 0,
|
||||
"avgTokPerSec": 0,
|
||||
"promptChars": 0,
|
||||
"promptTokensEst": 0,
|
||||
"score": 0,
|
||||
"stars": "",
|
||||
"error": "JSON-speksi epäonnistui",
|
||||
"round": 3
|
||||
},
|
||||
{
|
||||
"model": "qwen3:8b",
|
||||
"scenario": "todo",
|
||||
"reqOk": true,
|
||||
"specOk": true,
|
||||
"specEntities": 2,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 1,
|
||||
"testsTotal": 12,
|
||||
"testsPassed": 12,
|
||||
"testsFailed": 0,
|
||||
"totalDurationMs": 67943,
|
||||
"totalTokens": 6002,
|
||||
"avgTokPerSec": 98.29436788902672,
|
||||
"promptChars": 12389,
|
||||
"promptTokensEst": 3097,
|
||||
"score": 90,
|
||||
"stars": "★★★★★",
|
||||
"error": null,
|
||||
"round": 4
|
||||
},
|
||||
{
|
||||
"model": "qwen3:8b",
|
||||
"scenario": "users",
|
||||
"reqOk": true,
|
||||
"specOk": true,
|
||||
"specEntities": 1,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 0,
|
||||
"testsTotal": 6,
|
||||
"testsPassed": 6,
|
||||
"testsFailed": 0,
|
||||
"totalDurationMs": 20203,
|
||||
"totalTokens": 1774,
|
||||
"avgTokPerSec": 100.9066297884274,
|
||||
"promptChars": 10905,
|
||||
"promptTokensEst": 2726,
|
||||
"score": 100,
|
||||
"stars": "★★★★★",
|
||||
"error": null,
|
||||
"round": 4
|
||||
},
|
||||
{
|
||||
"model": "qwen3:8b",
|
||||
"scenario": "blog",
|
||||
"reqOk": true,
|
||||
"specOk": true,
|
||||
"specEntities": 2,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 3,
|
||||
"testsTotal": 13,
|
||||
"testsPassed": 12,
|
||||
"testsFailed": 1,
|
||||
"totalDurationMs": 148491,
|
||||
"totalTokens": 12747,
|
||||
"avgTokPerSec": 95.18237885727869,
|
||||
"promptChars": 12476,
|
||||
"promptTokensEst": 3119,
|
||||
"score": 75,
|
||||
"stars": "★★★★☆",
|
||||
"error": null,
|
||||
"round": 4
|
||||
},
|
||||
{
|
||||
"model": "qwen3:8b",
|
||||
"scenario": "todo",
|
||||
"reqOk": true,
|
||||
"specOk": true,
|
||||
"specEntities": 1,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 0,
|
||||
"testsTotal": 6,
|
||||
"testsPassed": 6,
|
||||
"testsFailed": 0,
|
||||
"totalDurationMs": 23830,
|
||||
"totalTokens": 2102,
|
||||
"avgTokPerSec": 100.641489789061,
|
||||
"promptChars": 11404,
|
||||
"promptTokensEst": 2851,
|
||||
"score": 100,
|
||||
"stars": "★★★★★",
|
||||
"error": null,
|
||||
"round": 5
|
||||
},
|
||||
{
|
||||
"model": "qwen3:8b",
|
||||
"scenario": "users",
|
||||
"reqOk": true,
|
||||
"specOk": true,
|
||||
"specEntities": 1,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 3,
|
||||
"testsTotal": 8,
|
||||
"testsPassed": 6,
|
||||
"testsFailed": 2,
|
||||
"totalDurationMs": 122453,
|
||||
"totalTokens": 7285,
|
||||
"avgTokPerSec": 94.12482830400619,
|
||||
"promptChars": 11400,
|
||||
"promptTokensEst": 2850,
|
||||
"score": 65,
|
||||
"stars": "★★★☆☆",
|
||||
"error": null,
|
||||
"round": 5
|
||||
},
|
||||
{
|
||||
"model": "qwen3:8b",
|
||||
"scenario": "blog",
|
||||
"reqOk": true,
|
||||
"specOk": true,
|
||||
"specEntities": 2,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 3,
|
||||
"testsTotal": 11,
|
||||
"testsPassed": 10,
|
||||
"testsFailed": 1,
|
||||
"totalDurationMs": 147125,
|
||||
"totalTokens": 9893,
|
||||
"avgTokPerSec": 97.37021605085566,
|
||||
"promptChars": 12455,
|
||||
"promptTokensEst": 3114,
|
||||
"score": 75,
|
||||
"stars": "★★★★☆",
|
||||
"error": null,
|
||||
"round": 5
|
||||
}
|
||||
]
|
||||
Reference in New Issue
Block a user