Files
agentic-studio/kipina-codebench/results/2026-04-14T07-55.json
jaakko 6a40ca5730 CodeBench: golden example markdown-muodossa (koodi + selitykset)
todo.md yhdistää koodin ja annotaatiot: miksi pattern on valittu,
mitä EI saa tehdä. 1567 tokenia (vs raaka 1340, compact 335).
Benchmark lataa .md-version oletuksena, fallback erillisiin tiedostoihin.
2026-04-14 12:38:25 +03:00

122 lines
2.6 KiB
JSON

[
{
"model": "qwen3:14b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 9,
"testsPassed": 6,
"testsFailed": 3,
"totalDurationMs": 50350,
"totalTokens": 2797,
"avgTokPerSec": 60.919860198859574,
"promptChars": 9858,
"promptTokensEst": 2465,
"score": 80,
"stars": "★★★★☆",
"error": null
},
{
"model": "qwen3:14b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 8,
"testsPassed": 6,
"testsFailed": 2,
"totalDurationMs": 46557,
"totalTokens": 2584,
"avgTokPerSec": 60.88834523948,
"promptChars": 9544,
"promptTokensEst": 2386,
"score": 85,
"stars": "★★★★☆",
"error": null
},
{
"model": "qwen3:14b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 15,
"testsPassed": 2,
"testsFailed": 13,
"totalDurationMs": 90761,
"totalTokens": 4979,
"avgTokPerSec": 60.19247492391319,
"promptChars": 10521,
"promptTokensEst": 2630,
"score": 48,
"stars": "★★☆☆☆",
"error": null
},
{
"model": "qwen3:8b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 0,
"testsPassed": 0,
"testsFailed": 0,
"totalDurationMs": 27360,
"totalTokens": 2466,
"avgTokPerSec": 100.9922018173994,
"promptChars": 9767,
"promptTokensEst": 2442,
"score": 0,
"stars": "☆☆☆☆☆",
"error": "Testit kaatuivat"
},
{
"model": "qwen3:8b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 7,
"testsPassed": 7,
"testsFailed": 0,
"totalDurationMs": 20920,
"totalTokens": 1876,
"avgTokPerSec": 101.60760023892685,
"promptChars": 8782,
"promptTokensEst": 2196,
"score": 100,
"stars": "★★★★★",
"error": null
},
{
"model": "qwen3:8b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 10,
"testsPassed": 9,
"testsFailed": 1,
"totalDurationMs": 35766,
"totalTokens": 3217,
"avgTokPerSec": 100.40066102398943,
"promptChars": 10334,
"promptTokensEst": 2584,
"score": 94,
"stars": "★★★★★",
"error": null
}
]