Files
agentic-studio/kipina-codebench/results/2026-04-14T06-49.json
jaakko e7b33b7d6f CodeBench: Rust-tuki (--lang rust), golden example todo-rs, Dockerfile.cargo-test
- golden-examples/todo-rs/: Axum 0.8 + SQLx + SQLite, 10 testiä
- prompts/code-rs.md: Rust-koodingenerointiprompt
- Dockerfile.cargo-test: rust:1.87-slim testikontti
- benchmark.mjs: --lang python|rust, kieliriippuvainen golden example,
  parseri tukee cargo test -tuloksia, src/ alihakemistot
2026-04-14 10:55:50 +03:00

422 lines
9.2 KiB
JSON

[
{
"model": "qwen3.5:9b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 3,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 1,
"testsPassed": 0,
"testsFailed": 1,
"totalDurationMs": 65901,
"totalTokens": 5056,
"avgTokPerSec": 82.99139473832963,
"promptChars": 12334,
"promptTokensEst": 3084,
"score": 40,
"stars": "★★☆☆☆",
"error": null
},
{
"model": "qwen3.5:9b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 1,
"fixRounds": 2,
"testsTotal": 1,
"testsPassed": 0,
"testsFailed": 1,
"totalDurationMs": 74087,
"totalTokens": 5645,
"avgTokPerSec": 83.57073831360164,
"promptChars": 10757,
"promptTokensEst": 2689,
"score": 20,
"stars": "★☆☆☆☆",
"error": null
},
{
"model": "qwen3.5:9b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 1,
"testsPassed": 0,
"testsFailed": 1,
"totalDurationMs": 49830,
"totalTokens": 3803,
"avgTokPerSec": 83.26266260763309,
"promptChars": 10826,
"promptTokensEst": 2707,
"score": 40,
"stars": "★★☆☆☆",
"error": null
},
{
"model": "gemma4:e4b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 1,
"testsPassed": 0,
"testsFailed": 1,
"totalDurationMs": 57032,
"totalTokens": 4924,
"avgTokPerSec": 106.02334905805122,
"promptChars": 11313,
"promptTokensEst": 2828,
"score": 40,
"stars": "★★☆☆☆",
"error": null
},
{
"model": "gemma4:e4b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 7,
"testsPassed": 5,
"testsFailed": 2,
"totalDurationMs": 54307,
"totalTokens": 5060,
"avgTokPerSec": 106.89447491163497,
"promptChars": 11225,
"promptTokensEst": 2806,
"score": 83,
"stars": "★★★★☆",
"error": null
},
{
"model": "gemma4:e4b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 11,
"testsPassed": 2,
"testsFailed": 9,
"totalDurationMs": 57080,
"totalTokens": 5310,
"avgTokPerSec": 106.64914988130955,
"promptChars": 11791,
"promptTokensEst": 2948,
"score": 51,
"stars": "★★★☆☆",
"error": null
},
{
"model": "qwen2.5-coder:3b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 3,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 1,
"testsPassed": 0,
"testsFailed": 1,
"totalDurationMs": 22377,
"totalTokens": 3534,
"avgTokPerSec": 201.24475679283708,
"promptChars": 11479,
"promptTokensEst": 2870,
"score": 40,
"stars": "★★☆☆☆",
"error": null
},
{
"model": "qwen2.5-coder:3b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 8,
"fixRounds": 2,
"testsTotal": 1,
"testsPassed": 0,
"testsFailed": 1,
"totalDurationMs": 44520,
"totalTokens": 7495,
"avgTokPerSec": 201.87149050701015,
"promptChars": 11886,
"promptTokensEst": 2972,
"score": 20,
"stars": "★☆☆☆☆",
"error": null
},
{
"model": "qwen2.5-coder:3b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 1,
"testsPassed": 0,
"testsFailed": 1,
"totalDurationMs": 20136,
"totalTokens": 3338,
"avgTokPerSec": 200.86152095722105,
"promptChars": 11228,
"promptTokensEst": 2807,
"score": 40,
"stars": "★★☆☆☆",
"error": null
},
{
"model": "qwen2.5-coder:7b",
"scenario": "todo",
"reqOk": true,
"specOk": false,
"specEntities": 0,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 0,
"testsPassed": 0,
"testsFailed": 0,
"totalDurationMs": 0,
"totalTokens": 0,
"avgTokPerSec": 0,
"promptChars": 0,
"promptTokensEst": 0,
"score": 0,
"stars": "",
"error": "JSON-speksi epäonnistui"
},
{
"model": "qwen2.5-coder:7b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 1,
"testsPassed": 0,
"testsFailed": 1,
"totalDurationMs": 20012,
"totalTokens": 2119,
"avgTokPerSec": 122.7557304112134,
"promptChars": 10342,
"promptTokensEst": 2586,
"score": 40,
"stars": "★★☆☆☆",
"error": null
},
{
"model": "qwen2.5-coder:7b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 1,
"testsPassed": 0,
"testsFailed": 1,
"totalDurationMs": 26133,
"totalTokens": 2715,
"avgTokPerSec": 121.94987205993503,
"promptChars": 11193,
"promptTokensEst": 2798,
"score": 40,
"stars": "★★☆☆☆",
"error": null
},
{
"model": "qwen3:14b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 44757,
"totalTokens": 2156,
"avgTokPerSec": 60.77636586631207,
"promptChars": 9635,
"promptTokensEst": 2409,
"score": 100,
"stars": "★★★★★",
"error": null
},
{
"model": "qwen3:14b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 7,
"testsPassed": 7,
"testsFailed": 0,
"totalDurationMs": 41166,
"totalTokens": 2282,
"avgTokPerSec": 61.14821289733007,
"promptChars": 9575,
"promptTokensEst": 2394,
"score": 100,
"stars": "★★★★★",
"error": null
},
{
"model": "qwen3:14b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 12,
"testsPassed": 12,
"testsFailed": 0,
"totalDurationMs": 66478,
"totalTokens": 3681,
"avgTokPerSec": 60.493817783668725,
"promptChars": 10500,
"promptTokensEst": 2625,
"score": 100,
"stars": "★★★★★",
"error": null
},
{
"model": "qwen3:8b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 7,
"testsPassed": 7,
"testsFailed": 0,
"totalDurationMs": 29801,
"totalTokens": 2249,
"avgTokPerSec": 98.5661742189331,
"promptChars": 9615,
"promptTokensEst": 2404,
"score": 100,
"stars": "★★★★★",
"error": null
},
{
"model": "qwen3:8b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 8,
"testsPassed": 6,
"testsFailed": 2,
"totalDurationMs": 22974,
"totalTokens": 2050,
"avgTokPerSec": 101.2398768597589,
"promptChars": 9273,
"promptTokensEst": 2318,
"score": 85,
"stars": "★★★★☆",
"error": null
},
{
"model": "qwen3:8b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 12,
"testsPassed": 12,
"testsFailed": 0,
"totalDurationMs": 39335,
"totalTokens": 3537,
"avgTokPerSec": 100.10984073540648,
"promptChars": 10525,
"promptTokensEst": 2631,
"score": 100,
"stars": "★★★★★",
"error": null
},
{
"model": "qwen3:4b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 58668,
"totalTokens": 7134,
"avgTokPerSec": 141.76822189196028,
"promptChars": 15202,
"promptTokensEst": 3801,
"score": 100,
"stars": "★★★★★",
"error": null
},
{
"model": "qwen3:4b",
"scenario": "users",
"reqOk": true,
"specOk": false,
"specEntities": 0,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 0,
"testsPassed": 0,
"testsFailed": 0,
"totalDurationMs": 0,
"totalTokens": 0,
"avgTokPerSec": 0,
"promptChars": 0,
"promptTokensEst": 0,
"score": 0,
"stars": "",
"error": "JSON-speksi epäonnistui"
},
{
"model": "qwen3:4b",
"scenario": "blog",
"reqOk": true,
"specOk": false,
"specEntities": 0,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 0,
"testsPassed": 0,
"testsFailed": 0,
"totalDurationMs": 0,
"totalTokens": 0,
"avgTokPerSec": 0,
"promptChars": 0,
"promptTokensEst": 0,
"score": 0,
"stars": "",
"error": "JSON-speksi epäonnistui"
}
]