Files
agentic-studio/kipina-codebench/results/2026-04-14T07-18.json
jaakko e7b33b7d6f CodeBench: Rust-tuki (--lang rust), golden example todo-rs, Dockerfile.cargo-test
- golden-examples/todo-rs/: Axum 0.8 + SQLx + SQLite, 10 testiä
- prompts/code-rs.md: Rust-koodingenerointiprompt
- Dockerfile.cargo-test: rust:1.87-slim testikontti
- benchmark.mjs: --lang python|rust, kieliriippuvainen golden example,
  parseri tukee cargo test -tuloksia, src/ alihakemistot
2026-04-14 10:55:50 +03:00

122 lines
2.6 KiB
JSON

[
{
"model": "qwen3:14b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 1,
"testsPassed": 0,
"testsFailed": 1,
"totalDurationMs": 66903,
"totalTokens": 5454,
"avgTokPerSec": 86.45918994499432,
"promptChars": 9985,
"promptTokensEst": 2496,
"score": 40,
"stars": "★★☆☆☆",
"error": null
},
{
"model": "qwen3:14b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 1,
"testsPassed": 0,
"testsFailed": 1,
"totalDurationMs": 87618,
"totalTokens": 7150,
"avgTokPerSec": 87.21782190501095,
"promptChars": 9922,
"promptTokensEst": 2481,
"score": 40,
"stars": "★★☆☆☆",
"error": null
},
{
"model": "qwen3:14b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 9,
"testsPassed": 5,
"testsFailed": 4,
"totalDurationMs": 78398,
"totalTokens": 6427,
"avgTokPerSec": 85.52353711143463,
"promptChars": 10737,
"promptTokensEst": 2684,
"score": 73,
"stars": "★★★★☆",
"error": null
},
{
"model": "qwen3:8b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 8,
"testsPassed": 7,
"testsFailed": 1,
"totalDurationMs": 82750,
"totalTokens": 10054,
"avgTokPerSec": 139.90690936146032,
"promptChars": 9360,
"promptTokensEst": 2340,
"score": 93,
"stars": "★★★★★",
"error": null
},
{
"model": "qwen3:8b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 32233,
"totalTokens": 4404,
"avgTokPerSec": 143.4997404058814,
"promptChars": 9310,
"promptTokensEst": 2328,
"score": 100,
"stars": "★★★★★",
"error": null
},
{
"model": "qwen3:8b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 1,
"testsPassed": 0,
"testsFailed": 1,
"totalDurationMs": 88563,
"totalTokens": 11575,
"avgTokPerSec": 141.54675017528362,
"promptChars": 10567,
"promptTokensEst": 2642,
"score": 40,
"stars": "★★☆☆☆",
"error": null
}
]