Files
agentic-studio/kipina-codebench/results/2026-04-14T07-13.json
jaakko e7b33b7d6f CodeBench: Rust-tuki (--lang rust), golden example todo-rs, Dockerfile.cargo-test
- golden-examples/todo-rs/: Axum 0.8 + SQLx + SQLite, 10 testiä
- prompts/code-rs.md: Rust-koodingenerointiprompt
- Dockerfile.cargo-test: rust:1.87-slim testikontti
- benchmark.mjs: --lang python|rust, kieliriippuvainen golden example,
  parseri tukee cargo test -tuloksia, src/ alihakemistot
2026-04-14 10:55:50 +03:00

122 lines
2.6 KiB
JSON

[
{
"model": "qwen3:14b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 1,
"testsPassed": 0,
"testsFailed": 1,
"totalDurationMs": 186642,
"totalTokens": 10237,
"avgTokPerSec": 59.06411550065281,
"promptChars": 10576,
"promptTokensEst": 2644,
"score": 40,
"stars": "★★☆☆☆",
"error": null
},
{
"model": "qwen3:14b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 1,
"testsPassed": 0,
"testsFailed": 1,
"totalDurationMs": 121848,
"totalTokens": 6735,
"avgTokPerSec": 59.85231850668119,
"promptChars": 9684,
"promptTokensEst": 2421,
"score": 40,
"stars": "★★☆☆☆",
"error": null
},
{
"model": "qwen3:14b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 11,
"testsPassed": 9,
"testsFailed": 2,
"totalDurationMs": 83491,
"totalTokens": 4677,
"avgTokPerSec": 60.222832434869694,
"promptChars": 10423,
"promptTokensEst": 2606,
"score": 89,
"stars": "★★★★☆",
"error": null
},
{
"model": "qwen3:8b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 56288,
"totalTokens": 5235,
"avgTokPerSec": 99.60027546406452,
"promptChars": 9307,
"promptTokensEst": 2327,
"score": 100,
"stars": "★★★★★",
"error": null
},
{
"model": "qwen3:8b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 5,
"testsFailed": 1,
"totalDurationMs": 59639,
"totalTokens": 5526,
"avgTokPerSec": 99.6742208632186,
"promptChars": 9158,
"promptTokensEst": 2290,
"score": 90,
"stars": "★★★★★",
"error": null
},
{
"model": "qwen3:8b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 11,
"testsPassed": 10,
"testsFailed": 1,
"totalDurationMs": 131793,
"totalTokens": 11779,
"avgTokPerSec": 97.17878362853351,
"promptChars": 10390,
"promptTokensEst": 2598,
"score": 95,
"stars": "★★★★★",
"error": null
}
]