Files
agentic-studio/kipina-codebench/results/2026-04-14T09-47.json
jaakko 92964e322f CodeBench: mallikohtaiset promptiprofiilit (profiles.json)
- profiles.json: malli → profiili → prompti -mappaus
- code-small.md: tiivistetty prompti pienille malleille (8b, 4b)
- benchmark valitsee automaattisesti oikean promptin mallin perusteella
- qwen3-coder:30b → code.md (large), qwen3:8b → code-small.md (small)
2026-04-14 13:54:26 +03:00

62 lines
1.3 KiB
JSON

[
{
"model": "qwen3:8b",
"scenario": "todo",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 3,
"testsTotal": 8,
"testsPassed": 6,
"testsFailed": 2,
"totalDurationMs": 97470,
"totalTokens": 8786,
"avgTokPerSec": 97.96636139685832,
"promptChars": 11290,
"promptTokensEst": 2823,
"score": 65,
"stars": "★★★☆☆",
"error": null
},
{
"model": "qwen3:8b",
"scenario": "users",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 6,
"testsPassed": 6,
"testsFailed": 0,
"totalDurationMs": 18951,
"totalTokens": 1666,
"avgTokPerSec": 101.807593927545,
"promptChars": 10293,
"promptTokensEst": 2573,
"score": 100,
"stars": "★★★★★",
"error": null
},
{
"model": "qwen3:8b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 3,
"testsTotal": 1,
"testsPassed": 0,
"testsFailed": 1,
"totalDurationMs": 126005,
"totalTokens": 11056,
"avgTokPerSec": 96.6373549161171,
"promptChars": 11878,
"promptTokensEst": 2970,
"score": 20,
"stars": "★☆☆☆☆",
"error": "Syntaksivirhe"
}
]