Files
agentic-studio/kipina-codebench/results/2026-04-14T11-54.json
jaakko dcdb360098 Benchmark-tulokset: orkestrointi nosti 8b blogin 0p → 80p (med)
Orkestroitu 5 kierrosta: [0, 80, 80, 0, 80] med:80
3/5 kierrosta 100% testit läpi (11/11, 12/12, 11/11).
2/5 kaatui JSON-speksi -vaiheessa (ei orkestroinnin ongelma).
2026-04-14 15:45:27 +03:00

117 lines
2.6 KiB
JSON

[
{
"model": "qwen3:8b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 3,
"testsTotal": 0,
"testsPassed": 0,
"testsFailed": 0,
"totalDurationMs": 79193,
"totalTokens": 10304,
"avgTokPerSec": 141.2083113764173,
"promptChars": 12199,
"promptTokensEst": 3050,
"score": 0,
"stars": "☆☆☆☆☆",
"error": "Testit kaatuivat",
"profile": "small",
"promptName": "code-small",
"round": 1
},
{
"model": "qwen3:8b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 3,
"testsTotal": 10,
"testsPassed": 6,
"testsFailed": 4,
"totalDurationMs": 66764,
"totalTokens": 8896,
"avgTokPerSec": 142.57944640796882,
"promptChars": 12391,
"promptTokensEst": 3098,
"score": 56,
"stars": "★★★☆☆",
"error": null,
"profile": "small",
"promptName": "code-small",
"round": 2
},
{
"model": "qwen3:8b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 3,
"testsTotal": 1,
"testsPassed": 0,
"testsFailed": 1,
"totalDurationMs": 76403,
"totalTokens": 9962,
"avgTokPerSec": 137.0023398819064,
"promptChars": 12432,
"promptTokensEst": 3108,
"score": 20,
"stars": "★☆☆☆☆",
"error": "Syntaksivirhe",
"profile": "small",
"promptName": "code-small",
"round": 3
},
{
"model": "qwen3:8b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 3,
"testsTotal": 13,
"testsPassed": 7,
"testsFailed": 6,
"totalDurationMs": 81345,
"totalTokens": 10535,
"avgTokPerSec": 139.42076339875726,
"promptChars": 11419,
"promptTokensEst": 2855,
"score": 52,
"stars": "★★★☆☆",
"error": null,
"profile": "small",
"promptName": "code-small",
"round": 4
},
{
"model": "qwen3:8b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 1,
"validationIssues": 0,
"fixRounds": 3,
"testsTotal": 12,
"testsPassed": 11,
"testsFailed": 1,
"totalDurationMs": 72723,
"totalTokens": 9567,
"avgTokPerSec": 141.2709378394512,
"promptChars": 11416,
"promptTokensEst": 2854,
"score": 75,
"stars": "★★★★☆",
"error": null,
"profile": "small",
"promptName": "code-small",
"round": 5
}
]