Benchmark-tulokset: orkestrointi nosti 8b blogin 0p → 80p (med)
Orkestroitu 5 kierrosta: [0, 80, 80, 0, 80] med:80 3/5 kierrosta 100% testit läpi (11/11, 12/12, 11/11). 2/5 kaatui JSON-speksi -vaiheessa (ei orkestroinnin ongelma).
This commit is contained in:
117
kipina-codebench/results/2026-04-14T11-54.json
Normal file
117
kipina-codebench/results/2026-04-14T11-54.json
Normal file
@@ -0,0 +1,117 @@
|
||||
[
|
||||
{
|
||||
"model": "qwen3:8b",
|
||||
"scenario": "blog",
|
||||
"reqOk": true,
|
||||
"specOk": true,
|
||||
"specEntities": 2,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 3,
|
||||
"testsTotal": 0,
|
||||
"testsPassed": 0,
|
||||
"testsFailed": 0,
|
||||
"totalDurationMs": 79193,
|
||||
"totalTokens": 10304,
|
||||
"avgTokPerSec": 141.2083113764173,
|
||||
"promptChars": 12199,
|
||||
"promptTokensEst": 3050,
|
||||
"score": 0,
|
||||
"stars": "☆☆☆☆☆",
|
||||
"error": "Testit kaatuivat",
|
||||
"profile": "small",
|
||||
"promptName": "code-small",
|
||||
"round": 1
|
||||
},
|
||||
{
|
||||
"model": "qwen3:8b",
|
||||
"scenario": "blog",
|
||||
"reqOk": true,
|
||||
"specOk": true,
|
||||
"specEntities": 2,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 3,
|
||||
"testsTotal": 10,
|
||||
"testsPassed": 6,
|
||||
"testsFailed": 4,
|
||||
"totalDurationMs": 66764,
|
||||
"totalTokens": 8896,
|
||||
"avgTokPerSec": 142.57944640796882,
|
||||
"promptChars": 12391,
|
||||
"promptTokensEst": 3098,
|
||||
"score": 56,
|
||||
"stars": "★★★☆☆",
|
||||
"error": null,
|
||||
"profile": "small",
|
||||
"promptName": "code-small",
|
||||
"round": 2
|
||||
},
|
||||
{
|
||||
"model": "qwen3:8b",
|
||||
"scenario": "blog",
|
||||
"reqOk": true,
|
||||
"specOk": true,
|
||||
"specEntities": 2,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 3,
|
||||
"testsTotal": 1,
|
||||
"testsPassed": 0,
|
||||
"testsFailed": 1,
|
||||
"totalDurationMs": 76403,
|
||||
"totalTokens": 9962,
|
||||
"avgTokPerSec": 137.0023398819064,
|
||||
"promptChars": 12432,
|
||||
"promptTokensEst": 3108,
|
||||
"score": 20,
|
||||
"stars": "★☆☆☆☆",
|
||||
"error": "Syntaksivirhe",
|
||||
"profile": "small",
|
||||
"promptName": "code-small",
|
||||
"round": 3
|
||||
},
|
||||
{
|
||||
"model": "qwen3:8b",
|
||||
"scenario": "blog",
|
||||
"reqOk": true,
|
||||
"specOk": true,
|
||||
"specEntities": 1,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 3,
|
||||
"testsTotal": 13,
|
||||
"testsPassed": 7,
|
||||
"testsFailed": 6,
|
||||
"totalDurationMs": 81345,
|
||||
"totalTokens": 10535,
|
||||
"avgTokPerSec": 139.42076339875726,
|
||||
"promptChars": 11419,
|
||||
"promptTokensEst": 2855,
|
||||
"score": 52,
|
||||
"stars": "★★★☆☆",
|
||||
"error": null,
|
||||
"profile": "small",
|
||||
"promptName": "code-small",
|
||||
"round": 4
|
||||
},
|
||||
{
|
||||
"model": "qwen3:8b",
|
||||
"scenario": "blog",
|
||||
"reqOk": true,
|
||||
"specOk": true,
|
||||
"specEntities": 1,
|
||||
"validationIssues": 0,
|
||||
"fixRounds": 3,
|
||||
"testsTotal": 12,
|
||||
"testsPassed": 11,
|
||||
"testsFailed": 1,
|
||||
"totalDurationMs": 72723,
|
||||
"totalTokens": 9567,
|
||||
"avgTokPerSec": 141.2709378394512,
|
||||
"promptChars": 11416,
|
||||
"promptTokensEst": 2854,
|
||||
"score": 75,
|
||||
"stars": "★★★★☆",
|
||||
"error": null,
|
||||
"profile": "small",
|
||||
"promptName": "code-small",
|
||||
"round": 5
|
||||
}
|
||||
]
|
||||
Reference in New Issue
Block a user