Benchmark-tulokset: orkestrointi nosti 8b blogin 0p → 80p (med)

Orkestroitu 5 kierrosta: [0, 80, 80, 0, 80] med:80
3/5 kierrosta 100% testit läpi (11/11, 12/12, 11/11).
2/5 kaatui JSON-speksivaiheessa (ei orkestroinnin ongelma).
This commit is contained in:
2026-04-14 15:45:27 +03:00
parent 0b926c2cad
commit dcdb360098
6 changed files with 892 additions and 0 deletions

View File

@@ -0,0 +1,113 @@
[
{
"model": "qwen3:8b",
"scenario": "blog",
"reqOk": true,
"specOk": false,
"specEntities": 0,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 0,
"testsPassed": 0,
"testsFailed": 0,
"totalDurationMs": 0,
"totalTokens": 0,
"avgTokPerSec": 0,
"promptChars": 0,
"promptTokensEst": 0,
"score": 0,
"stars": "",
"error": "JSON-speksi epäonnistui",
"round": 1
},
{
"model": "qwen3:8b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 3,
"testsTotal": 11,
"testsPassed": 11,
"testsFailed": 0,
"totalDurationMs": 143640,
"totalTokens": 12611,
"avgTokPerSec": 96.28061629672216,
"promptChars": 12125,
"promptTokensEst": 3031,
"score": 80,
"stars": "★★★★☆",
"error": null,
"profile": "small",
"promptName": "code-small",
"round": 2
},
{
"model": "qwen3:8b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 2,
"testsTotal": 12,
"testsPassed": 12,
"testsFailed": 0,
"totalDurationMs": 116061,
"totalTokens": 10181,
"avgTokPerSec": 96.63321228455318,
"promptChars": 12435,
"promptTokensEst": 3109,
"score": 80,
"stars": "★★★★☆",
"error": null,
"profile": "small",
"promptName": "code-small",
"round": 3
},
{
"model": "qwen3:8b",
"scenario": "blog",
"reqOk": true,
"specOk": false,
"specEntities": 0,
"validationIssues": 0,
"fixRounds": 0,
"testsTotal": 0,
"testsPassed": 0,
"testsFailed": 0,
"totalDurationMs": 0,
"totalTokens": 0,
"avgTokPerSec": 0,
"promptChars": 0,
"promptTokensEst": 0,
"score": 0,
"stars": "",
"error": "JSON-speksi epäonnistui",
"round": 4
},
{
"model": "qwen3:8b",
"scenario": "blog",
"reqOk": true,
"specOk": true,
"specEntities": 2,
"validationIssues": 0,
"fixRounds": 2,
"testsTotal": 11,
"testsPassed": 11,
"testsFailed": 0,
"totalDurationMs": 113792,
"totalTokens": 10022,
"avgTokPerSec": 96.96815077469971,
"promptChars": 12260,
"promptTokensEst": 3065,
"score": 80,
"stars": "★★★★☆",
"error": null,
"profile": "small",
"promptName": "code-small",
"round": 5
}
]