Orkestroitu 5 kierrosta: pisteet [0, 80, 80, 0, 80], mediaani 80. 3/5 kierrosta: 100 % testeistä läpi (11/11, 12/12, 11/11). 2/5 kierrosta kaatui JSON-speksivaiheessa (ei orkestroinnin ongelma).
113 lines
2.4 KiB
JSON
113 lines
2.4 KiB
JSON
[
  {
    "model": "qwen3:8b",
    "scenario": "blog",
    "reqOk": true,
    "specOk": false,
    "specEntities": 0,
    "validationIssues": 0,
    "fixRounds": 0,
    "testsTotal": 0,
    "testsPassed": 0,
    "testsFailed": 0,
    "totalDurationMs": 0,
    "totalTokens": 0,
    "avgTokPerSec": 0,
    "promptChars": 0,
    "promptTokensEst": 0,
    "score": 0,
    "stars": "",
    "error": "JSON-speksi epäonnistui",
    "round": 1
  },
  {
    "model": "qwen3:8b",
    "scenario": "blog",
    "reqOk": true,
    "specOk": true,
    "specEntities": 2,
    "validationIssues": 0,
    "fixRounds": 3,
    "testsTotal": 11,
    "testsPassed": 11,
    "testsFailed": 0,
    "totalDurationMs": 143640,
    "totalTokens": 12611,
    "avgTokPerSec": 96.28061629672216,
    "promptChars": 12125,
    "promptTokensEst": 3031,
    "score": 80,
    "stars": "★★★★☆",
    "error": null,
    "profile": "small",
    "promptName": "code-small",
    "round": 2
  },
  {
    "model": "qwen3:8b",
    "scenario": "blog",
    "reqOk": true,
    "specOk": true,
    "specEntities": 2,
    "validationIssues": 0,
    "fixRounds": 2,
    "testsTotal": 12,
    "testsPassed": 12,
    "testsFailed": 0,
    "totalDurationMs": 116061,
    "totalTokens": 10181,
    "avgTokPerSec": 96.63321228455318,
    "promptChars": 12435,
    "promptTokensEst": 3109,
    "score": 80,
    "stars": "★★★★☆",
    "error": null,
    "profile": "small",
    "promptName": "code-small",
    "round": 3
  },
  {
    "model": "qwen3:8b",
    "scenario": "blog",
    "reqOk": true,
    "specOk": false,
    "specEntities": 0,
    "validationIssues": 0,
    "fixRounds": 0,
    "testsTotal": 0,
    "testsPassed": 0,
    "testsFailed": 0,
    "totalDurationMs": 0,
    "totalTokens": 0,
    "avgTokPerSec": 0,
    "promptChars": 0,
    "promptTokensEst": 0,
    "score": 0,
    "stars": "",
    "error": "JSON-speksi epäonnistui",
    "round": 4
  },
  {
    "model": "qwen3:8b",
    "scenario": "blog",
    "reqOk": true,
    "specOk": true,
    "specEntities": 2,
    "validationIssues": 0,
    "fixRounds": 2,
    "testsTotal": 11,
    "testsPassed": 11,
    "testsFailed": 0,
    "totalDurationMs": 113792,
    "totalTokens": 10022,
    "avgTokPerSec": 96.96815077469971,
    "promptChars": 12260,
    "promptTokensEst": 3065,
    "score": 80,
    "stars": "★★★★☆",
    "error": null,
    "profile": "small",
    "promptName": "code-small",
    "round": 5
  }
]