top3: qwen3-coder:30b ★★★★★ 97p, codestral:22b ★★★★☆ 88p, qwen3.5:35b 40p mistral: codestral:22b 80p, mistral-small3.1 30p, devstral:24b 44p
182 lines
4.0 KiB
JSON
182 lines
4.0 KiB
JSON
[
|
|
{
|
|
"model": "codestral:22b",
|
|
"scenario": "todo",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 1,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 6,
|
|
"testsPassed": 6,
|
|
"testsFailed": 0,
|
|
"totalDurationMs": 63028,
|
|
"totalTokens": 2390,
|
|
"avgTokPerSec": 44.09843659433429,
|
|
"promptChars": 9567,
|
|
"promptTokensEst": 2392,
|
|
"score": 100,
|
|
"stars": "★★★★★",
|
|
"error": null
|
|
},
|
|
{
|
|
"model": "codestral:22b",
|
|
"scenario": "users",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 1,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 4,
|
|
"testsPassed": 4,
|
|
"testsFailed": 0,
|
|
"totalDurationMs": 58359,
|
|
"totalTokens": 2313,
|
|
"avgTokPerSec": 44.04431775388366,
|
|
"promptChars": 9641,
|
|
"promptTokensEst": 2410,
|
|
"score": 100,
|
|
"stars": "★★★★★",
|
|
"error": null
|
|
},
|
|
{
|
|
"model": "codestral:22b",
|
|
"scenario": "blog",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 2,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 1,
|
|
"testsPassed": 0,
|
|
"testsFailed": 1,
|
|
"totalDurationMs": 52020,
|
|
"totalTokens": 2073,
|
|
"avgTokPerSec": 44.03716103774298,
|
|
"promptChars": 10007,
|
|
"promptTokensEst": 2502,
|
|
"score": 40,
|
|
"stars": "★★☆☆☆",
|
|
"error": null
|
|
},
|
|
{
|
|
"model": "mistral-small3.1:24b",
|
|
"scenario": "todo",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 1,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 7,
|
|
"testsPassed": 6,
|
|
"testsFailed": 1,
|
|
"totalDurationMs": 76602,
|
|
"totalTokens": 2820,
|
|
"avgTokPerSec": 41.65340751865168,
|
|
"promptChars": 10816,
|
|
"promptTokensEst": 2704,
|
|
"score": 91,
|
|
"stars": "★★★★★",
|
|
"error": null
|
|
},
|
|
{
|
|
"model": "mistral-small3.1:24b",
|
|
"scenario": "users",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 2,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 0,
|
|
"testsPassed": 0,
|
|
"testsFailed": 0,
|
|
"totalDurationMs": 0,
|
|
"totalTokens": 0,
|
|
"avgTokPerSec": 0,
|
|
"promptChars": 11004,
|
|
"promptTokensEst": 2751,
|
|
"score": 0,
|
|
"stars": "",
|
|
"error": "Puuttuvat: test_main.py"
|
|
},
|
|
{
|
|
"model": "mistral-small3.1:24b",
|
|
"scenario": "blog",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 2,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 0,
|
|
"testsPassed": 0,
|
|
"testsFailed": 0,
|
|
"totalDurationMs": 0,
|
|
"totalTokens": 0,
|
|
"avgTokPerSec": 0,
|
|
"promptChars": 10573,
|
|
"promptTokensEst": 2643,
|
|
"score": 0,
|
|
"stars": "",
|
|
"error": "Puuttuvat: test_main.py"
|
|
},
|
|
{
|
|
"model": "devstral:24b",
|
|
"scenario": "todo",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 1,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 1,
|
|
"testsPassed": 0,
|
|
"testsFailed": 1,
|
|
"totalDurationMs": 54454,
|
|
"totalTokens": 1952,
|
|
"avgTokPerSec": 42.767057828688735,
|
|
"promptChars": 9829,
|
|
"promptTokensEst": 2457,
|
|
"score": 40,
|
|
"stars": "★★☆☆☆",
|
|
"error": null
|
|
},
|
|
{
|
|
"model": "devstral:24b",
|
|
"scenario": "users",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 1,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 5,
|
|
"testsPassed": 1,
|
|
"testsFailed": 4,
|
|
"totalDurationMs": 50447,
|
|
"totalTokens": 1954,
|
|
"avgTokPerSec": 42.79877112859477,
|
|
"promptChars": 9678,
|
|
"promptTokensEst": 2420,
|
|
"score": 52,
|
|
"stars": "★★★☆☆",
|
|
"error": null
|
|
},
|
|
{
|
|
"model": "devstral:24b",
|
|
"scenario": "blog",
|
|
"reqOk": true,
|
|
"specOk": true,
|
|
"specEntities": 2,
|
|
"validationIssues": 0,
|
|
"fixRounds": 0,
|
|
"testsTotal": 1,
|
|
"testsPassed": 0,
|
|
"testsFailed": 1,
|
|
"totalDurationMs": 83061,
|
|
"totalTokens": 3251,
|
|
"avgTokPerSec": 42.647732012717476,
|
|
"promptChars": 10561,
|
|
"promptTokensEst": 2640,
|
|
"score": 40,
|
|
"stars": "★★☆☆☆",
|
|
"error": null
|
|
}
|
|
] |