diff --git a/network-poc/tests/benchmark-results/2026-04-14_mistral.html b/network-poc/tests/benchmark-results/2026-04-14_mistral.html new file mode 100644 index 0000000..06898f7 --- /dev/null +++ b/network-poc/tests/benchmark-results/2026-04-14_mistral.html @@ -0,0 +1,183 @@ + + + + + +Kipina Model Benchmark + + + + +

Kipina Model Benchmark

+
+ +
+ +

Mallikohtainen yhteenveto

+
+ +

Kaikki tulokset

+
+ + + + diff --git a/network-poc/tests/benchmark-results/2026-04-14_mistral.json b/network-poc/tests/benchmark-results/2026-04-14_mistral.json new file mode 100644 index 0000000..7b8fd9b --- /dev/null +++ b/network-poc/tests/benchmark-results/2026-04-14_mistral.json @@ -0,0 +1,182 @@ +[ + { + "model": "codestral:22b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 6, + "testsPassed": 6, + "testsFailed": 0, + "totalDurationMs": 63028, + "totalTokens": 2390, + "avgTokPerSec": 44.09843659433429, + "promptChars": 9567, + "promptTokensEst": 2392, + "score": 100, + "stars": "★★★★★", + "error": null + }, + { + "model": "codestral:22b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 4, + "testsPassed": 4, + "testsFailed": 0, + "totalDurationMs": 58359, + "totalTokens": 2313, + "avgTokPerSec": 44.04431775388366, + "promptChars": 9641, + "promptTokensEst": 2410, + "score": 100, + "stars": "★★★★★", + "error": null + }, + { + "model": "codestral:22b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 1, + "testsPassed": 0, + "testsFailed": 1, + "totalDurationMs": 52020, + "totalTokens": 2073, + "avgTokPerSec": 44.03716103774298, + "promptChars": 10007, + "promptTokensEst": 2502, + "score": 40, + "stars": "★★☆☆☆", + "error": null + }, + { + "model": "mistral-small3.1:24b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 7, + "testsPassed": 6, + "testsFailed": 1, + "totalDurationMs": 76602, + "totalTokens": 2820, + "avgTokPerSec": 41.65340751865168, + "promptChars": 10816, + "promptTokensEst": 2704, + "score": 91, + "stars": "★★★★★", + "error": null + }, + { + "model": "mistral-small3.1:24b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 0, + "testsPassed": 0, + "testsFailed": 0, + "totalDurationMs": 0, + "totalTokens": 0, + "avgTokPerSec": 0, + "promptChars": 11004, + "promptTokensEst": 2751, + "score": 0, + "stars": "", + "error": "Puuttuvat: test_main.py" + }, + { + "model": "mistral-small3.1:24b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 0, + "testsPassed": 0, + "testsFailed": 0, + "totalDurationMs": 0, + "totalTokens": 0, + "avgTokPerSec": 0, + "promptChars": 10573, + "promptTokensEst": 2643, + "score": 0, + "stars": "", + "error": "Puuttuvat: test_main.py" + }, + { + "model": "devstral:24b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 1, + "testsPassed": 0, + "testsFailed": 1, + "totalDurationMs": 54454, + "totalTokens": 1952, + "avgTokPerSec": 42.767057828688735, + "promptChars": 9829, + "promptTokensEst": 2457, + "score": 40, + "stars": "★★☆☆☆", + "error": null + }, + { + "model": "devstral:24b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 5, + "testsPassed": 1, + "testsFailed": 4, + "totalDurationMs": 50447, + "totalTokens": 1954, + "avgTokPerSec": 42.79877112859477, + "promptChars": 9678, + "promptTokensEst": 2420, + "score": 52, + "stars": "★★★☆☆", + "error": null + }, + { + "model": "devstral:24b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 1, + "testsPassed": 0, + "testsFailed": 1, + "totalDurationMs": 83061, + "totalTokens": 3251, + "avgTokPerSec": 42.647732012717476, + "promptChars": 10561, + "promptTokensEst": 2640, + "score": 40, + "stars": "★★☆☆☆", + "error": null + } +] \ No newline at end of file diff --git a/network-poc/tests/benchmark-results/2026-04-14_top3.html b/network-poc/tests/benchmark-results/2026-04-14_top3.html new file mode 100644 index 0000000..94c39d8 --- /dev/null +++ b/network-poc/tests/benchmark-results/2026-04-14_top3.html @@ -0,0 +1,183 @@ + + + + + +Kipina Model Benchmark + + + + +

Kipina Model Benchmark

+
+ +
+ +

Mallikohtainen yhteenveto

+
+ +

Kaikki tulokset

+
+ + + + diff --git a/network-poc/tests/benchmark-results/2026-04-14_top3.json b/network-poc/tests/benchmark-results/2026-04-14_top3.json new file mode 100644 index 0000000..aa4db16 --- /dev/null +++ b/network-poc/tests/benchmark-results/2026-04-14_top3.json @@ -0,0 +1,182 @@ +[ + { + "model": "qwen3.5:35b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 1, + "testsPassed": 0, + "testsFailed": 1, + "totalDurationMs": 63592, + "totalTokens": 4103, + "avgTokPerSec": 88.29857987765199, + "promptChars": 11310, + "promptTokensEst": 2828, + "score": 40, + "stars": "★★☆☆☆", + "error": null + }, + { + "model": "qwen3.5:35b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 1, + "testsPassed": 0, + "testsFailed": 1, + "totalDurationMs": 35262, + "totalTokens": 2733, + "avgTokPerSec": 88.26749243915684, + "promptChars": 10165, + "promptTokensEst": 2541, + "score": 40, + "stars": "★★☆☆☆", + "error": null + }, + { + "model": "qwen3.5:35b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 1, + "testsPassed": 0, + "testsFailed": 1, + "totalDurationMs": 60346, + "totalTokens": 4728, + "avgTokPerSec": 87.67792775342463, + "promptChars": 11661, + "promptTokensEst": 2915, + "score": 40, + "stars": "★★☆☆☆", + "error": null + }, + { + "model": "codestral:22b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 5, + "testsPassed": 4, + "testsFailed": 1, + "totalDurationMs": 80515, + "totalTokens": 3081, + "avgTokPerSec": 43.828884806830445, + "promptChars": 10150, + "promptTokensEst": 2538, + "score": 88, + "stars": "★★★★☆", + "error": null + }, + { + "model": "codestral:22b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 5, + "testsPassed": 3, + "testsFailed": 2, + "totalDurationMs": 61598, + "totalTokens": 2441, + "avgTokPerSec": 44.017116943523455, + "promptChars": 9288, + "promptTokensEst": 2322, + "score": 76, + "stars": "★★★★☆", + "error": null + }, + { + "model": "codestral:22b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 6, + "testsPassed": 6, + "testsFailed": 0, + "totalDurationMs": 81568, + "totalTokens": 3229, + "avgTokPerSec": 43.67638078062432, + "promptChars": 10475, + "promptTokensEst": 2619, + "score": 100, + "stars": "★★★★★", + "error": null + }, + { + "model": "qwen3-coder:30b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 6, + "testsPassed": 6, + "testsFailed": 0, + "totalDurationMs": 30315, + "totalTokens": 2379, + "avgTokPerSec": 123.42041099401449, + "promptChars": 10111, + "promptTokensEst": 2528, + "score": 100, + "stars": "★★★★★", + "error": null + }, + { + "model": "qwen3-coder:30b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 7, + "testsPassed": 6, + "testsFailed": 1, + "totalDurationMs": 23071, + "totalTokens": 2443, + "avgTokPerSec": 123.11212122029796, + "promptChars": 9150, + "promptTokensEst": 2288, + "score": 91, + "stars": "★★★★★", + "error": null + }, + { + "model": "qwen3-coder:30b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 11, + "testsPassed": 11, + "testsFailed": 0, + "totalDurationMs": 40933, + "totalTokens": 4370, + "avgTokPerSec": 121.8144240305409, + "promptChars": 10789, + "promptTokensEst": 2697, + "score": 100, + "stars": "★★★★★", + "error": null + } +] \ No newline at end of file