diff --git a/kipina-codebench/benchmark.mjs b/kipina-codebench/benchmark.mjs index 2b94264..2bc71e0 100644 --- a/kipina-codebench/benchmark.mjs +++ b/kipina-codebench/benchmark.mjs @@ -68,13 +68,17 @@ const LANG_CONFIG = { const LCONF = LANG_CONFIG[LANG] || LANG_CONFIG.python; function loadGoldenExample() { - // --compact: käytä tiivistettyä templaattia täyden koodin sijaan + // --compact: käytä tiivistettyä templaattia if (COMPACT_MODE) { const compactFile = LANG === 'rust' ? 'golden-compact-rs.md' : 'golden-compact-py.md'; const compactPath = join(__dirname, 'prompts', compactFile); if (existsSync(compactPath)) return '\n' + readFileSync(compactPath, 'utf-8').trim() + '\n'; } - // Täysi golden example + // Markdown golden example (koodi + selitykset) + const mdName = LANG === 'rust' ? 'todo-rs.md' : 'todo.md'; + const mdPath = join(GOLDEN_DIR, mdName); + if (existsSync(mdPath)) return '\n' + readFileSync(mdPath, 'utf-8').trim() + '\n'; + // Fallback: erilliset tiedostot const todoDir = join(GOLDEN_DIR, LCONF.goldenDir); if (!existsSync(todoDir)) return ''; let example = `\nREFERENCE IMPLEMENTATION (todo project — follow this exact structure, style, and conventions):\n\n`; diff --git a/kipina-codebench/golden-examples/todo.md b/kipina-codebench/golden-examples/todo.md new file mode 100644 index 0000000..28e59bb --- /dev/null +++ b/kipina-codebench/golden-examples/todo.md @@ -0,0 +1,220 @@ +# Todo — referenssitoteutus (FastAPI + SQLAlchemy 2.0 + SQLite) + +Tämä on täydellinen esimerkki. Generoi vastaava rakenne annetulle projektille. +Käytä VAIN JSON-spekin kenttiä — älä lisää ylimääräisiä. + +## models.py + +SQLAlchemy 2.0: `DeclarativeBase` + `Mapped` + `mapped_column`. EI `Column()`, EI `declarative_base()`. + +```python +"""Tietokantamallit — SQLAlchemy 2.0, Mapped-tyypitys, SQLite.""" + +from datetime import date + +from sqlalchemy import String, Text, Date, create_engine +from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, sessionmaker + +DATABASE_URL = "sqlite:///./app.db" +engine = create_engine(DATABASE_URL, connect_args={"check_same_thread": False}) +SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) + + +class Base(DeclarativeBase): + pass + + +class Todo(Base): + """Tehtävä — otsikko, kuvaus, deadline, prioriteetti ja status.""" + + __tablename__ = "todos" + + id: Mapped[int] = mapped_column(primary_key=True, index=True) + title: Mapped[str] = mapped_column(String(255)) + description: Mapped[str | None] = mapped_column(Text, default=None) + due_date: Mapped[date | None] = mapped_column(Date, default=None) + priority: Mapped[int] = mapped_column(default=1) + status: Mapped[str] = mapped_column(String(20), default="pending") + + +Base.metadata.create_all(bind=engine) +``` + +Huomaa: +- `str | None` (ei `Optional[str]`) +- `String(20)` status-kentälle (ei Enum) +- Vain spekin kentät — ei `created_at` tai muita ylimääräisiä + +## schemas.py + +Pydantic v2: `ConfigDict(from_attributes=True)`. EI `class Config: orm_mode = True`. + +```python +"""Pydantic v2 -skeemat — Create sisääntulolle, Response vastaukselle.""" + +from datetime import date + +from pydantic import BaseModel, ConfigDict + + +class TodoCreate(BaseModel): + """Uuden tehtävän luonti. Pakolliset: title.""" + + title: str + description: str | None = None + due_date: date | None = None + priority: int = 1 + status: str = "pending" + + +class TodoResponse(TodoCreate): + """Palautettava tehtävä — sisältää id:n.""" + + id: int + model_config = ConfigDict(from_attributes=True) +``` + +## main.py + +FastAPI CRUD: POST 201, GET list, GET by id 404, PUT, DELETE 204. Käytä `model_dump()` (ei `.dict()`). + +```python +"""FastAPI CRUD — yksi endpoint-setti per entiteetti.""" + +from fastapi import FastAPI, Depends, HTTPException +from sqlalchemy.orm import Session + +from models import SessionLocal, Todo +from schemas import TodoCreate, TodoResponse + +app = FastAPI() + + +def get_db(): + """Tietokantasessio per pyyntö.""" + db = SessionLocal() + try: + yield db + finally: + db.close() + + +@app.post("/todos/", response_model=TodoResponse, status_code=201) +def create_todo(item: TodoCreate, db: Session = Depends(get_db)): + db_item = Todo(**item.model_dump()) + db.add(db_item) + db.commit() + db.refresh(db_item) + return db_item + + +@app.get("/todos/", response_model=list[TodoResponse]) +def list_todos(db: Session = Depends(get_db)): + return db.query(Todo).all() + + +@app.get("/todos/{item_id}", response_model=TodoResponse) +def get_todo(item_id: int, db: Session = Depends(get_db)): + item = db.query(Todo).filter(Todo.id == item_id).first() + if not item: + raise HTTPException(status_code=404, detail="Todo not found") + return item + + +@app.put("/todos/{item_id}", response_model=TodoResponse) +def update_todo(item_id: int, item: TodoCreate, db: Session = Depends(get_db)): + db_item = db.query(Todo).filter(Todo.id == item_id).first() + if not db_item: + raise HTTPException(status_code=404, detail="Todo not found") + for key, value in item.model_dump().items(): + setattr(db_item, key, value) + db.commit() + db.refresh(db_item) + return db_item + + +@app.delete("/todos/{item_id}", status_code=204) +def delete_todo(item_id: int, db: Session = Depends(get_db)): + db_item = db.query(Todo).filter(Todo.id == item_id).first() + if not db_item: + raise HTTPException(status_code=404, detail="Todo not found") + db.delete(db_item) + db.commit() +``` + +## test_main.py + +Testit: erillinen test.db, `override_get_db`, `TestClient`. Uniikki suomenkielinen data per testi. +PUT-testi lähettää KAIKKI pakolliset kentät. + +```python +"""Pytest — TestClient, erillinen test.db, uniikki data per testi.""" + +from fastapi.testclient import TestClient +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker + +from main import app, get_db +from models import Base + +test_engine = create_engine( + "sqlite:///./test.db", connect_args={"check_same_thread": False} +) +TestSession = sessionmaker(autocommit=False, autoflush=False, bind=test_engine) +Base.metadata.create_all(bind=test_engine) + + +def override_get_db(): + db = TestSession() + try: + yield db + finally: + db.close() + + +app.dependency_overrides[get_db] = override_get_db +client = TestClient(app) + + +def test_create_todo(): + response = client.post("/todos/", json={"title": "Osta maitoa", "priority": 2}) + assert response.status_code == 201 + assert response.json()["title"] == "Osta maitoa" + assert "id" in response.json() + + +def test_list_todos(): + client.post("/todos/", json={"title": "Listattava tehtävä"}) + response = client.get("/todos/") + assert response.status_code == 200 + assert len(response.json()) >= 1 + + +def test_get_todo_by_id(): + created = client.post("/todos/", json={"title": "Haettava tehtävä"}).json() + response = client.get(f"/todos/{created['id']}") + assert response.status_code == 200 + assert response.json()["id"] == created["id"] + + +def test_get_todo_not_found(): + response = client.get("/todos/99999") + assert response.status_code == 404 + + +def test_update_todo(): + created = client.post("/todos/", json={"title": "Vanha otsikko"}).json() + response = client.put( + f"/todos/{created['id']}", json={"title": "Uusi otsikko"} + ) + assert response.status_code == 200 + assert response.json()["title"] == "Uusi otsikko" + + +def test_delete_todo(): + created = client.post("/todos/", json={"title": "Poistettava"}).json() + response = client.delete(f"/todos/{created['id']}") + assert response.status_code == 204 + response = client.get(f"/todos/{created['id']}") + assert response.status_code == 404 +``` diff --git a/kipina-codebench/results/2026-04-14T07-55.html b/kipina-codebench/results/2026-04-14T07-55.html new file mode 100644 index 0000000..4cdef95 --- /dev/null +++ b/kipina-codebench/results/2026-04-14T07-55.html @@ -0,0 +1,183 @@ + + + + + +Kipina Model Benchmark + + + + +

Kipina Model Benchmark

+
+ +
+ +

Mallikohtainen yhteenveto

+
+ +

Kaikki tulokset

+
+ + + + diff --git a/kipina-codebench/results/2026-04-14T07-55.json b/kipina-codebench/results/2026-04-14T07-55.json new file mode 100644 index 0000000..6000844 --- /dev/null +++ b/kipina-codebench/results/2026-04-14T07-55.json @@ -0,0 +1,122 @@ +[ + { + "model": "qwen3:14b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 9, + "testsPassed": 6, + "testsFailed": 3, + "totalDurationMs": 50350, + "totalTokens": 2797, + "avgTokPerSec": 60.919860198859574, + "promptChars": 9858, + "promptTokensEst": 2465, + "score": 80, + "stars": "★★★★☆", + "error": null + }, + { + "model": "qwen3:14b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 8, + "testsPassed": 6, + "testsFailed": 2, + "totalDurationMs": 46557, + "totalTokens": 2584, + "avgTokPerSec": 60.88834523948, + "promptChars": 9544, + "promptTokensEst": 2386, + "score": 85, + "stars": "★★★★☆", + "error": null + }, + { + "model": "qwen3:14b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 15, + "testsPassed": 2, + "testsFailed": 13, + "totalDurationMs": 90761, + "totalTokens": 4979, + "avgTokPerSec": 60.19247492391319, + "promptChars": 10521, + "promptTokensEst": 2630, + "score": 48, + "stars": "★★☆☆☆", + "error": null + }, + { + "model": "qwen3:8b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 0, + "testsPassed": 0, + "testsFailed": 0, + "totalDurationMs": 27360, + "totalTokens": 2466, + "avgTokPerSec": 100.9922018173994, + "promptChars": 9767, + "promptTokensEst": 2442, + "score": 0, + "stars": "☆☆☆☆☆", + "error": "Testit kaatuivat" + }, + { + "model": "qwen3:8b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 7, + "testsPassed": 7, + "testsFailed": 0, + "totalDurationMs": 20920, + "totalTokens": 1876, + "avgTokPerSec": 101.60760023892685, + "promptChars": 8782, + "promptTokensEst": 2196, + "score": 100, + "stars": "★★★★★", + "error": null + }, + { + "model": "qwen3:8b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 10, + "testsPassed": 9, + "testsFailed": 1, + "totalDurationMs": 35766, + "totalTokens": 3217, + "avgTokPerSec": 100.40066102398943, + "promptChars": 10334, + "promptTokensEst": 2584, + "score": 94, + "stars": "★★★★★", + "error": null + } +] \ No newline at end of file diff --git a/kipina-codebench/results/2026-04-14T08-05.html b/kipina-codebench/results/2026-04-14T08-05.html new file mode 100644 index 0000000..e1bc99d --- /dev/null +++ b/kipina-codebench/results/2026-04-14T08-05.html @@ -0,0 +1,183 @@ + + + + + +Kipina Model Benchmark + + + + +

Kipina Model Benchmark

+
+ +
+ +

Mallikohtainen yhteenveto

+
+ +

Kaikki tulokset

+
+ + + + diff --git a/kipina-codebench/results/2026-04-14T08-05.json b/kipina-codebench/results/2026-04-14T08-05.json new file mode 100644 index 0000000..8e4a0e0 --- /dev/null +++ b/kipina-codebench/results/2026-04-14T08-05.json @@ -0,0 +1,947 @@ +[ + { + "model": "qwen3-coder:30b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 6, + "testsPassed": 1, + "testsFailed": 5, + "totalDurationMs": 30801, + "totalTokens": 2333, + "avgTokPerSec": 122.77922150989748, + "promptChars": 10015, + "promptTokensEst": 2504, + "score": 50, + "stars": "★★★☆☆", + "error": null, + "round": 1 + }, + { + "model": "qwen3-coder:30b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 7, + "testsPassed": 6, + "testsFailed": 1, + "totalDurationMs": 25495, + "totalTokens": 2714, + "avgTokPerSec": 122.70970007652487, + "promptChars": 9891, + "promptTokensEst": 2473, + "score": 91, + "stars": "★★★★★", + "error": null, + "round": 1 + }, + { + "model": "qwen3-coder:30b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 11, + "testsPassed": 10, + "testsFailed": 1, + "totalDurationMs": 37153, + "totalTokens": 3979, + "avgTokPerSec": 121.9183958236036, + "promptChars": 11158, + "promptTokensEst": 2790, + "score": 95, + "stars": "★★★★★", + "error": null, + "round": 1 + }, + { + "model": "qwen3:14b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 7, + "testsPassed": 6, + "testsFailed": 1, + "totalDurationMs": 43456, + "totalTokens": 2411, + "avgTokPerSec": 60.89226084568145, + "promptChars": 9831, + "promptTokensEst": 2458, + "score": 91, + "stars": "★★★★★", + "error": null, + "round": 1 + }, + { + "model": "qwen3:14b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 8, + "testsPassed": 8, + "testsFailed": 0, + "totalDurationMs": 40376, + "totalTokens": 2237, + "avgTokPerSec": 61.028627032662456, + "promptChars": 9343, + "promptTokensEst": 2336, + "score": 100, + "stars": "★★★★★", + "error": null, + "round": 1 + }, + { + "model": "qwen3:14b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 12, + "testsPassed": 2, + "testsFailed": 10, + "totalDurationMs": 68620, + "totalTokens": 3796, + "avgTokPerSec": 60.47793268944476, + "promptChars": 10497, + "promptTokensEst": 2624, + "score": 50, + "stars": "★★★☆☆", + "error": null, + "round": 1 + }, + { + "model": "qwen3:8b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 7, + "testsPassed": 7, + "testsFailed": 0, + "totalDurationMs": 25235, + "totalTokens": 2269, + "avgTokPerSec": 101.24212769079884, + "promptChars": 9294, + "promptTokensEst": 2324, + "score": 100, + "stars": "★★★★★", + "error": null, + "round": 1 + }, + { + "model": "qwen3:8b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 8, + "testsPassed": 6, + "testsFailed": 2, + "totalDurationMs": 21720, + "totalTokens": 1942, + "avgTokPerSec": 101.65074583709965, + "promptChars": 9020, + "promptTokensEst": 2255, + "score": 85, + "stars": "★★★★☆", + "error": null, + "round": 1 + }, + { + "model": "qwen3:8b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 11, + "testsPassed": 10, + "testsFailed": 1, + "totalDurationMs": 39006, + "totalTokens": 3509, + "avgTokPerSec": 100.43593706181406, + "promptChars": 10372, + "promptTokensEst": 2593, + "score": 95, + "stars": "★★★★★", + "error": null, + "round": 1 + }, + { + "model": "qwen3-coder:30b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 6, + "testsPassed": 6, + "testsFailed": 0, + "totalDurationMs": 21989, + "totalTokens": 2339, + "avgTokPerSec": 122.8454095677367, + "promptChars": 10052, + "promptTokensEst": 2513, + "score": 100, + "stars": "★★★★★", + "error": null, + "round": 2 + }, + { + "model": "qwen3-coder:30b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 6, + "testsPassed": 6, + "testsFailed": 0, + "totalDurationMs": 23997, + "totalTokens": 2551, + "avgTokPerSec": 122.23722733560855, + "promptChars": 9973, + "promptTokensEst": 2493, + "score": 100, + "stars": "★★★★★", + "error": null, + "round": 2 + }, + { + "model": "qwen3-coder:30b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 8, + "testsPassed": 8, + "testsFailed": 0, + "totalDurationMs": 30169, + "totalTokens": 3249, + "avgTokPerSec": 123.04696524796096, + "promptChars": 11097, + "promptTokensEst": 2774, + "score": 100, + "stars": "★★★★★", + "error": null, + "round": 2 + }, + { + "model": "qwen3:14b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 9, + "testsPassed": 6, + "testsFailed": 3, + "totalDurationMs": 47091, + "totalTokens": 2602, + "avgTokPerSec": 60.962687726457375, + "promptChars": 9633, + "promptTokensEst": 2408, + "score": 80, + "stars": "★★★★☆", + "error": null, + "round": 2 + }, + { + "model": "qwen3:14b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 7, + "testsPassed": 7, + "testsFailed": 0, + "totalDurationMs": 41747, + "totalTokens": 2313, + "avgTokPerSec": 60.949025583617605, + "promptChars": 9373, + "promptTokensEst": 2343, + "score": 100, + "stars": "★★★★★", + "error": null, + "round": 2 + }, + { + "model": "qwen3:14b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 12, + "testsPassed": 2, + "testsFailed": 10, + "totalDurationMs": 66888, + "totalTokens": 3699, + "avgTokPerSec": 60.49540514685331, + "promptChars": 10323, + "promptTokensEst": 2581, + "score": 50, + "stars": "★★★☆☆", + "error": null, + "round": 2 + }, + { + "model": "qwen3:8b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 8, + "testsPassed": 7, + "testsFailed": 1, + "totalDurationMs": 27036, + "totalTokens": 2434, + "avgTokPerSec": 101.01399069228444, + "promptChars": 9513, + "promptTokensEst": 2378, + "score": 93, + "stars": "★★★★★", + "error": null, + "round": 2 + }, + { + "model": "qwen3:8b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 7, + "testsPassed": 6, + "testsFailed": 1, + "totalDurationMs": 20927, + "totalTokens": 1872, + "avgTokPerSec": 101.45096098956486, + "promptChars": 8881, + "promptTokensEst": 2220, + "score": 91, + "stars": "★★★★★", + "error": null, + "round": 2 + }, + { + "model": "qwen3:8b", + "scenario": "blog", + "reqOk": true, + "specOk": false, + "specEntities": 0, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 0, + "testsPassed": 0, + "testsFailed": 0, + "totalDurationMs": 0, + "totalTokens": 0, + "avgTokPerSec": 0, + "promptChars": 0, + "promptTokensEst": 0, + "score": 0, + "stars": "", + "error": "JSON-speksi epäonnistui", + "round": 2 + }, + { + "model": "qwen3-coder:30b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 8, + "testsPassed": 6, + "testsFailed": 2, + "totalDurationMs": 26919, + "totalTokens": 2889, + "avgTokPerSec": 123.63666629145064, + "promptChars": 10162, + "promptTokensEst": 2541, + "score": 85, + "stars": "★★★★☆", + "error": null, + "round": 3 + }, + { + "model": "qwen3-coder:30b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 8, + "testsPassed": 8, + "testsFailed": 0, + "totalDurationMs": 27592, + "totalTokens": 2946, + "avgTokPerSec": 122.33273400152825, + "promptChars": 9469, + "promptTokensEst": 2367, + "score": 100, + "stars": "★★★★★", + "error": null, + "round": 3 + }, + { + "model": "qwen3-coder:30b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 11, + "testsPassed": 11, + "testsFailed": 0, + "totalDurationMs": 35734, + "totalTokens": 3827, + "avgTokPerSec": 122.65156559717951, + "promptChars": 11086, + "promptTokensEst": 2772, + "score": 100, + "stars": "★★★★★", + "error": null, + "round": 3 + }, + { + "model": "qwen3:14b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 9, + "testsPassed": 6, + "testsFailed": 3, + "totalDurationMs": 50372, + "totalTokens": 2795, + "avgTokPerSec": 60.91611850918806, + "promptChars": 9758, + "promptTokensEst": 2440, + "score": 80, + "stars": "★★★★☆", + "error": null, + "round": 3 + }, + { + "model": "qwen3:14b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 6, + "testsPassed": 1, + "testsFailed": 5, + "totalDurationMs": 38716, + "totalTokens": 2144, + "avgTokPerSec": 61.0412890406478, + "promptChars": 9415, + "promptTokensEst": 2354, + "score": 50, + "stars": "★★★☆☆", + "error": null, + "round": 3 + }, + { + "model": "qwen3:14b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 14, + "testsPassed": 7, + "testsFailed": 7, + "totalDurationMs": 74882, + "totalTokens": 4130, + "avgTokPerSec": 60.32640855026445, + "promptChars": 10506, + "promptTokensEst": 2627, + "score": 70, + "stars": "★★★★☆", + "error": null, + "round": 3 + }, + { + "model": "qwen3:8b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 3, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 0, + "testsPassed": 0, + "testsFailed": 0, + "totalDurationMs": 35913, + "totalTokens": 3218, + "avgTokPerSec": 100.38516205100154, + "promptChars": 11338, + "promptTokensEst": 2835, + "score": 0, + "stars": "☆☆☆☆☆", + "error": "Testit kaatuivat", + "round": 3 + }, + { + "model": "qwen3:8b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 7, + "testsPassed": 7, + "testsFailed": 0, + "totalDurationMs": 20974, + "totalTokens": 1880, + "avgTokPerSec": 101.52450928280543, + "promptChars": 8803, + "promptTokensEst": 2201, + "score": 100, + "stars": "★★★★★", + "error": null, + "round": 3 + }, + { + "model": "qwen3:8b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 11, + "testsPassed": 9, + "testsFailed": 2, + "totalDurationMs": 36005, + "totalTokens": 3243, + "avgTokPerSec": 100.44301406462307, + "promptChars": 10414, + "promptTokensEst": 2604, + "score": 89, + "stars": "★★★★☆", + "error": null, + "round": 3 + }, + { + "model": "qwen3-coder:30b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 7, + "testsPassed": 1, + "testsFailed": 6, + "totalDurationMs": 23071, + "totalTokens": 2469, + "avgTokPerSec": 124.09643322620661, + "promptChars": 9960, + "promptTokensEst": 2490, + "score": 49, + "stars": "★★☆☆☆", + "error": null, + "round": 4 + }, + { + "model": "qwen3-coder:30b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 8, + "testsPassed": 2, + "testsFailed": 6, + "totalDurationMs": 27062, + "totalTokens": 2907, + "avgTokPerSec": 123.35530975346687, + "promptChars": 9558, + "promptTokensEst": 2390, + "score": 55, + "stars": "★★★☆☆", + "error": null, + "round": 4 + }, + { + "model": "qwen3-coder:30b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 9, + "testsPassed": 9, + "testsFailed": 0, + "totalDurationMs": 29395, + "totalTokens": 3156, + "avgTokPerSec": 123.22575073561812, + "promptChars": 10574, + "promptTokensEst": 2644, + "score": 100, + "stars": "★★★★★", + "error": null, + "round": 4 + }, + { + "model": "qwen3:14b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 6, + "testsPassed": 6, + "testsFailed": 0, + "totalDurationMs": 39590, + "totalTokens": 2198, + "avgTokPerSec": 61.051945510465806, + "promptChars": 9664, + "promptTokensEst": 2416, + "score": 100, + "stars": "★★★★★", + "error": null, + "round": 4 + }, + { + "model": "qwen3:14b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 6, + "testsPassed": 1, + "testsFailed": 5, + "totalDurationMs": 36950, + "totalTokens": 2042, + "avgTokPerSec": 61.01436784429489, + "promptChars": 9225, + "promptTokensEst": 2306, + "score": 50, + "stars": "★★★☆☆", + "error": null, + "round": 4 + }, + { + "model": "qwen3:14b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 14, + "testsPassed": 2, + "testsFailed": 12, + "totalDurationMs": 80600, + "totalTokens": 4437, + "avgTokPerSec": 60.29371170543078, + "promptChars": 10688, + "promptTokensEst": 2672, + "score": 49, + "stars": "★★☆☆☆", + "error": null, + "round": 4 + }, + { + "model": "qwen3:8b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 0, + "testsPassed": 0, + "testsFailed": 0, + "totalDurationMs": 29125, + "totalTokens": 2619, + "avgTokPerSec": 100.90587777586212, + "promptChars": 9899, + "promptTokensEst": 2475, + "score": 0, + "stars": "☆☆☆☆☆", + "error": "Testit kaatuivat", + "round": 4 + }, + { + "model": "qwen3:8b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 8, + "testsPassed": 8, + "testsFailed": 0, + "totalDurationMs": 21847, + "totalTokens": 1957, + "avgTokPerSec": 101.44111070734304, + "promptChars": 8946, + "promptTokensEst": 2237, + "score": 100, + "stars": "★★★★★", + "error": null, + "round": 4 + }, + { + "model": "qwen3:8b", + "scenario": "blog", + "reqOk": true, + "specOk": false, + "specEntities": 0, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 0, + "testsPassed": 0, + "testsFailed": 0, + "totalDurationMs": 0, + "totalTokens": 0, + "avgTokPerSec": 0, + "promptChars": 0, + "promptTokensEst": 0, + "score": 0, + "stars": "", + "error": "JSON-speksi epäonnistui", + "round": 4 + }, + { + "model": "qwen3-coder:30b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 6, + "testsPassed": 1, + "testsFailed": 5, + "totalDurationMs": 21127, + "totalTokens": 2245, + "avgTokPerSec": 124.22714049663371, + "promptChars": 9972, + "promptTokensEst": 2493, + "score": 50, + "stars": "★★★☆☆", + "error": null, + "round": 5 + }, + { + "model": "qwen3-coder:30b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 9, + "testsPassed": 7, + "testsFailed": 2, + "totalDurationMs": 30281, + "totalTokens": 3079, + "avgTokPerSec": 123.00254714651271, + "promptChars": 9562, + "promptTokensEst": 2391, + "score": 87, + "stars": "★★★★☆", + "error": null, + "round": 5 + }, + { + "model": "qwen3-coder:30b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 12, + "testsPassed": 12, + "testsFailed": 0, + "totalDurationMs": 39630, + "totalTokens": 4274, + "avgTokPerSec": 123.08303937451802, + "promptChars": 11119, + "promptTokensEst": 2780, + "score": 100, + "stars": "★★★★★", + "error": null, + "round": 5 + }, + { + "model": "qwen3:14b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 6, + "testsPassed": 6, + "testsFailed": 0, + "totalDurationMs": 38032, + "totalTokens": 2104, + "avgTokPerSec": 61.05445464163662, + "promptChars": 9455, + "promptTokensEst": 2364, + "score": 100, + "stars": "★★★★★", + "error": null, + "round": 5 + }, + { + "model": "qwen3:14b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 0, + "testsPassed": 0, + "testsFailed": 0, + "totalDurationMs": 39620, + "totalTokens": 2193, + "avgTokPerSec": 61.04565233675101, + "promptChars": 9481, + "promptTokensEst": 2370, + "score": 0, + "stars": "☆☆☆☆☆", + "error": "Testit kaatuivat", + "round": 5 + }, + { + "model": "qwen3:14b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 9, + "testsPassed": 7, + "testsFailed": 2, + "totalDurationMs": 63579, + "totalTokens": 3520, + "avgTokPerSec": 60.51513453009977, + "promptChars": 10493, + "promptTokensEst": 2623, + "score": 87, + "stars": "★★★★☆", + "error": null, + "round": 5 + }, + { + "model": "qwen3:8b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 9, + "testsPassed": 6, + "testsFailed": 3, + "totalDurationMs": 30845, + "totalTokens": 2777, + "avgTokPerSec": 100.79046137130972, + "promptChars": 9507, + "promptTokensEst": 2377, + "score": 80, + "stars": "★★★★☆", + "error": null, + "round": 5 + }, + { + "model": "qwen3:8b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 8, + "testsPassed": 6, + "testsFailed": 2, + "totalDurationMs": 21413, + "totalTokens": 1914, + "avgTokPerSec": 101.25525436264132, + "promptChars": 8804, + "promptTokensEst": 2201, + "score": 85, + "stars": "★★★★☆", + "error": null, + "round": 5 + }, + { + "model": "qwen3:8b", + "scenario": "blog", + "reqOk": true, + "specOk": false, + "specEntities": 0, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 0, + "testsPassed": 0, + "testsFailed": 0, + "totalDurationMs": 0, + "totalTokens": 0, + "avgTokPerSec": 0, + "promptChars": 0, + "promptTokensEst": 0, + "score": 0, + "stars": "", + "error": "JSON-speksi epäonnistui", + "round": 5 + } +] \ No newline at end of file diff --git a/kipina-codebench/results/2026-04-14T08-18.html b/kipina-codebench/results/2026-04-14T08-18.html new file mode 100644 index 0000000..7887d39 --- /dev/null +++ b/kipina-codebench/results/2026-04-14T08-18.html @@ -0,0 +1,183 @@ + + + + + +Kipina Model Benchmark + + + + +

Kipina Model Benchmark

+
+ +
+ +

Mallikohtainen yhteenveto

+
+ +

Kaikki tulokset

+
+ + + + diff --git a/kipina-codebench/results/2026-04-14T08-18.json b/kipina-codebench/results/2026-04-14T08-18.json new file mode 100644 index 0000000..29da503 --- /dev/null +++ b/kipina-codebench/results/2026-04-14T08-18.json @@ -0,0 +1,947 @@ +[ + { + "model": "qwen3:14b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 9, + "testsPassed": 6, + "testsFailed": 3, + "totalDurationMs": 33892, + "totalTokens": 2675, + "avgTokPerSec": 88.07409036121237, + "promptChars": 9688, + "promptTokensEst": 2422, + "score": 80, + "stars": "★★★★☆", + "error": null, + "round": 1 + }, + { + "model": "qwen3:14b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 8, + "testsPassed": 6, + "testsFailed": 2, + "totalDurationMs": 30647, + "totalTokens": 2549, + "avgTokPerSec": 88.4488185974085, + "promptChars": 9594, + "promptTokensEst": 2399, + "score": 85, + "stars": "★★★★☆", + "error": null, + "round": 1 + }, + { + "model": "qwen3:14b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 13, + "testsPassed": 6, + "testsFailed": 7, + "totalDurationMs": 44371, + "totalTokens": 3678, + "avgTokPerSec": 88.172616246191, + "promptChars": 10432, + "promptTokensEst": 2608, + "score": 68, + "stars": "★★★☆☆", + "error": null, + "round": 1 + }, + { + "model": "qwen3:8b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 7, + "testsPassed": 6, + "testsFailed": 1, + "totalDurationMs": 18385, + "totalTokens": 2375, + "avgTokPerSec": 147.62230806597154, + "promptChars": 9478, + "promptTokensEst": 2370, + "score": 91, + "stars": "★★★★★", + "error": null, + "round": 1 + }, + { + "model": "qwen3:8b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 7, + "testsPassed": 7, + "testsFailed": 0, + "totalDurationMs": 13968, + "totalTokens": 1904, + "avgTokPerSec": 148.3084817167518, + "promptChars": 8837, + "promptTokensEst": 2209, + "score": 100, + "stars": "★★★★★", + "error": null, + "round": 1 + }, + { + "model": "qwen3:8b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 0, + "testsPassed": 0, + "testsFailed": 0, + "totalDurationMs": 25642, + "totalTokens": 3476, + "avgTokPerSec": 146.49556892944076, + "promptChars": 10734, + "promptTokensEst": 2684, + "score": 0, + "stars": "☆☆☆☆☆", + "error": "Testit kaatuivat", + "round": 1 + }, + { + "model": "qwen3-coder:30b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 8, + "testsPassed": 6, + "testsFailed": 2, + "totalDurationMs": 19982, + "totalTokens": 2937, + "avgTokPerSec": 191.2786317674431, + "promptChars": 10281, + "promptTokensEst": 2570, + "score": 85, + "stars": "★★★★☆", + "error": null, + "round": 1 + }, + { + "model": "qwen3-coder:30b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 7, + "testsPassed": 7, + "testsFailed": 0, + "totalDurationMs": 17114, + "totalTokens": 2903, + "avgTokPerSec": 190.51221206765385, + "promptChars": 9654, + "promptTokensEst": 2414, + "score": 100, + "stars": "★★★★★", + "error": null, + "round": 1 + }, + { + "model": "qwen3-coder:30b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 11, + "testsPassed": 11, + "testsFailed": 0, + "totalDurationMs": 22352, + "totalTokens": 3776, + "avgTokPerSec": 190.56628728306987, + "promptChars": 11134, + "promptTokensEst": 2784, + "score": 100, + "stars": "★★★★★", + "error": null, + "round": 1 + }, + { + "model": "qwen3:14b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 8, + "testsPassed": 6, + "testsFailed": 2, + "totalDurationMs": 31217, + "totalTokens": 2463, + "avgTokPerSec": 88.6684646675098, + "promptChars": 9598, + "promptTokensEst": 2400, + "score": 85, + "stars": "★★★★☆", + "error": null, + "round": 2 + }, + { + "model": "qwen3:14b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 7, + "testsPassed": 7, + "testsFailed": 0, + "totalDurationMs": 27520, + "totalTokens": 2288, + "avgTokPerSec": 88.64765360012593, + "promptChars": 9612, + "promptTokensEst": 2403, + "score": 100, + "stars": "★★★★★", + "error": null, + "round": 2 + }, + { + "model": "qwen3:14b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 12, + "testsPassed": 3, + "testsFailed": 9, + "totalDurationMs": 41874, + "totalTokens": 3474, + "avgTokPerSec": 88.22266853318554, + "promptChars": 10408, + "promptTokensEst": 2602, + "score": 55, + "stars": "★★★☆☆", + "error": null, + "round": 2 + }, + { + "model": "qwen3:8b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 11, + "testsPassed": 11, + "testsFailed": 0, + "totalDurationMs": 24781, + "totalTokens": 3240, + "avgTokPerSec": 146.89167309934365, + "promptChars": 10179, + "promptTokensEst": 2545, + "score": 100, + "stars": "★★★★★", + "error": null, + "round": 2 + }, + { + "model": "qwen3:8b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 9, + "testsPassed": 6, + "testsFailed": 3, + "totalDurationMs": 19148, + "totalTokens": 2605, + "avgTokPerSec": 147.55250620481297, + "promptChars": 9634, + "promptTokensEst": 2409, + "score": 80, + "stars": "★★★★☆", + "error": null, + "round": 2 + }, + { + "model": "qwen3:8b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 11, + "testsPassed": 11, + "testsFailed": 0, + "totalDurationMs": 23816, + "totalTokens": 3232, + "avgTokPerSec": 147.25857324533817, + "promptChars": 9226, + "promptTokensEst": 2307, + "score": 100, + "stars": "★★★★★", + "error": null, + "round": 2 + }, + { + "model": "qwen3-coder:30b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 6, + "testsPassed": 6, + "testsFailed": 0, + "totalDurationMs": 16639, + "totalTokens": 2369, + "avgTokPerSec": 191.61273045157245, + "promptChars": 10048, + "promptTokensEst": 2512, + "score": 100, + "stars": "★★★★★", + "error": null, + "round": 2 + }, + { + "model": "qwen3-coder:30b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 9, + "testsPassed": 8, + "testsFailed": 1, + "totalDurationMs": 18588, + "totalTokens": 3163, + "avgTokPerSec": 190.86975006725547, + "promptChars": 10048, + "promptTokensEst": 2512, + "score": 93, + "stars": "★★★★★", + "error": null, + "round": 2 + }, + { + "model": "qwen3-coder:30b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 10, + "testsPassed": 10, + "testsFailed": 0, + "totalDurationMs": 22677, + "totalTokens": 3828, + "avgTokPerSec": 190.15611016906482, + "promptChars": 11090, + "promptTokensEst": 2773, + "score": 100, + "stars": "★★★★★", + "error": null, + "round": 2 + }, + { + "model": "qwen3:14b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 6, + "testsPassed": 6, + "testsFailed": 0, + "totalDurationMs": 26449, + "totalTokens": 2063, + "avgTokPerSec": 88.77498453063184, + "promptChars": 9608, + "promptTokensEst": 2402, + "score": 100, + "stars": "★★★★★", + "error": null, + "round": 3 + }, + { + "model": "qwen3:14b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 7, + "testsPassed": 7, + "testsFailed": 0, + "totalDurationMs": 27510, + "totalTokens": 2289, + "avgTokPerSec": 88.74699253414485, + "promptChars": 9418, + "promptTokensEst": 2355, + "score": 100, + "stars": "★★★★★", + "error": null, + "round": 3 + }, + { + "model": "qwen3:14b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 12, + "testsPassed": 3, + "testsFailed": 9, + "totalDurationMs": 45105, + "totalTokens": 3738, + "avgTokPerSec": 88.04788102995212, + "promptChars": 10564, + "promptTokensEst": 2641, + "score": 55, + "stars": "★★★☆☆", + "error": null, + "round": 3 + }, + { + "model": "qwen3:8b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 8, + "testsPassed": 7, + "testsFailed": 1, + "totalDurationMs": 19204, + "totalTokens": 2480, + "avgTokPerSec": 147.91758782382294, + "promptChars": 9391, + "promptTokensEst": 2348, + "score": 93, + "stars": "★★★★★", + "error": null, + "round": 3 + }, + { + "model": "qwen3:8b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 6, + "testsPassed": 6, + "testsFailed": 0, + "totalDurationMs": 12990, + "totalTokens": 1769, + "avgTokPerSec": 148.2616673700717, + "promptChars": 8898, + "promptTokensEst": 2225, + "score": 100, + "stars": "★★★★★", + "error": null, + "round": 3 + }, + { + "model": "qwen3:8b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 12, + "testsPassed": 10, + "testsFailed": 2, + "totalDurationMs": 25831, + "totalTokens": 3500, + "avgTokPerSec": 146.86924785880186, + "promptChars": 9465, + "promptTokensEst": 2366, + "score": 90, + "stars": "★★★★★", + "error": null, + "round": 3 + }, + { + "model": "qwen3-coder:30b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 6, + "testsPassed": 6, + "testsFailed": 0, + "totalDurationMs": 19453, + "totalTokens": 2845, + "avgTokPerSec": 191.37382231956113, + "promptChars": 10157, + "promptTokensEst": 2539, + "score": 100, + "stars": "★★★★★", + "error": null, + "round": 3 + }, + { + "model": "qwen3-coder:30b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 9, + "testsPassed": 9, + "testsFailed": 0, + "totalDurationMs": 21570, + "totalTokens": 3529, + "avgTokPerSec": 190.65454623497536, + "promptChars": 9732, + "promptTokensEst": 2433, + "score": 100, + "stars": "★★★★★", + "error": null, + "round": 3 + }, + { + "model": "qwen3-coder:30b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 11, + "testsPassed": 11, + "testsFailed": 0, + "totalDurationMs": 25537, + "totalTokens": 4300, + "avgTokPerSec": 189.94521619124598, + "promptChars": 11127, + "promptTokensEst": 2782, + "score": 100, + "stars": "★★★★★", + "error": null, + "round": 3 + }, + { + "model": "qwen3:14b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 9, + "testsPassed": 7, + "testsFailed": 2, + "totalDurationMs": 31923, + "totalTokens": 2522, + "avgTokPerSec": 88.62182881661799, + "promptChars": 9700, + "promptTokensEst": 2425, + "score": 87, + "stars": "★★★★☆", + "error": null, + "round": 4 + }, + { + "model": "qwen3:14b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 7, + "testsPassed": 7, + "testsFailed": 0, + "totalDurationMs": 26000, + "totalTokens": 2163, + "avgTokPerSec": 88.86878707672254, + "promptChars": 9288, + "promptTokensEst": 2322, + "score": 100, + "stars": "★★★★★", + "error": null, + "round": 4 + }, + { + "model": "qwen3:14b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 10, + "testsPassed": 10, + "testsFailed": 0, + "totalDurationMs": 43275, + "totalTokens": 3588, + "avgTokPerSec": 88.24995936347965, + "promptChars": 10173, + "promptTokensEst": 2543, + "score": 100, + "stars": "★★★★★", + "error": null, + "round": 4 + }, + { + "model": "qwen3:8b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 14, + "testsPassed": 0, + "testsFailed": 14, + "totalDurationMs": 30045, + "totalTokens": 3913, + "avgTokPerSec": 146.51683619371713, + "promptChars": 10334, + "promptTokensEst": 2584, + "score": 40, + "stars": "★★☆☆☆", + "error": null, + "round": 4 + }, + { + "model": "qwen3:8b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 9, + "testsPassed": 5, + "testsFailed": 4, + "totalDurationMs": 17076, + "totalTokens": 2321, + "avgTokPerSec": 147.99547121069506, + "promptChars": 9451, + "promptTokensEst": 2363, + "score": 73, + "stars": "★★★★☆", + "error": null, + "round": 4 + }, + { + "model": "qwen3:8b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 11, + "testsPassed": 11, + "testsFailed": 0, + "totalDurationMs": 23890, + "totalTokens": 3243, + "avgTokPerSec": 147.20125507974117, + "promptChars": 9217, + "promptTokensEst": 2304, + "score": 100, + "stars": "★★★★★", + "error": null, + "round": 4 + }, + { + "model": "qwen3-coder:30b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 8, + "testsPassed": 6, + "testsFailed": 2, + "totalDurationMs": 21812, + "totalTokens": 3246, + "avgTokPerSec": 191.07801335688654, + "promptChars": 10249, + "promptTokensEst": 2562, + "score": 85, + "stars": "★★★★☆", + "error": null, + "round": 4 + }, + { + "model": "qwen3-coder:30b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 9, + "testsPassed": 8, + "testsFailed": 1, + "totalDurationMs": 20325, + "totalTokens": 3441, + "avgTokPerSec": 190.10241840094508, + "promptChars": 9930, + "promptTokensEst": 2483, + "score": 93, + "stars": "★★★★★", + "error": null, + "round": 4 + }, + { + "model": "qwen3-coder:30b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 12, + "testsPassed": 12, + "testsFailed": 0, + "totalDurationMs": 26087, + "totalTokens": 4387, + "avgTokPerSec": 189.8005689388054, + "promptChars": 11109, + "promptTokensEst": 2777, + "score": 100, + "stars": "★★★★★", + "error": null, + "round": 4 + }, + { + "model": "qwen3:14b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 7, + "testsPassed": 7, + "testsFailed": 0, + "totalDurationMs": 30287, + "totalTokens": 2388, + "avgTokPerSec": 88.72243320918638, + "promptChars": 9695, + "promptTokensEst": 2424, + "score": 100, + "stars": "★★★★★", + "error": null, + "round": 5 + }, + { + "model": "qwen3:14b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 9, + "testsPassed": 6, + "testsFailed": 3, + "totalDurationMs": 31212, + "totalTokens": 2601, + "avgTokPerSec": 88.71289036919063, + "promptChars": 9619, + "promptTokensEst": 2405, + "score": 80, + "stars": "★★★★☆", + "error": null, + "round": 5 + }, + { + "model": "qwen3:14b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 15, + "testsPassed": 3, + "testsFailed": 12, + "totalDurationMs": 50939, + "totalTokens": 4217, + "avgTokPerSec": 88.06125722020734, + "promptChars": 10743, + "promptTokensEst": 2686, + "score": 52, + "stars": "★★★☆☆", + "error": null, + "round": 5 + }, + { + "model": "qwen3:8b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 7, + "testsPassed": 6, + "testsFailed": 1, + "totalDurationMs": 17913, + "totalTokens": 2310, + "avgTokPerSec": 148.0291268001691, + "promptChars": 9357, + "promptTokensEst": 2339, + "score": 91, + "stars": "★★★★★", + "error": null, + "round": 5 + }, + { + "model": "qwen3:8b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 8, + "testsPassed": 8, + "testsFailed": 0, + "totalDurationMs": 13948, + "totalTokens": 1898, + "avgTokPerSec": 148.37907379944423, + "promptChars": 8725, + "promptTokensEst": 2181, + "score": 100, + "stars": "★★★★★", + "error": null, + "round": 5 + }, + { + "model": "qwen3:8b", + "scenario": "blog", + "reqOk": true, + "specOk": false, + "specEntities": 0, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 0, + "testsPassed": 0, + "testsFailed": 0, + "totalDurationMs": 0, + "totalTokens": 0, + "avgTokPerSec": 0, + "promptChars": 0, + "promptTokensEst": 0, + "score": 0, + "stars": "", + "error": "JSON-speksi epäonnistui", + "round": 5 + }, + { + "model": "qwen3-coder:30b", + "scenario": "todo", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 6, + "testsPassed": 1, + "testsFailed": 5, + "totalDurationMs": 15229, + "totalTokens": 2119, + "avgTokPerSec": 192.33007410215646, + "promptChars": 9827, + "promptTokensEst": 2457, + "score": 50, + "stars": "★★★☆☆", + "error": null, + "round": 5 + }, + { + "model": "qwen3-coder:30b", + "scenario": "users", + "reqOk": true, + "specOk": true, + "specEntities": 1, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 6, + "testsPassed": 6, + "testsFailed": 0, + "totalDurationMs": 18223, + "totalTokens": 3093, + "avgTokPerSec": 190.71372054282037, + "promptChars": 9641, + "promptTokensEst": 2410, + "score": 100, + "stars": "★★★★★", + "error": null, + "round": 5 + }, + { + "model": "qwen3-coder:30b", + "scenario": "blog", + "reqOk": true, + "specOk": true, + "specEntities": 2, + "validationIssues": 0, + "fixRounds": 0, + "testsTotal": 10, + "testsPassed": 1, + "testsFailed": 9, + "totalDurationMs": 21215, + "totalTokens": 3589, + "avgTokPerSec": 190.49493540345176, + "promptChars": 11180, + "promptTokensEst": 2795, + "score": 46, + "stars": "★★☆☆☆", + "error": null, + "round": 5 + } +] \ No newline at end of file