Compare commits
1 Commits
38a18c555b
...
projekti1
| Author | SHA1 | Date | |
|---|---|---|---|
| 20cea8f268 |
513
network-poc/tests/model-benchmark.mjs
Normal file
513
network-poc/tests/model-benchmark.mjs
Normal file
@@ -0,0 +1,513 @@
|
|||||||
|
#!/usr/bin/env node
|
||||||
|
/**
|
||||||
|
* Kipinä Model Benchmark
|
||||||
|
*
|
||||||
|
* Generoi projekteja eri Ollama-malleilla ja testaa niiden toimivuus.
|
||||||
|
* Käyttö:
|
||||||
|
* node model-benchmark.mjs # kaikki mallit, oletusskenaario
|
||||||
|
* node model-benchmark.mjs --models qwen3:8b,qwen3:30b
|
||||||
|
* node model-benchmark.mjs --ollama http://host:11434
|
||||||
|
* node model-benchmark.mjs --scenarios all # kaikki skenaariot
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { execSync } from 'child_process';
|
||||||
|
import { writeFileSync, mkdirSync, rmSync, existsSync } from 'fs';
|
||||||
|
|
||||||
|
// === CLI-argumentit ===
|
||||||
|
const args = process.argv.slice(2);

/**
 * Looks up the value of a `--name value` command-line option.
 *
 * @param {string} name - Option name without the leading dashes.
 * @param {string} fallback - Returned when the option is absent or has no value.
 * @returns {string} The token that follows `--name`, or `fallback`.
 */
function arg(name, fallback) {
  const i = args.indexOf(`--${name}`);
  if (i < 0) return fallback;
  const value = args[i + 1];
  // A following `--flag` is the next option, not this option's value.
  if (!value || value.startsWith('--')) return fallback;
  return value;
}

const OLLAMA_URL = arg('ollama', process.env.OLLAMA_URL || 'http://localhost:11434');
const HUB_URL = arg('hub', ''); // Alternative backend: --hub https://kipina.studio
const FILTER_MODELS = arg('models', '');
const SCENARIO_FILTER = arg('scenarios', 'default');
const OUTPUT_DIR = arg('output', '/tmp/kipina-benchmark');
const MAX_FIX_ROUNDS = 2;
|
||||||
|
|
||||||
|
// === Ollama / Hub -client ===
|
||||||
|
/**
 * Sends one prompt to the model and returns the reply with throughput metrics.
 *
 * When HUB_URL is set the request goes to the hub's
 * `/api/v1/chat/completions` route; otherwise it goes directly to Ollama's
 * `/api/chat` route.
 *
 * @param {string} model - Model name.
 * @param {string} prompt - User prompt.
 * @param {string} systemPrompt - System prompt (omitted from the Ollama messages when empty).
 * @param {number} [maxTokens=2048] - Generation limit.
 * @returns {Promise<{text: string, tokens: number, durationMs: number, tokPerSec: number}>}
 * @throws {Error} When the backend answers with a non-2xx status.
 */
async function ollamaChat(model, prompt, systemPrompt, maxTokens = 2048) {
  const start = Date.now();
  // Tokens per second; 0 instead of NaN/Infinity when the inputs are unusable.
  const rate = (tokens, seconds) => (tokens > 0 && seconds > 0 ? tokens / seconds : 0);

  if (HUB_URL) {
    // Hub route: /api/v1/chat/completions
    const taskId = `bench-${Date.now()}-${Math.random().toString(36).slice(2,8)}`;
    const resp = await fetch(`${HUB_URL}/api/v1/chat/completions`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ model, prompt, task_id: taskId, system_prompt: systemPrompt, max_tokens: maxTokens }),
    });
    if (!resp.ok) throw new Error(`Hub HTTP ${resp.status}: ${await resp.text()}`);
    const data = await resp.json();
    const elapsed = Date.now() - start;
    const tokens = data.tokens_generated || 0;
    return {
      text: (data.response || '').trim(),
      tokens,
      durationMs: elapsed,
      tokPerSec: data.tokens_per_sec || rate(tokens, elapsed / 1000),
    };
  }

  // Direct Ollama route: /api/chat
  const messages = [];
  if (systemPrompt) messages.push({ role: 'system', content: systemPrompt });
  messages.push({ role: 'user', content: prompt });

  const resp = await fetch(`${OLLAMA_URL}/api/chat`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model,
      messages,
      stream: false,
      options: { num_predict: maxTokens, temperature: 0.7, top_k: 40, repeat_penalty: 1.15 },
    }),
  });
  if (!resp.ok) throw new Error(`Ollama HTTP ${resp.status}: ${await resp.text()}`);
  const data = await resp.json();
  const elapsed = Date.now() - start;
  const text = (data.message?.content || '').trim();
  const evalCount = data.eval_count || 0;
  // eval_duration is divided by 1e9 (nanoseconds -> seconds). When it is
  // missing, use the wall-clock time rather than a 1 ns placeholder that
  // would report billions of tokens per second.
  const evalSeconds = data.eval_duration > 0 ? data.eval_duration / 1e9 : elapsed / 1000;
  return { text, tokens: evalCount, durationMs: elapsed, tokPerSec: rate(evalCount, evalSeconds) };
}
|
||||||
|
|
||||||
|
/**
 * Fetches the names of the models the backend offers.
 *
 * Queries the hub's `/api/v1/ollama/tags` route when HUB_URL is set,
 * otherwise Ollama's `/api/tags` route.
 *
 * @returns {Promise<string[]>} The `name` of every entry in the response's `models` list.
 * @throws {Error} When the backend answers with a non-2xx status.
 */
async function ollamaListModels() {
  const endpoint = !HUB_URL ? `${OLLAMA_URL}/api/tags` : `${HUB_URL}/api/v1/ollama/tags`;
  const response = await fetch(endpoint);
  if (!response.ok) {
    throw new Error(`Tags: HTTP ${response.status}`);
  }
  const payload = await response.json();
  const names = [];
  for (const entry of payload.models || []) {
    names.push(entry.name);
  }
  return names;
}
|
||||||
|
|
||||||
|
// === Promptit (kopioitu index.astrosta) ===
|
||||||
|
const CLIENT_SYSTEM = `You are a product owner who turns vague ideas into clear, actionable software requirements.
|
||||||
|
|
||||||
|
GIVEN a short project description from the user, produce a structured brief:
|
||||||
|
|
||||||
|
1. PROJECT NAME: a short, descriptive name
|
||||||
|
2. GOAL: one sentence explaining what the software does and who it's for
|
||||||
|
3. CORE FEATURES: numbered list of 3-8 concrete features (not vague wishes)
|
||||||
|
4. DATA MODEL: list the main entities and their key fields (include field types)
|
||||||
|
5. API ENDPOINTS: list the REST endpoints (method + path + purpose)
|
||||||
|
6. CONSTRAINTS: any technical constraints (e.g. "must use SQLite", "no auth needed")
|
||||||
|
|
||||||
|
RULES:
|
||||||
|
- Be specific: "User can filter todos by status" not "todo management"
|
||||||
|
- Use plain English, no code
|
||||||
|
- Maximum 400 words total`;
|
||||||
|
|
||||||
|
const SPEC_SYSTEM = `You are a software architect who designs database schemas for Python web applications.
|
||||||
|
|
||||||
|
THINK STEP BY STEP before outputting JSON:
|
||||||
|
1. What are the main ENTITIES (nouns) in this project?
|
||||||
|
2. What FIELDS does each entity need? (name, type, required?)
|
||||||
|
3. Which entities REFERENCE each other? (e.g. "a Book belongs to an Author" → Book has author_id)
|
||||||
|
4. Are there Date/DateTime fields? → add extra_imports
|
||||||
|
|
||||||
|
Then output ONLY valid JSON (no explanations before or after).
|
||||||
|
|
||||||
|
SCHEMA:
|
||||||
|
{"project_name":"short-name","description":"One sentence","entities":[{"name":"EntityName","table_name":"entity_names","fields":[{"name":"field_name","sa_type":"String(255)","py_type":"str","nullable":false,"default":null}]}],"relationships":[{"from":"ChildEntity","field":"parent_id","to":"ParentEntity","type":"many-to-one"}],"extra_imports":[]}
|
||||||
|
|
||||||
|
FIELD RULES:
|
||||||
|
- sa_type: String(N), Text, Integer, Date, DateTime, Boolean, Float
|
||||||
|
- py_type: str, int, float, bool, date, datetime — append " | None" if nullable
|
||||||
|
- Status fields: use String(20) with default value, NEVER Enum
|
||||||
|
- Every entity gets "id" automatically — do NOT add id or redundant ID fields
|
||||||
|
- Use snake_case for field names
|
||||||
|
|
||||||
|
RELATIONSHIP RULES:
|
||||||
|
- If entity A "belongs to" entity B → A has b_id field (Integer, nullable=false) + relationship entry
|
||||||
|
- EVERY _id field MUST have a matching relationship entry
|
||||||
|
- Parent entities must appear BEFORE children in the entities array
|
||||||
|
- If no relationships, set "relationships": []
|
||||||
|
|
||||||
|
AVOID: redundant ID fields, generic names, more than 7 fields or 3 entities, non-English entity/field names (ALWAYS English even if description is Finnish)
|
||||||
|
|
||||||
|
EXAMPLES (adapt, don't copy):
|
||||||
|
Todo app → Todo: title(str), description(Text|None), due_date(Date|None), status(String20="pending")
|
||||||
|
Blog → Author: name,email,bio(Text|None) / Post: title, content(Text), author_id→Author, published_at(DateTime|None), status(String20="draft")`;
|
||||||
|
|
||||||
|
const FIX_SYSTEM = 'You are a Python code fixer. Return ONLY the corrected Python file. No markdown fences, no explanations — just valid Python code.';
|
||||||
|
|
||||||
|
// === Template-funktiot (kopioitu korjatusta index.astrosta) ===
|
||||||
|
/**
 * Renders a JavaScript scalar as Python literal source text.
 *
 * @param {*} val - Boolean, null/undefined, string or number.
 * @returns {string} `True`/`False`, `None`, a double-quoted escaped string, or `String(val)`.
 */
function pyLiteral(val) {
  if (val === true) return 'True';
  if (val === false) return 'False';
  if (val === null || val === undefined) return 'None';
  // JSON string syntax (\" \\ \n \uXXXX) is also valid Python string syntax,
  // so quotes, backslashes and newlines in the value cannot break the output.
  if (typeof val === 'string') return JSON.stringify(val);
  return String(val);
}
|
||||||
|
/**
 * Renders a plain object as Python dict literal source text.
 *
 * @param {Object} obj - Object whose own enumerable entries are rendered.
 * @returns {string} Text of the form `{"key":value,...}` with Python `True`/`False`/`None`.
 */
function pyJsonLiteral(obj) {
  const toPy = (v) => {
    if (v === true) return 'True';
    if (v === false) return 'False';
    if (v === null || v === undefined) return 'None';
    // JSON string escapes are valid Python string escapes.
    if (typeof v === 'string') return JSON.stringify(v);
    if (Array.isArray(v)) return `[${v.map(toPy).join(',')}]`;
    if (typeof v === 'object') return pyJsonLiteral(v);
    return String(v);
  };
  const parts = Object.entries(obj).map(([k, v]) => `${JSON.stringify(k)}:${toPy(v)}`);
  return '{' + parts.join(',') + '}';
}
|
||||||
|
/**
 * Generates `models.py`: SQLAlchemy engine/session setup plus one model class
 * per entity in the spec.
 *
 * Every class gets an integer primary key `id`. A field listed in
 * `spec.relationships` (matched on `from` + `field`) becomes a ForeignKey to
 * the target entity's table.
 *
 * @param {Object} spec - Parsed JSON spec with `entities` and optional `relationships`.
 * @returns {string} Python source text.
 * @throws {Error} When a field's `sa_type` is missing or does not start with a type name.
 */
function tmplModels(spec) {
  // Leading type name of a field, e.g. "String(255)" -> "String".
  const baseTypeOf = (entity, field) => {
    const m = typeof field.sa_type === 'string' ? field.sa_type.match(/^(\w+)/) : null;
    if (!m) {
      throw new Error(`Invalid sa_type for ${entity.name}.${field.name}: ${JSON.stringify(field.sa_type)}`);
    }
    return m[1];
  };

  const saTypes = new Set(['Integer']);
  for (const e of spec.entities) for (const f of e.fields) saTypes.add(baseTypeOf(e, f));

  // "Entity.field" -> table name of the referenced entity.
  const relMap = {};
  for (const r of (spec.relationships || [])) {
    const target = spec.entities.find(e => e.name === r.to);
    if (target) relMap[`${r.from}.${r.field}`] = target.table_name;
  }
  if (Object.keys(relMap).length > 0) saTypes.add('ForeignKey');
  const imports = [...saTypes].sort().join(', ');

  const out = [
    `from sqlalchemy import create_engine, Column, ${imports}`,
    'from sqlalchemy.orm import declarative_base, sessionmaker',
    '',
    'DATABASE_URL = "sqlite:///./app.db"',
    'engine = create_engine(DATABASE_URL, connect_args={"check_same_thread": False})',
    'SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)',
    'Base = declarative_base()',
    '',
  ];
  for (const e of spec.entities) {
    out.push(
      `class ${e.name}(Base):`,
      `    __tablename__ = "${e.table_name}"`,
      '    id = Column(Integer, primary_key=True, index=True)',
    );
    for (const f of e.fields) {
      const fkTarget = relMap[`${e.name}.${f.name}`];
      const parts = fkTarget ? [`Column(${f.sa_type}, ForeignKey("${fkTarget}.id")`] : [`Column(${f.sa_type}`];
      if (!f.nullable) parts.push('nullable=False');
      if (f.default !== null && f.default !== undefined) parts.push(`default=${pyLiteral(f.default)}`);
      out.push(`    ${f.name} = ${parts.join(', ')})`);
    }
    out.push('');
  }
  out.push('Base.metadata.create_all(bind=engine)');
  return out.join('\n') + '\n';
}
|
||||||
|
/**
 * Generates `schemas.py`: a Pydantic `<Entity>Create` model and an
 * `<Entity>Response` model (Create plus `id`) for every entity.
 *
 * `date` / `datetime` are imported when any field's `py_type` mentions them.
 * Entries of `spec.extra_imports` are copied only when they are full
 * `from ...` / `import ...` statements.
 *
 * @param {Object} spec - Parsed JSON spec with `entities` and optional `extra_imports`.
 * @returns {string} Python source text.
 */
function tmplSchemas(spec) {
  const dtTypes = new Set();
  for (const e of spec.entities) for (const f of e.fields) {
    if (/\bdate\b/i.test(f.py_type) && !/datetime/.test(f.py_type)) dtTypes.add('date');
    if (/\bdatetime\b/i.test(f.py_type)) dtTypes.add('datetime');
  }

  let code = 'from pydantic import BaseModel, ConfigDict\n';
  if (dtTypes.size > 0) code += `from datetime import ${[...dtTypes].sort().join(', ')}\n`;
  for (const imp of (spec.extra_imports || [])) {
    // The list comes from model output; ignore entries that are not strings.
    if (typeof imp !== 'string') continue;
    if (/^(date|datetime)$/.test(imp.trim())) continue;
    if (/^from\s/.test(imp) || /^import\s/.test(imp)) code += imp + '\n';
  }
  code += '\n';

  for (const e of spec.entities) {
    code += `class ${e.name}Create(BaseModel):\n`;
    // A class statement needs a body even when the entity has no fields.
    if (e.fields.length === 0) code += '    pass\n';
    for (const f of e.fields) {
      if (f.default !== null && f.default !== undefined) code += `    ${f.name}: ${f.py_type} = ${pyLiteral(f.default)}\n`;
      else if (f.nullable && f.py_type.includes('None')) code += `    ${f.name}: ${f.py_type} = None\n`;
      else code += `    ${f.name}: ${f.py_type}\n`;
    }
    code += `\nclass ${e.name}Response(${e.name}Create):\n    id: int\n    model_config = ConfigDict(from_attributes=True)\n\n`;
  }
  return code;
}
|
||||||
|
/**
 * Generates `main.py`: a FastAPI app with create/list/get/update/delete
 * endpoints under `/<table_name>/` for every entity.
 *
 * Nested Python blocks (`try:`, `if`, `for`) are emitted one 4-space level
 * deeper than their header so the generated file parses.
 *
 * @param {Object} spec - Parsed JSON spec with `entities`.
 * @returns {string} Python source text.
 */
function tmplMain(spec) {
  const modelNames = spec.entities.map(e => e.name).join(', ');
  const createNames = spec.entities.map(e => `${e.name}Create`).join(', ');
  const responseNames = spec.entities.map(e => `${e.name}Response`).join(', ');

  const out = [
    'from fastapi import FastAPI, Depends, HTTPException',
    'from sqlalchemy.orm import Session',
    `from models import Base, engine, SessionLocal, ${modelNames}`,
    `from schemas import ${createNames}, ${responseNames}`,
    '',
    'app = FastAPI()',
    '',
    'def get_db():',
    '    db = SessionLocal()',
    '    try:',
    '        yield db',
    '    finally:',
    '        db.close()',
    '',
  ];

  for (const e of spec.entities) {
    const lo = e.name.toLowerCase();
    const tb = e.table_name;
    const notFound = [
      `    if not db_item:`,
      `        raise HTTPException(status_code=404, detail="${e.name} not found")`,
    ];
    out.push(
      `@app.post("/${tb}/", response_model=${e.name}Response, status_code=201)`,
      `def create_${lo}(item: ${e.name}Create, db: Session = Depends(get_db)):`,
      `    db_item = ${e.name}(**item.model_dump())`,
      '    db.add(db_item)',
      '    db.commit()',
      '    db.refresh(db_item)',
      '    return db_item',
      '',
      `@app.get("/${tb}/", response_model=list[${e.name}Response])`,
      `def list_${lo}s(db: Session = Depends(get_db)):`,
      `    return db.query(${e.name}).all()`,
      '',
      `@app.get("/${tb}/{item_id}", response_model=${e.name}Response)`,
      `def get_${lo}(item_id: int, db: Session = Depends(get_db)):`,
      `    item = db.query(${e.name}).filter(${e.name}.id == item_id).first()`,
      '    if not item:',
      `        raise HTTPException(status_code=404, detail="${e.name} not found")`,
      '    return item',
      '',
      `@app.put("/${tb}/{item_id}", response_model=${e.name}Response)`,
      `def update_${lo}(item_id: int, item: ${e.name}Create, db: Session = Depends(get_db)):`,
      `    db_item = db.query(${e.name}).filter(${e.name}.id == item_id).first()`,
      ...notFound,
      '    for key, value in item.model_dump().items():',
      '        setattr(db_item, key, value)',
      '    db.commit()',
      '    db.refresh(db_item)',
      '    return db_item',
      '',
      `@app.delete("/${tb}/{item_id}", status_code=204)`,
      `def delete_${lo}(item_id: int, db: Session = Depends(get_db)):`,
      `    db_item = db.query(${e.name}).filter(${e.name}.id == item_id).first()`,
      ...notFound,
      '    db.delete(db_item)',
      '    db.commit()',
      '',
    );
  }
  return out.join('\n') + '\n';
}
|
||||||
|
/**
 * Generates `test_main.py`: a pytest module that overrides the app's database
 * dependency with a separate SQLite file and exercises the CRUD endpoints of
 * every entity.
 *
 * Sample request data is derived per field: the spec default when present,
 * otherwise a value chosen from the field's `py_type`.
 *
 * @param {Object} spec - Parsed JSON spec with `entities`.
 * @returns {string} Python source text.
 */
function tmplTests(spec) {
  const out = [
    'from fastapi.testclient import TestClient',
    'from sqlalchemy import create_engine',
    'from sqlalchemy.orm import sessionmaker',
    'from main import app, get_db',
    'from models import Base',
    '',
    'TEST_DB = "sqlite:///./test.db"',
    'test_engine = create_engine(TEST_DB, connect_args={"check_same_thread": False})',
    'TestSession = sessionmaker(autocommit=False, autoflush=False, bind=test_engine)',
    'Base.metadata.create_all(bind=test_engine)',
    '',
    'def override_get_db():',
    '    db = TestSession()',
    '    try:',
    '        yield db',
    '    finally:',
    '        db.close()',
    '',
    'app.dependency_overrides[get_db] = override_get_db',
    'client = TestClient(app)',
    '',
  ];

  for (const e of spec.entities) {
    const lo = e.name.toLowerCase();
    const tb = e.table_name;

    const testData = {};
    for (const f of e.fields) {
      if (f.default !== null && f.default !== undefined) { testData[f.name] = f.default; continue; }
      if (f.py_type.includes('str')) testData[f.name] = `Test ${f.name}`;
      else if (f.py_type.includes('int')) testData[f.name] = 1;
      else if (f.py_type.includes('float')) testData[f.name] = 1.0;
      else if (f.py_type.includes('bool')) testData[f.name] = true;
      else if (f.py_type.includes('date')) testData[f.name] = '2024-01-15';
    }
    const td = pyJsonLiteral(testData);

    // The update payload changes the first free-text field (never `status`).
    const firstStr = e.fields.find(f => f.py_type.includes('str') && f.name !== 'status');
    const updateData = { ...testData };
    if (firstStr) updateData[firstStr.name] = `Updated ${firstStr.name}`;
    const ud = pyJsonLiteral(updateData);

    out.push(
      `def test_create_${lo}():`,
      `    response = client.post('/${tb}/', json=${td})`,
      '    assert response.status_code == 201',
      "    assert 'id' in response.json()",
      '',
      `def test_list_${lo}s():`,
      `    client.post('/${tb}/', json=${td})`,
      `    response = client.get('/${tb}/')`,
      '    assert response.status_code == 200',
      '    assert len(response.json()) >= 1',
      '',
      `def test_get_${lo}_by_id():`,
      `    created = client.post('/${tb}/', json=${td}).json()`,
      "    item_id = created['id']",
      `    response = client.get(f'/${tb}/{item_id}')`,
      '    assert response.status_code == 200',
      "    assert response.json()['id'] == item_id",
      '',
      `def test_get_${lo}_not_found():`,
      `    response = client.get('/${tb}/99999')`,
      '    assert response.status_code == 404',
      '',
      `def test_update_${lo}():`,
      `    created = client.post('/${tb}/', json=${td}).json()`,
      "    item_id = created['id']",
      `    response = client.put(f'/${tb}/{item_id}', json=${ud})`,
      '    assert response.status_code == 200',
      '',
      `def test_delete_${lo}():`,
      `    created = client.post('/${tb}/', json=${td}).json()`,
      "    item_id = created['id']",
      `    response = client.delete(f'/${tb}/{item_id}')`,
      '    assert response.status_code == 204',
      `    response = client.get(f'/${tb}/{item_id}')`,
      '    assert response.status_code == 404',
      '',
    );
  }
  return out.join('\n') + '\n';
}
|
||||||
|
/**
 * Generates `pyproject.toml` for the generated project.
 *
 * The project name comes from model output, so it is reduced to lower-case
 * letters, digits, `.`, `_` and `-`, starting and ending with a letter or
 * digit. Anything else (quotes, punctuation, spaces) would break the TOML
 * string or be rejected as a package name.
 *
 * @param {Object} spec - Parsed JSON spec; `project_name` is optional.
 * @returns {string} TOML source text.
 */
function tmplPyproject(spec) {
  const name = String(spec.project_name || 'app')
    .toLowerCase()
    .replace(/[^a-z0-9._-]+/g, '-')
    .replace(/^[^a-z0-9]+|[^a-z0-9]+$/g, '') || 'app';
  return [
    '[project]',
    `name = "${name}"`,
    'version = "0.1.0"',
    'requires-python = ">=3.11"',
    'dependencies = [',
    '    "fastapi",',
    '    "uvicorn[standard]",',
    '    "sqlalchemy",',
    '    "pytest",',
    '    "httpx",',
    ']',
    '',
  ].join('\n');
}
|
||||||
|
|
||||||
|
// === Validaattori ===
|
||||||
|
/**
 * Runs static checks over the generated Python files.
 *
 * Checks, per `.py` file:
 *  - relative imports (`from .x import`),
 *  - names imported from `models` / `schemas` / `main` that do not occur as a
 *    whole identifier in that file,
 *  - in `schemas.py`, `date` / `datetime` annotations whose name is not
 *    brought in by a `from datetime import ...` line,
 *  - bare JavaScript-style `true` / `false`.
 *
 * @param {Object<string, string>} files - Map of file name to file content.
 * @returns {string[]} One message per finding, each starting with `ISSUE: <file>:`.
 */
function validateProjectCode(files) {
  const issues = [];
  const escapeRe = (s) => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // Splits the name list of an import line; `pick` selects the original
  // name ('first') or the local alias ('last') of `X as Y`.
  const importedNames = (list, pick) => list
    .replace(/#.*$/, '')
    .replace(/[()]/g, ' ')
    .split(',')
    .map((n) => {
      const parts = n.trim().split(/\s+as\s+/);
      return (pick === 'last' ? parts[parts.length - 1] : parts[0]).trim();
    })
    .filter(Boolean);

  for (const [fname, code] of Object.entries(files)) {
    if (!fname.endsWith('.py')) continue;
    const lines = code.split('\n');

    for (const line of lines) {
      if (/^from\s+\.(\w*)\s+import/.test(line)) issues.push(`ISSUE: ${fname}: relatiivinen import`);
    }

    for (const line of lines) {
      const m = line.match(/^from\s+(models|schemas|main)\s+import\s+(.+)/);
      if (!m) continue;
      const srcCode = files[m[1] + '.py'];
      if (!srcCode) { issues.push(`ISSUE: ${fname}: ${m[1]}.py puuttuu`); continue; }
      for (const name of importedNames(m[2], 'first')) {
        if (name === '*') continue;
        // Whole-identifier match: "User" must not be satisfied by "UserCreate".
        if (!new RegExp(`\\b${escapeRe(name)}\\b`).test(srcCode)) {
          issues.push(`ISSUE: ${fname}: "${name}" puuttuu ${m[1]}.py:stä`);
        }
      }
    }

    if (fname === 'schemas.py') {
      // Names actually made available by `from datetime import ...` lines.
      const fromDatetime = new Set();
      for (const line of lines) {
        const im = line.match(/^\s*from\s+datetime\s+import\s+(.+)/);
        if (im) for (const n of importedNames(im[1], 'last')) fromDatetime.add(n);
      }
      const has = (n) => fromDatetime.has(n) || fromDatetime.has('*');
      if (/:\s*date\b/.test(code) && !has('date'))
        issues.push('ISSUE: schemas.py: date-import puuttuu');
      if (/:\s*datetime\b/.test(code) && !has('datetime'))
        issues.push('ISSUE: schemas.py: datetime-import puuttuu');
    }

    for (let i = 0; i < lines.length; i++) {
      const line = lines[i];
      if (/^\s*#/.test(line) || /^\s*$/.test(line)) continue;
      if (/(?<!["\w])false(?![\w"])/.test(line)) issues.push(`ISSUE: ${fname}:${i+1}: "false" → "False"`);
      if (/(?<!["\w])true(?![\w"])/.test(line)) issues.push(`ISSUE: ${fname}:${i+1}: "true" → "True"`);
    }
  }
  return issues;
}
|
||||||
|
|
||||||
|
/**
 * Extracts the first parseable JSON object from model output.
 *
 * A fenced code block (three backticks, optionally tagged `json`) is searched
 * first; when it yields nothing the whole text is searched. Each `{` is
 * paired with its matching `}` while skipping braces inside double-quoted
 * strings, so stray closing braces and braces in string values do not derail
 * the search.
 *
 * @param {string} text - Raw model response.
 * @returns {Object|null} The parsed object, or null when none is found.
 */
function extractJson(text) {
  // Index of the `}` that closes the `{` at `open`, or -1 when unbalanced.
  const matchingBrace = (s, open) => {
    let depth = 0;
    let inString = false;
    let escaped = false;
    for (let i = open; i < s.length; i++) {
      const ch = s[i];
      if (inString) {
        if (escaped) escaped = false;
        else if (ch === '\\') escaped = true;
        else if (ch === '"') inString = false;
        continue;
      }
      if (ch === '"') inString = true;
      else if (ch === '{') depth++;
      else if (ch === '}' && --depth === 0) return i;
    }
    return -1;
  };

  // Tries every `{` in order until one spans valid JSON.
  const scan = (s) => {
    let open = s.indexOf('{');
    while (open >= 0) {
      const close = matchingBrace(s, open);
      if (close >= 0) {
        try {
          return JSON.parse(s.slice(open, close + 1));
        } catch {
          // Not valid JSON from this brace; try the next one.
        }
      }
      open = s.indexOf('{', open + 1);
    }
    return null;
  };

  const fenced = text.match(/```(?:json)?\s*\n([\s\S]*?)```/);
  return (fenced ? scan(fenced[1].trim()) : null) ?? scan(text);
}
|
||||||
|
|
||||||
|
// === Testiskenaariot ===
|
||||||
|
// Benchmark scenarios: a short id plus the project description given to the
// model as the user prompt. The first entry is the default scenario.
const SCENARIOS = [
  ['todo', 'Todo-sovellus: tehtävien hallinta, deadline, prioriteetti ja status'],
  ['users', 'REST API käyttäjähallinnalle SQLite-tietokannalla'],
  ['blog', 'Blogi-API: kirjoittajat ja artikkelit, julkaisupäivämäärä ja status'],
].map(([id, prompt]) => ({ id, prompt }));
|
||||||
|
|
||||||
|
// === Pipeline: yhdelle mallille ja skenaariolle ===
|
||||||
|
/**
 * Runs the full benchmark pipeline for one model and one scenario.
 *
 * Steps: (1) requirements brief, (2) JSON spec, (3) template code generation,
 * (4) validation with up to MAX_FIX_ROUNDS model-driven fix rounds, (5) pytest
 * in the output directory via `uv`. Artifacts are written to
 * `<OUTPUT_DIR>/<model>__<scenario>/`.
 *
 * Never throws for pipeline failures: they are reported through `result.error`.
 * Timing totals are filled in on every exit path, including early failures.
 *
 * @param {string} model - Model name.
 * @param {{id: string, prompt: string}} scenario - Scenario to run.
 * @returns {Promise<Object>} Result record (flags, test counts, timings, error).
 */
async function runPipeline(model, scenario) {
  const result = {
    model, scenario: scenario.id,
    reqOk: false, specOk: false, specEntities: 0,
    validationIssues: 0, fixRounds: 0,
    testsTotal: 0, testsPassed: 0, testsFailed: 0,
    totalDurationMs: 0, totalTokens: 0, avgTokPerSec: 0,
    error: null,
  };
  const timings = [];
  const dir = `${OUTPUT_DIR}/${model.replace(/[/:]/g, '_')}__${scenario.id}`;
  mkdirSync(dir, { recursive: true });

  // Number in front of `label` in pytest output ("3 passed", "2 errors"), or 0.
  const countOf = (output, label) => {
    const m = output.match(new RegExp(`(\\d+) ${label}`));
    return m ? Number.parseInt(m[1], 10) : 0;
  };

  try {
    // 1. Requirements
    console.log(` [1/5] Vaatimukset...`);
    const req = await ollamaChat(model, scenario.prompt, CLIENT_SYSTEM, 1024);
    timings.push(req);
    if (!req.text || req.text.length < 50) { result.error = 'Vaatimukset liian lyhyet'; return result; }
    result.reqOk = true;
    writeFileSync(`${dir}/_requirements.txt`, req.text);

    // 2. JSON spec
    console.log(` [2/5] JSON-speksi...`);
    const specResp = await ollamaChat(model, `${req.text}\n\nOutput a JSON spec for this project.`, SPEC_SYSTEM, 2048);
    timings.push(specResp);
    const spec = extractJson(specResp.text);
    if (!spec || !spec.entities || spec.entities.length === 0) {
      result.error = 'JSON-speksi epäonnistui';
      writeFileSync(`${dir}/_spec_raw.txt`, specResp.text);
      return result;
    }
    result.specOk = true;
    result.specEntities = spec.entities.length;
    writeFileSync(`${dir}/_spec.json`, JSON.stringify(spec, null, 2));

    // 3. Template generation
    console.log(` [3/5] Koodigenerointi...`);
    const files = {
      'models.py': tmplModels(spec),
      'schemas.py': tmplSchemas(spec),
      'main.py': tmplMain(spec),
      'test_main.py': tmplTests(spec),
      'pyproject.toml': tmplPyproject(spec),
    };

    // 4. Validation + fix loop
    let issues = validateProjectCode(files);
    let fixRound = 0;
    while (issues.length > 0 && fixRound < MAX_FIX_ROUNDS) {
      fixRound++;
      console.log(` [4/5] Korjauskierros ${fixRound} (${issues.length} ongelmaa)...`);
      // Group the findings by the file name embedded in each message.
      const issuesByFile = {};
      for (const issue of issues) {
        const m = issue.match(/^ISSUE:\s*(\S+?):/);
        const fname = m ? m[1] : 'unknown';
        if (!issuesByFile[fname]) issuesByFile[fname] = [];
        issuesByFile[fname].push(issue);
      }
      for (const [fname, fIssues] of Object.entries(issuesByFile)) {
        if (!files[fname]) continue;
        const fixPrompt = `Fix the following issues in this Python file. Return ONLY the complete corrected file, no explanations.\n\nISSUES:\n${fIssues.join('\n')}\n\nCURRENT FILE (${fname}):\n\`\`\`python\n${files[fname]}\`\`\``;
        const fixResp = await ollamaChat(model, fixPrompt, FIX_SYSTEM, 2048);
        timings.push(fixResp);
        if (fixResp.text) {
          // Strip a markdown fence in case the model added one anyway.
          files[fname] = fixResp.text.replace(/^```(?:python)?\s*\n?/m, '').replace(/\n?```\s*$/m, '').trim() + '\n';
        }
      }
      issues = validateProjectCode(files);
    }
    result.validationIssues = issues.length;
    result.fixRounds = fixRound;

    // Write the files to disk
    for (const [fn, content] of Object.entries(files)) writeFileSync(`${dir}/${fn}`, content);

    // 5. Pytest
    console.log(` [5/5] Pytest...`);
    try {
      const uvPath = process.env.HOME ? `${process.env.HOME}/.local/bin/uv` : '';
      const uv = uvPath && existsSync(uvPath) ? `"${uvPath}"` : 'uv';
      // `cwd` replaces a shell `cd`, so the directory name never needs quoting.
      execSync(`${uv} sync`, { cwd: dir, timeout: 60000, stdio: 'pipe', encoding: 'utf-8' });
      rmSync(`${dir}/app.db`, { force: true });
      rmSync(`${dir}/test.db`, { force: true });
      const pytestOut = execSync(`${uv} run pytest test_main.py -v --tb=short 2>&1`, { cwd: dir, timeout: 60000, encoding: 'utf-8' });
      writeFileSync(`${dir}/_pytest.txt`, pytestOut);
      result.testsPassed = countOf(pytestOut, 'passed');
      result.testsFailed = countOf(pytestOut, 'failed');
      result.testsTotal = result.testsPassed + result.testsFailed;
    } catch (e) {
      // stdout/stderr may be Buffers (an empty Buffer is truthy), so coerce
      // to text before matching.
      const output = [e.stdout, e.stderr].map(s => (s ? String(s) : '')).join('') || e.message || '';
      writeFileSync(`${dir}/_pytest.txt`, output);
      result.testsPassed = countOf(output, 'passed');
      result.testsFailed = countOf(output, 'failed') + countOf(output, 'error');
      result.testsTotal = result.testsPassed + result.testsFailed;
      if (result.testsTotal === 0) result.error = 'Pytest kaatui';
    }
  } catch (e) {
    result.error = e.message;
  } finally {
    // Summary. In `finally` so the early `return result` paths above also
    // report the time and tokens they consumed.
    result.totalDurationMs = timings.reduce((s, t) => s + t.durationMs, 0);
    result.totalTokens = timings.reduce((s, t) => s + t.tokens, 0);
    result.avgTokPerSec = timings.length > 0 ? timings.reduce((s, t) => s + t.tokPerSec, 0) / timings.length : 0;
  }

  return result;
}
|
||||||
|
|
||||||
|
// === Main ===
|
||||||
|
/**
 * Entry point: lists the models, runs every selected model × scenario
 * combination through runPipeline, prints a result table and writes
 * `<OUTPUT_DIR>/results.json`.
 *
 * `--scenarios` accepts `all`, a comma-separated list of scenario ids, or
 * anything else for the default (first) scenario.
 *
 * Exits with status 1 when the backend cannot be reached or no model is left
 * to run. The output directory is wiped only after those checks pass.
 */
async function main() {
  const centered = (text, width) => text.padStart(Math.floor((width + text.length) / 2)).padEnd(width);
  // A run counts as failed when it has an error or produced no test results.
  const verdictOf = (r) => {
    if (r.error || r.testsTotal === 0) return 'fail';
    return r.testsPassed === r.testsTotal ? 'pass' : 'partial';
  };

  const bannerWidth = 46;
  console.log(`╔${'═'.repeat(bannerWidth)}╗`);
  console.log(`║${centered('Kipinä Model Benchmark', bannerWidth)}║`);
  console.log(`╚${'═'.repeat(bannerWidth)}╝`);
  const backendUrl = HUB_URL || OLLAMA_URL;
  console.log(HUB_URL ? `Hub: ${HUB_URL}` : `Ollama: ${OLLAMA_URL}`);

  // Fetch the models
  let models;
  try {
    models = await ollamaListModels();
  } catch (e) {
    console.error(`Ei yhteyttä Ollamaan (${backendUrl}): ${e.message}`);
    process.exit(1);
  }

  if (FILTER_MODELS) {
    const filter = FILTER_MODELS.split(',').map(s => s.trim());
    models = models.filter(m => filter.some(f => m.includes(f)));
  }
  if (models.length === 0) {
    console.error(FILTER_MODELS ? `Ei malleja suodattimella "${FILTER_MODELS}"` : 'Ei malleja saatavilla');
    process.exit(1);
  }

  console.log(`Mallit (${models.length}): ${models.join(', ')}`);

  let scenarios = SCENARIOS;
  if (SCENARIO_FILTER !== 'all') {
    const wanted = SCENARIO_FILTER.split(',').map(s => s.trim());
    scenarios = SCENARIOS.filter(s => wanted.includes(s.id));
    // 'default' (or an unknown id) selects the first scenario.
    if (scenarios.length === 0) scenarios = [SCENARIOS[0]];
  }
  console.log(`Skenaariot (${scenarios.length}): ${scenarios.map(s => s.id).join(', ')}`);
  console.log(`Tulokset: ${OUTPUT_DIR}/`);
  console.log('');

  // Clean the output directory
  rmSync(OUTPUT_DIR, { recursive: true, force: true });
  mkdirSync(OUTPUT_DIR, { recursive: true });

  const results = [];

  for (const model of models) {
    for (const scenario of scenarios) {
      console.log(`\n━━━ ${model} × ${scenario.id} ━━━`);
      const r = await runPipeline(model, scenario);
      results.push(r);

      const verdict = verdictOf(r);
      const status = r.error ? `✗ ${r.error}` :
        verdict === 'pass' ? `✓ ${r.testsPassed}/${r.testsTotal}` :
        verdict === 'partial' ? `◐ ${r.testsPassed}/${r.testsTotal}` :
        `✗ ${r.testsPassed}/${r.testsTotal}`;
      console.log(` → ${status} | ${(r.totalDurationMs/1000).toFixed(1)}s | ${r.totalTokens} tok | ${r.avgTokPerSec.toFixed(1)} tok/s`);
    }
  }

  // === Result table ===
  // Column title and width; every border is derived from the row width so
  // the box always lines up.
  const columns = [
    ['Malli', 40], ['Skenaario', 10], ['Speksi', 8], ['Testit', 10],
    ['Korjaus', 8], ['Aika', 8], ['tok/s', 8], ['Tulos', 9],
  ];
  const formatRow = (cells) => cells.map((c, i) => String(c).padEnd(columns[i][1])).join(' │ ');
  const header = formatRow(columns.map(c => c[0]));
  const innerWidth = header.length + 2;
  const rule = (left, right) => left + '═'.repeat(innerWidth) + right;
  const verdictLabels = { pass: '✓ PASS', partial: '◐ PARTIAL', fail: '✗ FAIL' };

  console.log('\n\n' + rule('╔', '╗'));
  console.log(`║${centered('TULOKSET', innerWidth)}║`);
  console.log(rule('╠', '╣'));
  console.log(`║ ${header} ║`);
  console.log(rule('╠', '╣'));

  for (const r of results) {
    const row = formatRow([
      r.model,
      r.scenario,
      r.specOk ? `✓ ${r.specEntities}e` : '✗',
      r.testsTotal > 0 ? `${r.testsPassed}/${r.testsTotal}` : '-',
      r.fixRounds > 0 ? `${r.fixRounds}×` : '-',
      `${(r.totalDurationMs/1000).toFixed(0)}s`,
      `${r.avgTokPerSec.toFixed(0)}`,
      verdictLabels[verdictOf(r)],
    ]);
    console.log(`║ ${row} ║`);
  }
  console.log(rule('╚', '╝'));

  // Save JSON
  writeFileSync(`${OUTPUT_DIR}/results.json`, JSON.stringify(results, null, 2));
  console.log(`\nJSON: ${OUTPUT_DIR}/results.json`);

  // Summary
  const count = (v) => results.filter(r => verdictOf(r) === v).length;
  console.log(`\n✓ PASS: ${count('pass')} | ◐ PARTIAL: ${count('partial')} | ✗ FAIL: ${count('fail')} | Yhteensä: ${results.length}`);
}

main().catch(e => { console.error(e); process.exit(1); });
|
||||||
Reference in New Issue
Block a user