From 4aa09e10255a9afcb92ae53fc7bdd10c00e4ca51 Mon Sep 17 00:00:00 2001 From: jaakko Date: Mon, 13 Apr 2026 22:23:35 +0300 Subject: [PATCH] Benchmark: LLM generoi koodin templaattien sijaan MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Vaihe 3 käyttää nyt oikeaa LLM-kutsua (CODE_SYSTEM-prompti) koodin generointiin. Templaattifunktiot poistettu kokonaan. Tämä mittaa mallin todellista koodingenerointikykyä. --- network-poc/tests/model-benchmark.mjs | 182 +++++++++----------------- 1 file changed, 62 insertions(+), 120 deletions(-) diff --git a/network-poc/tests/model-benchmark.mjs b/network-poc/tests/model-benchmark.mjs index 6180dfd..528ce85 100644 --- a/network-poc/tests/model-benchmark.mjs +++ b/network-poc/tests/model-benchmark.mjs @@ -133,118 +133,59 @@ Blog → Author: name,email,bio(Text|None) / Post: title, content(Text), author_ const FIX_SYSTEM = 'You are a Python code fixer. Return ONLY the corrected Python file. No markdown fences, no explanations — just valid Python code.'; -// === Template-funktiot (kopioitu korjatusta index.astrosta) === -function pyLiteral(val) { - if (val === true) return 'True'; - if (val === false) return 'False'; - if (val === null || val === undefined) return 'None'; - if (typeof val === 'string') return `"${val}"`; - return String(val); -} -function pyJsonLiteral(obj) { - const parts = Object.entries(obj).map(([k, v]) => { - let pyVal; - if (v === true) pyVal = 'True'; else if (v === false) pyVal = 'False'; - else if (v === null) pyVal = 'None'; else if (typeof v === 'string') pyVal = `"${v}"`; - else pyVal = String(v); - return `"${k}":${pyVal}`; - }); - return '{' + parts.join(',') + '}'; -} -function tmplModels(spec) { - const saTypes = new Set(['Integer']); - for (const e of spec.entities) for (const f of e.fields) saTypes.add(f.sa_type.match(/^(\w+)/)[1]); - const relMap = {}; - for (const r of (spec.relationships || [])) { - const target = spec.entities.find(e => e.name === r.to); - if (target) relMap[`${r.from}.${r.field}`] = target.table_name; +const CODE_SYSTEM = `You are a Python backend developer. Generate a complete FastAPI project with SQLAlchemy and SQLite. + +Given the project requirements and JSON specification, generate these 5 files: + +1. models.py - SQLAlchemy models with database setup (create_engine, declarative_base, sessionmaker, Base.metadata.create_all) +2. schemas.py - Pydantic schemas (Create + Response for each entity, use ConfigDict(from_attributes=True)) +3. main.py - FastAPI application with full CRUD endpoints for each entity +4. test_main.py - Pytest tests using TestClient with separate test database and dependency override +5. pyproject.toml - Project configuration with dependencies + +OUTPUT FORMAT — use these exact markers to separate files: + +=== models.py === + + +=== schemas.py === + + +=== main.py === + + +=== test_main.py === + + +=== pyproject.toml === + + +RULES: +- SQLite: create_engine("sqlite:///./app.db", connect_args={"check_same_thread": False}) +- Each model: auto-increment "id" Column(Integer, primary_key=True, index=True) +- Schemas: BaseModel with ConfigDict(from_attributes=True) for Response variants +- Endpoints per entity: POST (create, 201), GET (list), GET by id (404 if missing), PUT (update), DELETE (204) +- Tests: separate test.db, override get_db dependency, use TestClient +- pyproject.toml: fastapi, uvicorn[standard], sqlalchemy, pytest, httpx +- Status fields: String(20) with default, NEVER Enum +- Absolute imports only (from models import ..., from schemas import ...) +- Python booleans: True/False/None (not true/false/null/none) +- NO markdown fences inside file content — just raw code +- Every _id foreign key field MUST have ForeignKey("table.id") constraint`; + +// === Tiedostoparseri LLM-vastauksesta === +function parseGeneratedFiles(text) { + const files = {}; + const sections = text.split(/===\s*(\S+\.(?:py|toml))\s*===/); + // sections: [preamble, filename1, content1, filename2, content2, ...] + for (let i = 1; i < sections.length - 1; i += 2) { + const name = sections[i]; + let content = sections[i + 1].trim(); + // Poista mahdolliset markdown-aidat + content = content.replace(/^```(?:python|toml)?\s*\n?/m, '').replace(/\n?```\s*$/m, '').trim(); + if (content) files[name] = content + '\n'; } - if (Object.keys(relMap).length > 0) saTypes.add('ForeignKey'); - const imports = [...saTypes].sort().join(', '); - let code = `from sqlalchemy import create_engine, Column, ${imports}\nfrom sqlalchemy.orm import declarative_base, sessionmaker\n\nDATABASE_URL = "sqlite:///./app.db"\nengine = create_engine(DATABASE_URL, connect_args={"check_same_thread": False})\nSessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)\nBase = declarative_base()\n\n`; - for (const e of spec.entities) { - code += `class ${e.name}(Base):\n __tablename__ = "${e.table_name}"\n id = Column(Integer, primary_key=True, index=True)\n`; - for (const f of e.fields) { - const fkTarget = relMap[`${e.name}.${f.name}`]; - let parts = fkTarget ? [`Column(${f.sa_type}, ForeignKey("${fkTarget}.id")`] : [`Column(${f.sa_type}`]; - if (!f.nullable) parts.push('nullable=False'); - if (f.default !== null && f.default !== undefined) parts.push(`default=${pyLiteral(f.default)}`); - code += ` ${f.name} = ${parts.join(', ')})\n`; - } - code += '\n'; - } - code += 'Base.metadata.create_all(bind=engine)\n'; - return code; -} -function tmplSchemas(spec) { - const dtTypes = new Set(); - for (const e of spec.entities) for (const f of e.fields) { - if (/\bdate\b/i.test(f.py_type) && !/datetime/.test(f.py_type)) dtTypes.add('date'); - if (/\bdatetime\b/i.test(f.py_type)) dtTypes.add('datetime'); - } - let code = 'from pydantic import BaseModel, ConfigDict\n'; - if (dtTypes.size > 0) code += `from datetime import ${[...dtTypes].sort().join(', ')}\n`; - for (const imp of (spec.extra_imports || [])) { - if (/^(date|datetime)$/.test(imp.trim())) continue; - if (/^from\s/.test(imp) || /^import\s/.test(imp)) code += imp + '\n'; - } - code += '\n'; - for (const e of spec.entities) { - code += `class ${e.name}Create(BaseModel):\n`; - for (const f of e.fields) { - if (f.default !== null && f.default !== undefined) code += ` ${f.name}: ${f.py_type} = ${pyLiteral(f.default)}\n`; - else if (f.nullable && f.py_type.includes('None')) code += ` ${f.name}: ${f.py_type} = None\n`; - else code += ` ${f.name}: ${f.py_type}\n`; - } - code += `\nclass ${e.name}Response(${e.name}Create):\n id: int\n model_config = ConfigDict(from_attributes=True)\n\n`; - } - return code; -} -function tmplMain(spec) { - const modelNames = spec.entities.map(e => e.name).join(', '); - const createNames = spec.entities.map(e => e.name+'Create').join(', '); - const responseNames = spec.entities.map(e => e.name+'Response').join(', '); - let code = `from fastapi import FastAPI, Depends, HTTPException\nfrom sqlalchemy.orm import Session\nfrom models import Base, engine, SessionLocal, ${modelNames}\nfrom schemas import ${createNames}, ${responseNames}\n\napp = FastAPI()\n\ndef get_db():\n db = SessionLocal()\n try:\n yield db\n finally:\n db.close()\n\n`; - for (const e of spec.entities) { - const lo = e.name.toLowerCase(), tb = e.table_name; - code += `@app.post("/${tb}/", response_model=${e.name}Response, status_code=201)\ndef create_${lo}(item: ${e.name}Create, db: Session = Depends(get_db)):\n db_item = ${e.name}(**item.model_dump())\n db.add(db_item)\n db.commit()\n db.refresh(db_item)\n return db_item\n\n`; - code += `@app.get("/${tb}/", response_model=list[${e.name}Response])\ndef list_${lo}s(db: Session = Depends(get_db)):\n return db.query(${e.name}).all()\n\n`; - code += `@app.get("/${tb}/{item_id}", response_model=${e.name}Response)\ndef get_${lo}(item_id: int, db: Session = Depends(get_db)):\n item = db.query(${e.name}).filter(${e.name}.id == item_id).first()\n if not item:\n raise HTTPException(status_code=404, detail="${e.name} not found")\n return item\n\n`; - code += `@app.put("/${tb}/{item_id}", response_model=${e.name}Response)\ndef update_${lo}(item_id: int, item: ${e.name}Create, db: Session = Depends(get_db)):\n db_item = db.query(${e.name}).filter(${e.name}.id == item_id).first()\n if not db_item:\n raise HTTPException(status_code=404, detail="${e.name} not found")\n for key, value in item.model_dump().items():\n setattr(db_item, key, value)\n db.commit()\n db.refresh(db_item)\n return db_item\n\n`; - code += `@app.delete("/${tb}/{item_id}", status_code=204)\ndef delete_${lo}(item_id: int, db: Session = Depends(get_db)):\n db_item = db.query(${e.name}).filter(${e.name}.id == item_id).first()\n if not db_item:\n raise HTTPException(status_code=404, detail="${e.name} not found")\n db.delete(db_item)\n db.commit()\n\n`; - } - return code; -} -function tmplTests(spec) { - let code = `from fastapi.testclient import TestClient\nfrom sqlalchemy import create_engine\nfrom sqlalchemy.orm import sessionmaker\nfrom main import app, get_db\nfrom models import Base\n\nTEST_DB = "sqlite:///./test.db"\ntest_engine = create_engine(TEST_DB, connect_args={"check_same_thread": False})\nTestSession = sessionmaker(autocommit=False, autoflush=False, bind=test_engine)\nBase.metadata.create_all(bind=test_engine)\n\ndef override_get_db():\n db = TestSession()\n try:\n yield db\n finally:\n db.close()\n\napp.dependency_overrides[get_db] = override_get_db\nclient = TestClient(app)\n\n`; - for (const e of spec.entities) { - const lo = e.name.toLowerCase(), tb = e.table_name; - const testData = {}; - for (const f of e.fields) { - if (f.default !== null && f.default !== undefined) { testData[f.name] = f.default; continue; } - if (f.py_type.includes('str')) testData[f.name] = `Test ${f.name}`; - else if (f.py_type.includes('int')) testData[f.name] = 1; - else if (f.py_type.includes('float')) testData[f.name] = 1.0; - else if (f.py_type.includes('bool')) testData[f.name] = true; - else if (f.py_type.includes('date')) testData[f.name] = '2024-01-15'; - } - const td = pyJsonLiteral(testData); - const firstStr = e.fields.find(f => f.py_type.includes('str') && f.name !== 'status'); - const updateData = {...testData}; - if (firstStr) updateData[firstStr.name] = `Updated ${firstStr.name}`; - const ud = pyJsonLiteral(updateData); - code += `def test_create_${lo}():\n response = client.post('/${tb}/', json=${td})\n assert response.status_code == 201\n assert 'id' in response.json()\n\n`; - code += `def test_list_${lo}s():\n client.post('/${tb}/', json=${td})\n response = client.get('/${tb}/')\n assert response.status_code == 200\n assert len(response.json()) >= 1\n\n`; - code += `def test_get_${lo}_by_id():\n created = client.post('/${tb}/', json=${td}).json()\n item_id = created['id']\n response = client.get(f'/${tb}/{item_id}')\n assert response.status_code == 200\n assert response.json()['id'] == item_id\n\n`; - code += `def test_get_${lo}_not_found():\n response = client.get('/${tb}/99999')\n assert response.status_code == 404\n\n`; - code += `def test_update_${lo}():\n created = client.post('/${tb}/', json=${td}).json()\n item_id = created['id']\n response = client.put(f'/${tb}/{item_id}', json=${ud})\n assert response.status_code == 200\n\n`; - code += `def test_delete_${lo}():\n created = client.post('/${tb}/', json=${td}).json()\n item_id = created['id']\n response = client.delete(f'/${tb}/{item_id}')\n assert response.status_code == 204\n response = client.get(f'/${tb}/{item_id}')\n assert response.status_code == 404\n\n`; - } - return code; -} -function tmplPyproject(spec) { - const name = (spec.project_name || 'app').toLowerCase().replace(/\s+/g, '-'); - return `[project]\nname = "${name}"\nversion = "0.1.0"\nrequires-python = ">=3.11"\ndependencies = [\n "fastapi",\n "uvicorn[standard]",\n "sqlalchemy",\n "pytest",\n "httpx",\n]\n`; + return files; } // === Validaattori === @@ -334,15 +275,16 @@ async function runPipeline(model, scenario) { result.specEntities = spec.entities.length; writeFileSync(`${dir}/_spec.json`, JSON.stringify(spec, null, 2)); - // 3. Template-generointi - console.log(` [3/5] Koodigenerointi...`); - const files = { - 'models.py': tmplModels(spec), - 'schemas.py': tmplSchemas(spec), - 'main.py': tmplMain(spec), - 'test_main.py': tmplTests(spec), - 'pyproject.toml': tmplPyproject(spec), - }; + // 3. LLM-koodigenerointi + console.log(` [3/5] Koodigenerointi (LLM)...`); + const codePrompt = `PROJECT REQUIREMENTS:\n${req.text}\n\nJSON SPECIFICATION:\n${JSON.stringify(spec, null, 2)}\n\nGenerate the complete project with all 5 files.`; + const codeResp = await ollamaChat(model, codePrompt, CODE_SYSTEM, 4096); + timings.push(codeResp); + writeFileSync(`${dir}/_code_raw.txt`, codeResp.text); + const files = parseGeneratedFiles(codeResp.text); + const required = ['models.py', 'schemas.py', 'main.py', 'test_main.py', 'pyproject.toml']; + const missing = required.filter(f => !files[f]); + if (missing.length > 0) { result.error = `Puuttuvat: ${missing.join(', ')}`; return result; } // 4. Validointi + korjaussilmukka let issues = validateProjectCode(files);