Benchmark: LLM generoi koodin templaattien sijaan

Vaihe 3 käyttää nyt oikeaa LLM-kutsua (CODE_SYSTEM-prompti)
koodin generointiin. Templaattifunktiot poistettu kokonaan.
Tämä mittaa mallin todellista koodingenerointikykyä.
This commit is contained in:
2026-04-13 22:23:35 +03:00
parent 20cea8f268
commit 4aa09e1025

View File

@@ -133,118 +133,59 @@ Blog → Author: name,email,bio(Text|None) / Post: title, content(Text), author_
const FIX_SYSTEM = 'You are a Python code fixer. Return ONLY the corrected Python file. No markdown fences, no explanations — just valid Python code.';
// === Template-funktiot (kopioitu korjatusta index.astrosta) ===
function pyLiteral(val) {
if (val === true) return 'True';
if (val === false) return 'False';
if (val === null || val === undefined) return 'None';
if (typeof val === 'string') return `"${val}"`;
return String(val);
const CODE_SYSTEM = `You are a Python backend developer. Generate a complete FastAPI project with SQLAlchemy and SQLite.
Given the project requirements and JSON specification, generate these 5 files:
1. models.py - SQLAlchemy models with database setup (create_engine, declarative_base, sessionmaker, Base.metadata.create_all)
2. schemas.py - Pydantic schemas (Create + Response for each entity, use ConfigDict(from_attributes=True))
3. main.py - FastAPI application with full CRUD endpoints for each entity
4. test_main.py - Pytest tests using TestClient with separate test database and dependency override
5. pyproject.toml - Project configuration with dependencies
OUTPUT FORMAT — use these exact markers to separate files:
=== models.py ===
<python code>
=== schemas.py ===
<python code>
=== main.py ===
<python code>
=== test_main.py ===
<python code>
=== pyproject.toml ===
<toml content>
RULES:
- SQLite: create_engine("sqlite:///./app.db", connect_args={"check_same_thread": False})
- Each model: auto-increment "id" Column(Integer, primary_key=True, index=True)
- Schemas: BaseModel with ConfigDict(from_attributes=True) for Response variants
- Endpoints per entity: POST (create, 201), GET (list), GET by id (404 if missing), PUT (update), DELETE (204)
- Tests: separate test.db, override get_db dependency, use TestClient
- pyproject.toml: fastapi, uvicorn[standard], sqlalchemy, pytest, httpx
- Status fields: String(20) with default, NEVER Enum
- Absolute imports only (from models import ..., from schemas import ...)
- Python booleans: True/False/None (not true/false/null/none)
- NO markdown fences inside file content — just raw code
- Every _id foreign key field MUST have ForeignKey("table.id") constraint`;
// === Tiedostoparseri LLM-vastauksesta ===
function parseGeneratedFiles(text) {
const files = {};
const sections = text.split(/===\s*(\S+\.(?:py|toml))\s*===/);
// sections: [preamble, filename1, content1, filename2, content2, ...]
for (let i = 1; i < sections.length - 1; i += 2) {
const name = sections[i];
let content = sections[i + 1].trim();
// Poista mahdolliset markdown-aidat
content = content.replace(/^```(?:python|toml)?\s*\n?/m, '').replace(/\n?```\s*$/m, '').trim();
if (content) files[name] = content + '\n';
}
function pyJsonLiteral(obj) {
const parts = Object.entries(obj).map(([k, v]) => {
let pyVal;
if (v === true) pyVal = 'True'; else if (v === false) pyVal = 'False';
else if (v === null) pyVal = 'None'; else if (typeof v === 'string') pyVal = `"${v}"`;
else pyVal = String(v);
return `"${k}":${pyVal}`;
});
return '{' + parts.join(',') + '}';
}
function tmplModels(spec) {
const saTypes = new Set(['Integer']);
for (const e of spec.entities) for (const f of e.fields) saTypes.add(f.sa_type.match(/^(\w+)/)[1]);
const relMap = {};
for (const r of (spec.relationships || [])) {
const target = spec.entities.find(e => e.name === r.to);
if (target) relMap[`${r.from}.${r.field}`] = target.table_name;
}
if (Object.keys(relMap).length > 0) saTypes.add('ForeignKey');
const imports = [...saTypes].sort().join(', ');
let code = `from sqlalchemy import create_engine, Column, ${imports}\nfrom sqlalchemy.orm import declarative_base, sessionmaker\n\nDATABASE_URL = "sqlite:///./app.db"\nengine = create_engine(DATABASE_URL, connect_args={"check_same_thread": False})\nSessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)\nBase = declarative_base()\n\n`;
for (const e of spec.entities) {
code += `class ${e.name}(Base):\n __tablename__ = "${e.table_name}"\n id = Column(Integer, primary_key=True, index=True)\n`;
for (const f of e.fields) {
const fkTarget = relMap[`${e.name}.${f.name}`];
let parts = fkTarget ? [`Column(${f.sa_type}, ForeignKey("${fkTarget}.id")`] : [`Column(${f.sa_type}`];
if (!f.nullable) parts.push('nullable=False');
if (f.default !== null && f.default !== undefined) parts.push(`default=${pyLiteral(f.default)}`);
code += ` ${f.name} = ${parts.join(', ')})\n`;
}
code += '\n';
}
code += 'Base.metadata.create_all(bind=engine)\n';
return code;
}
function tmplSchemas(spec) {
const dtTypes = new Set();
for (const e of spec.entities) for (const f of e.fields) {
if (/\bdate\b/i.test(f.py_type) && !/datetime/.test(f.py_type)) dtTypes.add('date');
if (/\bdatetime\b/i.test(f.py_type)) dtTypes.add('datetime');
}
let code = 'from pydantic import BaseModel, ConfigDict\n';
if (dtTypes.size > 0) code += `from datetime import ${[...dtTypes].sort().join(', ')}\n`;
for (const imp of (spec.extra_imports || [])) {
if (/^(date|datetime)$/.test(imp.trim())) continue;
if (/^from\s/.test(imp) || /^import\s/.test(imp)) code += imp + '\n';
}
code += '\n';
for (const e of spec.entities) {
code += `class ${e.name}Create(BaseModel):\n`;
for (const f of e.fields) {
if (f.default !== null && f.default !== undefined) code += ` ${f.name}: ${f.py_type} = ${pyLiteral(f.default)}\n`;
else if (f.nullable && f.py_type.includes('None')) code += ` ${f.name}: ${f.py_type} = None\n`;
else code += ` ${f.name}: ${f.py_type}\n`;
}
code += `\nclass ${e.name}Response(${e.name}Create):\n id: int\n model_config = ConfigDict(from_attributes=True)\n\n`;
}
return code;
}
function tmplMain(spec) {
const modelNames = spec.entities.map(e => e.name).join(', ');
const createNames = spec.entities.map(e => e.name+'Create').join(', ');
const responseNames = spec.entities.map(e => e.name+'Response').join(', ');
let code = `from fastapi import FastAPI, Depends, HTTPException\nfrom sqlalchemy.orm import Session\nfrom models import Base, engine, SessionLocal, ${modelNames}\nfrom schemas import ${createNames}, ${responseNames}\n\napp = FastAPI()\n\ndef get_db():\n db = SessionLocal()\n try:\n yield db\n finally:\n db.close()\n\n`;
for (const e of spec.entities) {
const lo = e.name.toLowerCase(), tb = e.table_name;
code += `@app.post("/${tb}/", response_model=${e.name}Response, status_code=201)\ndef create_${lo}(item: ${e.name}Create, db: Session = Depends(get_db)):\n db_item = ${e.name}(**item.model_dump())\n db.add(db_item)\n db.commit()\n db.refresh(db_item)\n return db_item\n\n`;
code += `@app.get("/${tb}/", response_model=list[${e.name}Response])\ndef list_${lo}s(db: Session = Depends(get_db)):\n return db.query(${e.name}).all()\n\n`;
code += `@app.get("/${tb}/{item_id}", response_model=${e.name}Response)\ndef get_${lo}(item_id: int, db: Session = Depends(get_db)):\n item = db.query(${e.name}).filter(${e.name}.id == item_id).first()\n if not item:\n raise HTTPException(status_code=404, detail="${e.name} not found")\n return item\n\n`;
code += `@app.put("/${tb}/{item_id}", response_model=${e.name}Response)\ndef update_${lo}(item_id: int, item: ${e.name}Create, db: Session = Depends(get_db)):\n db_item = db.query(${e.name}).filter(${e.name}.id == item_id).first()\n if not db_item:\n raise HTTPException(status_code=404, detail="${e.name} not found")\n for key, value in item.model_dump().items():\n setattr(db_item, key, value)\n db.commit()\n db.refresh(db_item)\n return db_item\n\n`;
code += `@app.delete("/${tb}/{item_id}", status_code=204)\ndef delete_${lo}(item_id: int, db: Session = Depends(get_db)):\n db_item = db.query(${e.name}).filter(${e.name}.id == item_id).first()\n if not db_item:\n raise HTTPException(status_code=404, detail="${e.name} not found")\n db.delete(db_item)\n db.commit()\n\n`;
}
return code;
}
function tmplTests(spec) {
let code = `from fastapi.testclient import TestClient\nfrom sqlalchemy import create_engine\nfrom sqlalchemy.orm import sessionmaker\nfrom main import app, get_db\nfrom models import Base\n\nTEST_DB = "sqlite:///./test.db"\ntest_engine = create_engine(TEST_DB, connect_args={"check_same_thread": False})\nTestSession = sessionmaker(autocommit=False, autoflush=False, bind=test_engine)\nBase.metadata.create_all(bind=test_engine)\n\ndef override_get_db():\n db = TestSession()\n try:\n yield db\n finally:\n db.close()\n\napp.dependency_overrides[get_db] = override_get_db\nclient = TestClient(app)\n\n`;
for (const e of spec.entities) {
const lo = e.name.toLowerCase(), tb = e.table_name;
const testData = {};
for (const f of e.fields) {
if (f.default !== null && f.default !== undefined) { testData[f.name] = f.default; continue; }
if (f.py_type.includes('str')) testData[f.name] = `Test ${f.name}`;
else if (f.py_type.includes('int')) testData[f.name] = 1;
else if (f.py_type.includes('float')) testData[f.name] = 1.0;
else if (f.py_type.includes('bool')) testData[f.name] = true;
else if (f.py_type.includes('date')) testData[f.name] = '2024-01-15';
}
const td = pyJsonLiteral(testData);
const firstStr = e.fields.find(f => f.py_type.includes('str') && f.name !== 'status');
const updateData = {...testData};
if (firstStr) updateData[firstStr.name] = `Updated ${firstStr.name}`;
const ud = pyJsonLiteral(updateData);
code += `def test_create_${lo}():\n response = client.post('/${tb}/', json=${td})\n assert response.status_code == 201\n assert 'id' in response.json()\n\n`;
code += `def test_list_${lo}s():\n client.post('/${tb}/', json=${td})\n response = client.get('/${tb}/')\n assert response.status_code == 200\n assert len(response.json()) >= 1\n\n`;
code += `def test_get_${lo}_by_id():\n created = client.post('/${tb}/', json=${td}).json()\n item_id = created['id']\n response = client.get(f'/${tb}/{item_id}')\n assert response.status_code == 200\n assert response.json()['id'] == item_id\n\n`;
code += `def test_get_${lo}_not_found():\n response = client.get('/${tb}/99999')\n assert response.status_code == 404\n\n`;
code += `def test_update_${lo}():\n created = client.post('/${tb}/', json=${td}).json()\n item_id = created['id']\n response = client.put(f'/${tb}/{item_id}', json=${ud})\n assert response.status_code == 200\n\n`;
code += `def test_delete_${lo}():\n created = client.post('/${tb}/', json=${td}).json()\n item_id = created['id']\n response = client.delete(f'/${tb}/{item_id}')\n assert response.status_code == 204\n response = client.get(f'/${tb}/{item_id}')\n assert response.status_code == 404\n\n`;
}
return code;
}
function tmplPyproject(spec) {
const name = (spec.project_name || 'app').toLowerCase().replace(/\s+/g, '-');
return `[project]\nname = "${name}"\nversion = "0.1.0"\nrequires-python = ">=3.11"\ndependencies = [\n "fastapi",\n "uvicorn[standard]",\n "sqlalchemy",\n "pytest",\n "httpx",\n]\n`;
return files;
}
// === Validaattori ===
@@ -334,15 +275,16 @@ async function runPipeline(model, scenario) {
result.specEntities = spec.entities.length;
writeFileSync(`${dir}/_spec.json`, JSON.stringify(spec, null, 2));
// 3. Template-generointi
console.log(` [3/5] Koodigenerointi...`);
const files = {
'models.py': tmplModels(spec),
'schemas.py': tmplSchemas(spec),
'main.py': tmplMain(spec),
'test_main.py': tmplTests(spec),
'pyproject.toml': tmplPyproject(spec),
};
// 3. LLM-koodigenerointi
console.log(` [3/5] Koodigenerointi (LLM)...`);
const codePrompt = `PROJECT REQUIREMENTS:\n${req.text}\n\nJSON SPECIFICATION:\n${JSON.stringify(spec, null, 2)}\n\nGenerate the complete project with all 5 files.`;
const codeResp = await ollamaChat(model, codePrompt, CODE_SYSTEM, 4096);
timings.push(codeResp);
writeFileSync(`${dir}/_code_raw.txt`, codeResp.text);
const files = parseGeneratedFiles(codeResp.text);
const required = ['models.py', 'schemas.py', 'main.py', 'test_main.py', 'pyproject.toml'];
const missing = required.filter(f => !files[f]);
if (missing.length > 0) { result.error = `Puuttuvat: ${missing.join(', ')}`; return result; }
// 4. Validointi + korjaussilmukka
let issues = validateProjectCode(files);