Benchmark: kultainen esimerkki + zensical-dokumentointiohjeet
- golden-examples/todo/: 6/6 PASS referenssitoteutus
- SQLAlchemy 2.0 (DeclarativeBase, Mapped, mapped_column)
- Pydantic v2 (ConfigDict)
- PEP 621 pyproject.toml, Python >=3.14
- Uniikki testidata per testi
- CODE_SYSTEM päivitetty: few-shot kultaisesta esimerkistä
- DOCUMENTATION.md: zensical-dokumentointiohjeet
This commit is contained in:
@@ -11,7 +11,11 @@
|
||||
*/
|
||||
|
||||
import { execSync } from 'child_process';
|
||||
import { writeFileSync, mkdirSync, rmSync, existsSync } from 'fs';
|
||||
import { writeFileSync, readFileSync, mkdirSync, rmSync, existsSync } from 'fs';
|
||||
import { dirname, join } from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
|
||||
// === CLI-argumentit ===
|
||||
const args = process.argv.slice(2);
|
||||
@@ -141,15 +145,29 @@ Blog → Author: name,email,bio(Text|None) / Post: title, content(Text), author_
|
||||
|
||||
const FIX_SYSTEM = 'You are a Python code fixer. Return ONLY the corrected Python file. No markdown fences, no explanations — just valid Python code.';
|
||||
|
||||
// === Kultainen esimerkki ===
|
||||
const GOLDEN_DIR = join(__dirname, 'golden-examples', 'todo');
|
||||
const GOLDEN_FILES = ['models.py', 'schemas.py', 'main.py', 'test_main.py', 'pyproject.toml'];
|
||||
/**
 * Builds the few-shot reference text from the golden example project.
 *
 * Each file named in GOLDEN_FILES that exists under GOLDEN_DIR is read as
 * UTF-8, trimmed, and appended under a `=== <filename> ===` marker, in the
 * order the names are listed. Files that are missing are skipped.
 *
 * @returns {string} The header followed by one section per file found, or an
 *   empty string when GOLDEN_DIR is missing or none of the listed files exist.
 */
function loadGoldenExample() {
  if (!existsSync(GOLDEN_DIR)) return '';

  const sections = GOLDEN_FILES
    .map((name) => ({ name, path: join(GOLDEN_DIR, name) }))
    .filter(({ path }) => existsSync(path))
    .map(({ name, path }) => `=== ${name} ===\n${readFileSync(path, 'utf-8').trim()}\n\n`);

  // A header with nothing under it would instruct the model to follow a
  // reference implementation that is not actually in the prompt.
  if (sections.length === 0) return '';

  const header = '\nREFERENCE IMPLEMENTATION (todo project — follow this exact structure, style, and conventions):\n\n';
  return header + sections.join('');
}
|
||||
const GOLDEN_EXAMPLE = loadGoldenExample();
|
||||
|
||||
const CODE_SYSTEM = `You are a Python backend developer. Generate a complete FastAPI project with SQLAlchemy and SQLite.
|
||||
|
||||
Given the project requirements and JSON specification, generate these 5 files:
|
||||
Given the project requirements, JSON specification, and a REFERENCE IMPLEMENTATION, generate these 5 files:
|
||||
|
||||
1. models.py - SQLAlchemy models with database setup (create_engine, declarative_base, sessionmaker, Base.metadata.create_all)
|
||||
2. schemas.py - Pydantic schemas (Create + Response for each entity, use ConfigDict(from_attributes=True))
|
||||
3. main.py - FastAPI application with full CRUD endpoints for each entity
|
||||
4. test_main.py - Pytest tests using TestClient with separate test database and dependency override
|
||||
5. pyproject.toml - Project configuration with dependencies
|
||||
1. models.py — SQLAlchemy 2.0: DeclarativeBase, Mapped, mapped_column (NOT legacy declarative_base)
|
||||
2. schemas.py — Pydantic v2: ConfigDict(from_attributes=True) (NOT class Config)
|
||||
3. main.py — FastAPI CRUD endpoints for each entity
|
||||
4. test_main.py — Pytest with TestClient, separate test.db, unique test data per test
|
||||
5. pyproject.toml — PEP 621 [project] format (NOT [tool.poetry])
|
||||
|
||||
OUTPUT FORMAT — use these exact markers to separate files:
|
||||
|
||||
@@ -168,18 +186,17 @@ OUTPUT FORMAT — use these exact markers to separate files:
|
||||
=== pyproject.toml ===
|
||||
<toml content>
|
||||
|
||||
DOCUMENTATION — every file must have a one-line module docstring. Classes get a one-line docstring. Keep it zensical: say what it IS, not what it does. No filler.
|
||||
|
||||
RULES:
|
||||
- SQLite: create_engine("sqlite:///./app.db", connect_args={"check_same_thread": False})
|
||||
- Each model: auto-increment "id" Column(Integer, primary_key=True, index=True)
|
||||
- Schemas: BaseModel with ConfigDict(from_attributes=True) for Response variants
|
||||
- Endpoints per entity: POST (create, 201), GET (list), GET by id (404 if missing), PUT (update), DELETE (204)
|
||||
- Tests: separate test.db, override get_db dependency, use TestClient
|
||||
- pyproject.toml: fastapi, uvicorn[standard], sqlalchemy, pytest, httpx
|
||||
- Status fields: String(20) with default, NEVER Enum
|
||||
- Follow the REFERENCE IMPLEMENTATION patterns exactly
|
||||
- SQLAlchemy 2.0: DeclarativeBase + Mapped + mapped_column (not Column())
|
||||
- Python type unions: str | None (not Optional[str])
|
||||
- pyproject.toml: PEP 621 [project] format, requires-python = ">=3.14"
|
||||
- Tests: unique descriptive data per test, NOT generic "test_title" strings
|
||||
- Absolute imports only (from models import ..., from schemas import ...)
|
||||
- Python booleans: True/False/None (not true/false/null/none)
|
||||
- NO markdown fences inside file content — just raw code
|
||||
- Every _id foreign key field MUST have ForeignKey("table.id") constraint`;
|
||||
- Only test endpoints that exist in main.py — no extra tests`;
|
||||
|
||||
// === Tiedostoparseri LLM-vastauksesta ===
|
||||
function parseGeneratedFiles(text) {
|
||||
@@ -285,7 +302,7 @@ async function runPipeline(model, scenario) {
|
||||
|
||||
// 3. LLM-koodigenerointi
|
||||
console.log(` [3/5] Koodigenerointi (LLM)...`);
|
||||
const codePrompt = `PROJECT REQUIREMENTS:\n${req.text}\n\nJSON SPECIFICATION:\n${JSON.stringify(spec, null, 2)}\n\nGenerate the complete project with all 5 files.`;
|
||||
const codePrompt = `${GOLDEN_EXAMPLE}\n---\n\nPROJECT REQUIREMENTS:\n${req.text}\n\nJSON SPECIFICATION:\n${JSON.stringify(spec, null, 2)}\n\nGenerate the complete project with all 5 files. Follow the reference implementation patterns exactly.`;
|
||||
const codeResp = await ollamaChat(model, codePrompt, CODE_SYSTEM, 8192);
|
||||
timings.push(codeResp);
|
||||
writeFileSync(`${dir}/_code_raw.txt`, codeResp.text);
|
||||
|
||||
Reference in New Issue
Block a user