CodeBench: plaintext-speksi pienille malleille

- spec-plain.md: "entity Author (authors): name string, email string"
- extractPlainSpec(): parseri plaintext → {entities, relationships}
- Small-profiili käyttää plain-formaattia, large JSON-formaattia
- specText-muuttuja: plaintext tai JSON prompteihin
- Ei voi mennä syntaktisesti rikki kuten JSON
2026-04-15 00:37:34 +03:00
parent 6ccf6fb0e1
commit a32c4787f8
2 changed files with 65 additions and 8 deletions
--- a/kipina-codebench/benchmark.mjs
+++ b/kipina-codebench/benchmark.mjs
@@ -53,6 +53,7 @@ function loadPrompt(name) {
 const CLIENT_SYSTEM = loadPrompt('client');
 const SPEC_SYSTEM = loadPrompt('spec');
 const SPEC_SIMPLE_SYSTEM = existsSync(join(__dirname, 'prompts', 'spec-simple.md')) ? loadPrompt('spec-simple') : SPEC_SYSTEM;
+const SPEC_PLAIN_SYSTEM = existsSync(join(__dirname, 'prompts', 'spec-plain.md')) ? loadPrompt('spec-plain') : SPEC_SIMPLE_SYSTEM;
 const FIX_SYSTEM = loadPrompt('fix');

 // === Mallikohtaiset profiilit ===
@@ -295,6 +296,33 @@ function extractJson(text) {
    return null;
 }

/**
 * Parses a plaintext schema spec into a structured object.
 *
 * Expected input, one declaration per line:
 *
 *   project: blog-api
 *   entity Author (authors): name string, email string
 *   entity Post (posts): title string, author_id int->Author, status string=draft
 *
 * Lines that are neither a `project:` line nor an `entity` line are ignored,
 * and so are field declarations that do not match `name type[->Target][=default]`.
 * Whitespace around `->` and `=` is tolerated so that a field written as
 * `author_id int -> Author` is kept instead of being dropped.
 *
 * @param {string} text - Raw spec text to parse.
 * @returns {{project_name: string, entities: Array<{name: string, table_name: string, fields: Array<{name: string, type: string, nullable: boolean, default: (string|null)}>}>, relationships: Array<{from: string, field: string, to: string}>}|null}
 *   The parsed spec, or `null` when `text` is not a string or contains no
 *   entity line.
 */
function extractPlainSpec(text) {
    // A missing or non-string response is reported as "no spec" rather than a TypeError.
    if (typeof text !== 'string') return null;

    // Built once per call instead of once per line / per field.
    const PROJECT_RE = /^project:\s*/i;
    const ENTITY_RE = /^entity\s+(\w+)\s*\((\w+)\)\s*:\s*(.+)/i;
    const FIELD_RE = /^(\w+)\s+(string|text|int|float|bool|date|datetime)(?:\s*->\s*(\w+))?(?:\s*=\s*(.+))?$/i;

    // trim() also removes a trailing '\r' left over from CRLF line endings.
    const lines = text.split('\n').map((l) => l.trim()).filter(Boolean);

    const projectLine = lines.find((l) => PROJECT_RE.test(l));
    // An absent or empty "project:" line falls back to the default name.
    const projectName = (projectLine ? projectLine.replace(PROJECT_RE, '').trim() : '') || 'api';

    const entities = [];
    const relationships = [];
    for (const line of lines) {
        const m = line.match(ENTITY_RE);
        if (!m) continue;
        const [, name, tableName, fieldsStr] = m;
        const fields = [];
        for (const part of fieldsStr.split(',').map((s) => s.trim())) {
            const fm = part.match(FIELD_RE);
            if (!fm) continue;
            const [, fname, ftype, fkTarget, fdefault] = fm;
            fields.push({ name: fname, type: ftype.toLowerCase(), nullable: false, default: fdefault ?? null });
            if (fkTarget) {
                // "field type->Target" declares a reference from this entity to Target.
                relationships.push({ from: name, field: fname, to: fkTarget });
            }
        }
        entities.push({ name, table_name: tableName, fields });
    }
    if (entities.length === 0) return null;
    return { project_name: projectName, entities, relationships };
}
+
 // === Testiskenaariot ===
 const SCENARIOS = [
    { id: 'todo', prompt: 'Todo-sovellus: tehtävien hallinta, deadline, prioriteetti ja status' },
@@ -358,17 +386,27 @@ async function runPipeline(model, scenario, round = 1) {
        result.reqOk = true;
        writeFileSync(`${dir}/_requirements.txt`, req.text);

-        // 2. JSON-speksi (small-malleille yksinkertaistettu skeema)
+        // 2. Speksi (small → plaintext, large → JSON)
        const specProfile = PROFILES.models[specModel]?.profile || PROFILES.default_profile;
-        const specPrompt = specProfile === 'small' ? SPEC_SIMPLE_SYSTEM : SPEC_SYSTEM;
-        console.log(`    [2/5] JSON-speksi${specProfile === 'small' ? ' (simple)' : ''}...`);
-        const specResp = await ollamaChat(specModel, `${req.text}\n\nOutput a JSON spec for this project.`, specPrompt, 4096, specUrl);
+        const usePlainSpec = specProfile === 'small';
+        const specPrompt = usePlainSpec ? SPEC_PLAIN_SYSTEM : SPEC_SYSTEM;
+        const specLabel = usePlainSpec ? 'plain' : 'JSON';
+        console.log(`    [2/5] Speksi (${specLabel})...`);
+        const specMsg = usePlainSpec
+            ? `${req.text}\n\nOutput the database schema for this project.`
+            : `${req.text}\n\nOutput a JSON spec for this project.`;
+        const specResp = await ollamaChat(specModel, specMsg, specPrompt, 4096, specUrl);
        timings.push(specResp);
-        const spec = extractJson(specResp.text);
-        if (!spec || !spec.entities || spec.entities.length === 0) { result.error = 'JSON-speksi epäonnistui'; writeFileSync(`${dir}/_spec_raw.txt`, specResp.text); return result; }
+        const spec = usePlainSpec ? extractPlainSpec(specResp.text) : extractJson(specResp.text);
+        if (!spec || !spec.entities || spec.entities.length === 0) { result.error = 'Speksi epäonnistui'; writeFileSync(`${dir}/_spec_raw.txt`, specResp.text); return result; }
        result.specOk = true;
        result.specEntities = spec.entities.length;
        writeFileSync(`${dir}/_spec.json`, JSON.stringify(spec, null, 2));
+        // Spec tekstimuodossa prompteihin
+        const specText = usePlainSpec
+            ? spec.entities.map(e => `entity ${e.name} (${e.table_name}): ${e.fields.map(f => `${f.name} ${f.type}${f.default ? '=' + f.default : ''}`).join(', ')}`).join('\n')
+                + (spec.relationships.length > 0 ? '\nrelationships: ' + spec.relationships.map(r => `${r.from}.${r.field} -> ${r.to}`).join(', ') : '')
+            : JSON.stringify(spec, null, 2);

        // 3. LLM-koodigenerointi
        // Konvertointi-moodissa: generoi Python ensin, golden+files Pythonista
@@ -390,7 +428,7 @@ async function runPipeline(model, scenario, round = 1) {
            // Vaihe 1: generoi jokainen tiedosto
            for (const fileDef of fbf) {
                const contextBlock = context ? `\nEXISTING CODE:\n${context}\n` : '';
-                const filePrompt = `${goldenExample}\n---\n\nPROJECT REQUIREMENTS:\n${req.text}\n\nJSON SPECIFICATION:\n${JSON.stringify(spec, null, 2)}\n${contextBlock}\nWrite ONLY the file "${fileDef.name}": ${fileDef.desc}\nOutput raw code, no markdown fences, no explanations. Start with "package main".`;
+                const filePrompt = `${goldenExample}\n---\n\nPROJECT REQUIREMENTS:\n${req.text}\n\nSPECIFICATION:\n${specText}\n${contextBlock}\nWrite ONLY the file "${fileDef.name}": ${fileDef.desc}\nOutput raw code, no markdown fences, no explanations. Start with "package main".`;

                console.log(`    [3/5]   → ${fileDef.name}...`);
                const fileResp = await ollamaChat(model, filePrompt, CODE_SYSTEM, 2048);
@@ -510,7 +548,7 @@ async function runPipeline(model, scenario, round = 1) {
        } else {
            // Normaali: kaikki kerralla
            console.log(`    [3/5] Koodigenerointi (LLM)...`);
-            const codePrompt = `${goldenExample}\n---\n\nPROJECT REQUIREMENTS:\n${req.text}\n\nJSON SPECIFICATION:\n${JSON.stringify(spec, null, 2)}\n\nGenerate the complete project with all ${fileCount} files. Follow the reference implementation patterns exactly.`;
+            const codePrompt = `${goldenExample}\n---\n\nPROJECT REQUIREMENTS:\n${req.text}\n\nSPECIFICATION:\n${specText}\n\nGenerate the complete project with all ${fileCount} files. Follow the reference implementation patterns exactly.`;
            result.promptChars = CODE_SYSTEM.length + codePrompt.length;
            result.promptTokensEst = Math.round(result.promptChars / 4);
            const codeResp = await ollamaChat(model, codePrompt, CODE_SYSTEM, codeTokens);
--- a/kipina-codebench/prompts/spec-plain.md
+++ b/kipina-codebench/prompts/spec-plain.md
@@ -0,0 +1,19 @@
+You design database schemas. Output ONLY the schema in this exact format, nothing else.
+
+FORMAT (one entity per line):
+project: project-name
+entity EntityName (table_name): field1 type, field2 type, field3 type=default
+entity ChildName (table_name): field1 type, parent_id int->ParentName, field2 type
+
+TYPES: string, text, int, float, bool, date, datetime
+RULES:
+- id is automatic, do NOT include it
+- FK fields end with _id and use -> to reference parent
+- Parent entities BEFORE children
+- Max 7 fields per entity, max 3 entities
+- Status fields: string with =default (e.g. status string=draft)
+
+EXAMPLE:
+project: blog-api
+entity Author (authors): name string, email string, bio text
+entity Post (posts): title string, content text, author_id int->Author, published_at datetime, status string=draft