// Plaintext spec parser. Expected input, one entity per line:
//   project: blog-api
//   entity Author (authors): name string, email string
//   entity Post (posts): title string, author_id int->Author, status string=draft
/**
 * Parse a plaintext schema description into a spec object.
 *
 * Lines that are neither a `project:` line nor an `entity ...` line are
 * ignored. Within an entity line, comma-separated parts that do not look like
 * `name type[->Target][=default]` are skipped. Whitespace around `->`, `=`,
 * the parentheses and the colon is tolerated, because a field dropped here is
 * lost without any error.
 *
 * @param {string} text - Raw model output containing the schema.
 * @returns {{project_name: string, entities: Array<{name: string, table_name: string, fields: Array<{name: string, type: string, nullable: boolean, default: (string|null)}>}>, relationships: Array<{from: string, field: string, to: string}>}|null}
 *   The parsed spec, or `null` when `text` is not a string or contains no
 *   entity line.
 */
function extractPlainSpec(text) {
  // A missing/undefined response body is treated as "nothing parsed" rather than a crash.
  if (typeof text !== 'string') return null;

  const entityRe = /^entity\s+(\w+)\s*\(\s*(\w+)\s*\)\s*:\s*(.+)/i;
  const fieldRe = /^(\w+)\s+(string|text|int|float|bool|date|datetime)(?:\s*->\s*(\w+))?(?:\s*=\s*(.+))?$/i;

  const lines = text
    .split(/\r?\n/)
    .map((l) => l.trim())
    .filter(Boolean);

  const projectLine = lines.find((l) => /^project:/i.test(l));
  // Fall back to 'api' both when the line is absent and when the name is empty.
  const projectName = projectLine?.replace(/^project:\s*/i, '').trim() || 'api';

  const entities = [];
  const relationships = [];

  for (const line of lines) {
    const m = line.match(entityRe);
    if (!m) continue;
    const [, name, tableName, fieldsStr] = m;

    const fields = [];
    for (const part of fieldsStr.split(',').map((s) => s.trim())) {
      const fm = part.match(fieldRe);
      if (!fm) continue;
      const [, fname, ftype, fkTarget, fdefault] = fm;
      fields.push({
        name: fname,
        type: ftype.toLowerCase(),
        nullable: false,
        default: fdefault ? fdefault.trim() : null,
      });
      // `type->Target` marks a foreign key; record it as a relationship too.
      if (fkTarget) {
        relationships.push({ from: name, field: fname, to: fkTarget });
      }
    }

    // An entity is kept even when none of its fields parsed.
    entities.push({ name, table_name: tableName, fields });
  }

  if (entities.length === 0) return null;
  return { project_name: projectName, entities, relationships };
}
deadline, prioriteetti ja status' }, @@ -358,17 +386,27 @@ async function runPipeline(model, scenario, round = 1) { result.reqOk = true; writeFileSync(`${dir}/_requirements.txt`, req.text); - // 2. JSON-speksi (small-malleille yksinkertaistettu skeema) + // 2. Speksi (small → plaintext, large → JSON) const specProfile = PROFILES.models[specModel]?.profile || PROFILES.default_profile; - const specPrompt = specProfile === 'small' ? SPEC_SIMPLE_SYSTEM : SPEC_SYSTEM; - console.log(` [2/5] JSON-speksi${specProfile === 'small' ? ' (simple)' : ''}...`); - const specResp = await ollamaChat(specModel, `${req.text}\n\nOutput a JSON spec for this project.`, specPrompt, 4096, specUrl); + const usePlainSpec = specProfile === 'small'; + const specPrompt = usePlainSpec ? SPEC_PLAIN_SYSTEM : SPEC_SYSTEM; + const specLabel = usePlainSpec ? 'plain' : 'JSON'; + console.log(` [2/5] Speksi (${specLabel})...`); + const specMsg = usePlainSpec + ? `${req.text}\n\nOutput the database schema for this project.` + : `${req.text}\n\nOutput a JSON spec for this project.`; + const specResp = await ollamaChat(specModel, specMsg, specPrompt, 4096, specUrl); timings.push(specResp); - const spec = extractJson(specResp.text); - if (!spec || !spec.entities || spec.entities.length === 0) { result.error = 'JSON-speksi epäonnistui'; writeFileSync(`${dir}/_spec_raw.txt`, specResp.text); return result; } + const spec = usePlainSpec ? extractPlainSpec(specResp.text) : extractJson(specResp.text); + if (!spec || !spec.entities || spec.entities.length === 0) { result.error = 'Speksi epäonnistui'; writeFileSync(`${dir}/_spec_raw.txt`, specResp.text); return result; } result.specOk = true; result.specEntities = spec.entities.length; writeFileSync(`${dir}/_spec.json`, JSON.stringify(spec, null, 2)); + // Spec tekstimuodossa prompteihin + const specText = usePlainSpec + ? spec.entities.map(e => `entity ${e.name} (${e.table_name}): ${e.fields.map(f => `${f.name} ${f.type}${f.default ? 
'=' + f.default : ''}`).join(', ')}`).join('\n') + + (spec.relationships.length > 0 ? '\nrelationships: ' + spec.relationships.map(r => `${r.from}.${r.field} -> ${r.to}`).join(', ') : '') + : JSON.stringify(spec, null, 2); // 3. LLM-koodigenerointi // Konvertointi-moodissa: generoi Python ensin, golden+files Pythonista @@ -390,7 +428,7 @@ async function runPipeline(model, scenario, round = 1) { // Vaihe 1: generoi jokainen tiedosto for (const fileDef of fbf) { const contextBlock = context ? `\nEXISTING CODE:\n${context}\n` : ''; - const filePrompt = `${goldenExample}\n---\n\nPROJECT REQUIREMENTS:\n${req.text}\n\nJSON SPECIFICATION:\n${JSON.stringify(spec, null, 2)}\n${contextBlock}\nWrite ONLY the file "${fileDef.name}": ${fileDef.desc}\nOutput raw code, no markdown fences, no explanations. Start with "package main".`; + const filePrompt = `${goldenExample}\n---\n\nPROJECT REQUIREMENTS:\n${req.text}\n\nSPECIFICATION:\n${specText}\n${contextBlock}\nWrite ONLY the file "${fileDef.name}": ${fileDef.desc}\nOutput raw code, no markdown fences, no explanations. Start with "package main".`; console.log(` [3/5] → ${fileDef.name}...`); const fileResp = await ollamaChat(model, filePrompt, CODE_SYSTEM, 2048); @@ -510,7 +548,7 @@ async function runPipeline(model, scenario, round = 1) { } else { // Normaali: kaikki kerralla console.log(` [3/5] Koodigenerointi (LLM)...`); - const codePrompt = `${goldenExample}\n---\n\nPROJECT REQUIREMENTS:\n${req.text}\n\nJSON SPECIFICATION:\n${JSON.stringify(spec, null, 2)}\n\nGenerate the complete project with all ${fileCount} files. Follow the reference implementation patterns exactly.`; + const codePrompt = `${goldenExample}\n---\n\nPROJECT REQUIREMENTS:\n${req.text}\n\nSPECIFICATION:\n${specText}\n\nGenerate the complete project with all ${fileCount} files. 
Follow the reference implementation patterns exactly.`; result.promptChars = CODE_SYSTEM.length + codePrompt.length; result.promptTokensEst = Math.round(result.promptChars / 4); const codeResp = await ollamaChat(model, codePrompt, CODE_SYSTEM, codeTokens); diff --git a/kipina-codebench/prompts/spec-plain.md b/kipina-codebench/prompts/spec-plain.md new file mode 100644 index 0000000..60ebd43 --- /dev/null +++ b/kipina-codebench/prompts/spec-plain.md @@ -0,0 +1,19 @@ +You design database schemas. Output ONLY the schema in this exact format, nothing else. + +FORMAT (one entity per line): +project: project-name +entity EntityName (table_name): field1 type, field2 type, field3 type=default +entity ChildName (table_name): field1 type, parent_id int->ParentName, field2 type + +TYPES: string, text, int, float, bool, date, datetime +RULES: +- id is automatic, do NOT include it +- FK fields end with _id and use -> to reference parent +- Parent entities BEFORE children +- Max 7 fields per entity, max 3 entities +- Status fields: string with =default (e.g. status string=draft) + +EXAMPLE: +project: blog-api +entity Author (authors): name string, email string, bio text +entity Post (posts): title string, content text, author_id int->Author, published_at datetime, status string=draft