CodeBench: --compact tiivistää golden examplen templaatiksi

Python: 1340 → 335 tokenia (−75%)
Rust: 3383 → 445 tokenia (−87%)
Käyttö: node benchmark.mjs --compact --models qwen3:4b
This commit is contained in:
2026-04-14 10:59:39 +03:00
parent e7b33b7d6f
commit 01b4fb8e22
3 changed files with 88 additions and 1 deletions

View File

@@ -33,6 +33,7 @@ const TIMESTAMP = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 16);
const OUTPUT_DIR = arg('output', `/tmp/kipina-benchmark/${TIMESTAMP}`);
const RESULTS_DIR = join(__dirname, 'results');
const THINK_MODE = args.includes('--think');
const COMPACT_MODE = args.includes('--compact');
const LANG = arg('lang', 'python'); // python | rust
const MAX_FIX_ROUNDS = 2;
@@ -66,6 +67,13 @@ const LANG_CONFIG = {
const LCONF = LANG_CONFIG[LANG] || LANG_CONFIG.python;
function loadGoldenExample() {
// --compact: käytä tiivistettyä templaattia täyden koodin sijaan
if (COMPACT_MODE) {
const compactFile = LANG === 'rust' ? 'golden-compact-rs.md' : 'golden-compact-py.md';
const compactPath = join(__dirname, 'prompts', compactFile);
if (existsSync(compactPath)) return '\n' + readFileSync(compactPath, 'utf-8').trim() + '\n';
}
// Täysi golden example
const todoDir = join(GOLDEN_DIR, LCONF.goldenDir);
if (!existsSync(todoDir)) return '';
let example = `\nREFERENCE IMPLEMENTATION (todo project — follow this exact structure, style, and conventions):\n\n`;
@@ -376,7 +384,7 @@ async function main() {
console.log('╔══════════════════════════════════════════════╗');
console.log('║ Kipinä CodeBench ║');
console.log('╚══════════════════════════════════════════════╝');
console.log(`Ollama: ${OLLAMA_URL} 📝 ${LANG}${THINK_MODE ? ' 🧠 thinking ON' : ''}`);
console.log(`Ollama: ${OLLAMA_URL} 📝 ${LANG}${COMPACT_MODE ? ' (compact)' : ''}${THINK_MODE ? ' 🧠 thinking ON' : ''}`);
// Haetaan mallit
let models;

View File

@@ -0,0 +1,36 @@
REFERENCE PATTERNS (follow exactly):
STACK: SQLAlchemy 2.0 + Pydantic v2 + FastAPI + SQLite
models.py:
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
class Base(DeclarativeBase): pass
Fields: Mapped[type] = mapped_column(SqlType, default=...)
Nullable: Mapped[str | None] = mapped_column(Text, default=None)
Status: Mapped[str] = mapped_column(String(20), default="pending")
FK: Mapped[int] = mapped_column(ForeignKey("table.id"))
End: Base.metadata.create_all(bind=engine)
schemas.py:
class EntityCreate(BaseModel): fields with defaults
class EntityResponse(EntityCreate):
id: int
model_config = ConfigDict(from_attributes=True)
main.py:
def get_db(): db = SessionLocal(); try: yield db; finally: db.close()
POST /{table}/ → 201, model_dump()
GET /{table}/ → list
GET /{table}/{id} → 404 if not found
PUT /{table}/{id} → model_dump(), setattr loop
DELETE /{table}/{id} → 204
test_main.py:
test.db + override_get_db + TestClient
Unique Finnish data per test ("Osta maitoa", "Haettava tehtävä"...)
test_create → 201 + assert "id" in json
test_list → post first, get, assert len >= 1
test_get_by_id → post, get by id, assert id matches
test_not_found → get /99999 → 404
test_update → post, put with ALL required fields, assert 200
test_delete → post, delete 204, get again → 404

View File

@@ -0,0 +1,43 @@
REFERENCE PATTERNS (follow exactly):
STACK: Axum 0.8 + SQLx + SQLite + Tokio + Serde
Cargo.toml:
edition = "2024"
deps: axum 0.8, tokio (full), serde (derive), serde_json, sqlx (sqlite, runtime-tokio), tower-http (cors)
dev: reqwest (rustls-tls, json)
src/models.rs:
#[derive(Debug, Serialize, Deserialize, FromRow)]
struct Entity { id: i64, field: String, optional: Option<String> }
struct CreateEntity { field: String, optional: Option<String> }
Status fields: String with default "pending"
src/handlers.rs:
async fn create(State(pool), Json(input)) -> (StatusCode, Json<Entity>)
POST → StatusCode::CREATED, sqlx::query("INSERT...").execute + query_as last_insert_rowid
GET list → query_as("SELECT * FROM table").fetch_all
GET by id → query_as.fetch_optional, return 404 if None
PUT → query("UPDATE...SET...WHERE id=?"), rows_affected == 0 → 404
DELETE → StatusCode::NO_CONTENT, rows_affected == 0 → 404
src/lib.rs:
pub fn app(pool: SqlitePool) -> Router
pub async fn init_db(pool: &SqlitePool) → CREATE TABLE IF NOT EXISTS
Routes: .route("/{table}", post(create).get(list))
.route("/{table}/{id}", get(get_one).put(update).delete(delete_one))
src/main.rs:
SqlitePool::connect("sqlite:./app.db"), init_db, bind 0.0.0.0:3000
tests/api_test.rs:
Each test: SqlitePool::connect("sqlite::memory:"), init_db, app(pool)
Spawn on random port: TcpListener::bind("127.0.0.1:0"), axum::serve
reqwest::Client for HTTP calls
Unique Finnish data ("Osta maitoa", "Haettava tehtävä"...)
test_create → 201 + assert id exists
test_list → post first, get, assert len >= 1
test_get_by_id → post, get, assert id matches
test_not_found → 404
test_update → post, put with ALL fields, assert 200
test_delete → post, delete 204, get → 404