CodeBench: --compact tiivistää golden examplen templaatiksi
Python: 1340 → 335 tokenia (−75%)
Rust: 3383 → 445 tokenia (−87%)
Käyttö: node benchmark.mjs --compact --models qwen3:4b
This commit is contained in:
@@ -33,6 +33,7 @@ const TIMESTAMP = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 16);
|
||||
const OUTPUT_DIR = arg('output', `/tmp/kipina-benchmark/${TIMESTAMP}`);
|
||||
const RESULTS_DIR = join(__dirname, 'results');
|
||||
const THINK_MODE = args.includes('--think');
|
||||
const COMPACT_MODE = args.includes('--compact');
|
||||
const LANG = arg('lang', 'python'); // python | rust
|
||||
const MAX_FIX_ROUNDS = 2;
|
||||
|
||||
@@ -66,6 +67,13 @@ const LANG_CONFIG = {
|
||||
const LCONF = LANG_CONFIG[LANG] || LANG_CONFIG.python;
|
||||
|
||||
function loadGoldenExample() {
|
||||
// --compact: käytä tiivistettyä templaattia täyden koodin sijaan
|
||||
if (COMPACT_MODE) {
|
||||
const compactFile = LANG === 'rust' ? 'golden-compact-rs.md' : 'golden-compact-py.md';
|
||||
const compactPath = join(__dirname, 'prompts', compactFile);
|
||||
if (existsSync(compactPath)) return '\n' + readFileSync(compactPath, 'utf-8').trim() + '\n';
|
||||
}
|
||||
// Täysi golden example
|
||||
const todoDir = join(GOLDEN_DIR, LCONF.goldenDir);
|
||||
if (!existsSync(todoDir)) return '';
|
||||
let example = `\nREFERENCE IMPLEMENTATION (todo project — follow this exact structure, style, and conventions):\n\n`;
|
||||
@@ -376,7 +384,7 @@ async function main() {
|
||||
console.log('╔══════════════════════════════════════════════╗');
|
||||
console.log('║ Kipinä CodeBench ║');
|
||||
console.log('╚══════════════════════════════════════════════╝');
|
||||
console.log(`Ollama: ${OLLAMA_URL} 📝 ${LANG}${THINK_MODE ? ' 🧠 thinking ON' : ''}`);
|
||||
console.log(`Ollama: ${OLLAMA_URL} 📝 ${LANG}${COMPACT_MODE ? ' (compact)' : ''}${THINK_MODE ? ' 🧠 thinking ON' : ''}`);
|
||||
|
||||
// Haetaan mallit
|
||||
let models;
|
||||
|
||||
36
kipina-codebench/prompts/golden-compact-py.md
Normal file
36
kipina-codebench/prompts/golden-compact-py.md
Normal file
@@ -0,0 +1,36 @@
|
||||
REFERENCE PATTERNS (follow exactly):
|
||||
|
||||
STACK: SQLAlchemy 2.0 + Pydantic v2 + FastAPI + SQLite
|
||||
|
||||
models.py:
|
||||
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
|
||||
class Base(DeclarativeBase): pass
|
||||
Fields: Mapped[type] = mapped_column(SqlType, default=...)
|
||||
Nullable: Mapped[str | None] = mapped_column(Text, default=None)
|
||||
Status: Mapped[str] = mapped_column(String(20), default="pending")
|
||||
FK: Mapped[int] = mapped_column(ForeignKey("table.id"))
|
||||
End: Base.metadata.create_all(bind=engine)
|
||||
|
||||
schemas.py:
|
||||
class EntityCreate(BaseModel): fields with defaults
|
||||
class EntityResponse(EntityCreate):
|
||||
id: int
|
||||
model_config = ConfigDict(from_attributes=True)
|
||||
|
||||
main.py:
|
||||
def get_db(): yield SessionLocal(); finally close
|
||||
POST /{table}/ → 201, model_dump()
|
||||
GET /{table}/ → list
|
||||
GET /{table}/{id} → 404 if not found
|
||||
PUT /{table}/{id} → model_dump(), setattr loop
|
||||
DELETE /{table}/{id} → 204
|
||||
|
||||
test_main.py:
|
||||
test.db + override_get_db + TestClient
|
||||
Unique Finnish data per test ("Osta maitoa", "Haettava tehtävä"...)
|
||||
test_create → 201 + assert "id" in json
|
||||
test_list → post first, get, assert len >= 1
|
||||
test_get_by_id → post, get by id, assert id matches
|
||||
test_not_found → get /99999 → 404
|
||||
test_update → post, put with ALL required fields, assert 200
|
||||
test_delete → post, delete 204, get again → 404
|
||||
43
kipina-codebench/prompts/golden-compact-rs.md
Normal file
43
kipina-codebench/prompts/golden-compact-rs.md
Normal file
@@ -0,0 +1,43 @@
|
||||
REFERENCE PATTERNS (follow exactly):
|
||||
|
||||
STACK: Axum 0.8 + SQLx + SQLite + Tokio + Serde
|
||||
|
||||
Cargo.toml:
|
||||
edition = "2024"
|
||||
deps: axum 0.8, tokio (full), serde (derive), serde_json, sqlx (sqlite, runtime-tokio), tower-http (cors)
|
||||
dev: reqwest (rustls-tls)
|
||||
|
||||
src/models.rs:
|
||||
#[derive(Debug, Serialize, Deserialize, FromRow)]
|
||||
struct Entity { id: i64, field: String, optional: Option<String> }
|
||||
struct CreateEntity { field: String, optional: Option<String> }
|
||||
Status fields: String with default "pending"
|
||||
|
||||
src/handlers.rs:
|
||||
async fn create(State(pool), Json(input)) -> (StatusCode, Json<Entity>)
|
||||
POST → StatusCode::CREATED, sqlx::query("INSERT...").execute + query_as last_insert_rowid
|
||||
GET list → query_as("SELECT * FROM table").fetch_all
|
||||
GET by id → query_as.fetch_optional, return 404 if None
|
||||
PUT → query("UPDATE...SET...WHERE id=?"), rows_affected == 0 → 404
|
||||
DELETE → StatusCode::NO_CONTENT, rows_affected == 0 → 404
|
||||
|
||||
src/lib.rs:
|
||||
pub fn app(pool: SqlitePool) -> Router
|
||||
pub async fn init_db(pool: &SqlitePool) → CREATE TABLE IF NOT EXISTS
|
||||
Routes: .route("/{table}", post(create).get(list))
|
||||
.route("/{table}/{id}", get(get_one).put(update).delete(delete_one))
|
||||
|
||||
src/main.rs:
|
||||
SqlitePool::connect("sqlite:./app.db"), init_db, bind 0.0.0.0:3000
|
||||
|
||||
tests/api_test.rs:
|
||||
Each test: SqlitePool::connect("sqlite::memory:"), init_db, app(pool)
|
||||
Spawn on random port: TcpListener::bind("127.0.0.1:0"), axum::serve
|
||||
reqwest::Client for HTTP calls
|
||||
Unique Finnish data ("Osta maitoa", "Haettava tehtävä"...)
|
||||
test_create → 201 + assert id exists
|
||||
test_list → post first, get, assert len >= 1
|
||||
test_get_by_id → post, get, assert id matches
|
||||
test_not_found → 404
|
||||
test_update → post, put with ALL fields, assert 200
|
||||
test_delete → post, delete 204, get → 404
|
||||
Reference in New Issue
Block a user