diff --git a/network-poc/native-node/src/inference.rs b/network-poc/native-node/src/inference.rs index 91795d8..608ccf2 100644 --- a/network-poc/native-node/src/inference.rs +++ b/network-poc/native-node/src/inference.rs @@ -124,7 +124,8 @@ impl LlmEngine { } pub fn generate(&mut self, prompt: &str, max_tokens: usize) -> Result { - let formatted = format!("<|im_start|>system\nYou are a coding assistant. Respond with ONLY code. No explanations, no markdown, no comments unless asked.<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n", prompt); + // Prefill: start the reply with a ``` code fence so the model continues directly with code + let formatted = format!("<|im_start|>system\nYou are a coding assistant. Respond with ONLY code. No explanations, no markdown, no comments unless asked.<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n```\n", prompt); let encoding = self.tokenizer.encode(formatted.as_str(), true) .map_err(|e| format!("Encode: {}", e))?; diff --git a/network-poc/node/src/qwen_coder.rs b/network-poc/node/src/qwen_coder.rs index 31bf791..470dabc 100644 --- a/network-poc/node/src/qwen_coder.rs +++ b/network-poc/node/src/qwen_coder.rs @@ -254,7 +254,9 @@ pub async fn run_coder_inference(prompt: String, ws: Rc>, use (prompt.clone(), default_system.to_string(), 128) }; - let formatted = format!("<|im_start|>system\n{}<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n", system_msg, actual_prompt); + // Prefill: start the reply with a ``` code fence so the model continues directly with code + // instead of producing a "Sure! Here is..." preamble. strip_markdown_wrapper removes the ``` afterwards. + let formatted = format!("<|im_start|>system\n{}<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n```\n", system_msg, actual_prompt); // Inferenssi: käytetään välimuistissa olevaa mallia let (generated_text, tokens_generated, gen_time) = MODEL_CACHE.with(|cache| {