From 9d7496157cf1d2baaab2f5b74e1923aaba490d42 Mon Sep 17 00:00:00 2001
From: jaakko
Date: Mon, 6 Apr 2026 21:52:50 +0300
Subject: [PATCH] Native node CPU-moodi: Candle 0.8 RMS-norm ei tue CUDA:a
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

candle-core 0.8 ei sisällä rms-norm CUDA-kerneliä → inferenssi epäonnistui.
Vaihdettu CPU:ksi joka on silti ~10-20× nopeampi kuin selaimen WASM.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 network-poc/native-node/src/inference.rs | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/network-poc/native-node/src/inference.rs b/network-poc/native-node/src/inference.rs
index 3915d60..888debd 100644
--- a/network-poc/native-node/src/inference.rs
+++ b/network-poc/native-node/src/inference.rs
@@ -69,11 +69,13 @@ pub struct LlmEngine {
 
 impl LlmEngine {
     pub fn load() -> Result {
-        let device = Device::cuda_if_available(0).map_err(|e| format!("Device: {}", e))?;
-        let device_name = if device.is_cuda() { "CUDA" } else { "CPU" };
+        // Candle 0.8: RMS-norm ei tue CUDA:a → käytetään CPU:ta
+        // Natiivi CPU on silti ~10-20× nopeampi kuin WASM (multi-threaded, ei browser overhead)
+        let device = Device::Cpu;
+        let device_name = "CPU (native)";
         tracing::info!("LLM device: {}", device_name);
 
-        let dtype = if device.is_cuda() { DType::F16 } else { DType::F32 };
+        let dtype = DType::F32;
 
         tracing::info!("Ladataan Qwen2.5-Coder-0.5B-Instruct...");
         let api = Api::new().map_err(|e| format!("HF API: {}", e))?;