diff --git a/network-poc/native-node/src/inference.rs b/network-poc/native-node/src/inference.rs index 3915d60..888debd 100644 --- a/network-poc/native-node/src/inference.rs +++ b/network-poc/native-node/src/inference.rs @@ -69,11 +69,13 @@ pub struct LlmEngine { impl LlmEngine { pub fn load() -> Result { - let device = Device::cuda_if_available(0).map_err(|e| format!("Device: {}", e))?; - let device_name = if device.is_cuda() { "CUDA" } else { "CPU" }; + // Candle 0.8: RMS-norm ei tue CUDA:a → käytetään CPU:ta + // Natiivi CPU on silti ~10-20× nopeampi kuin WASM (multi-threaded, ei browser overhead) + let device = Device::Cpu; + let device_name = "CPU (native)"; tracing::info!("LLM device: {}", device_name); - let dtype = if device.is_cuda() { DType::F16 } else { DType::F32 }; + let dtype = DType::F32; tracing::info!("Ladataan Qwen2.5-Coder-0.5B-Instruct..."); let api = Api::new().map_err(|e| format!("HF API: {}", e))?;