diff --git a/network-poc/native-node/src/inference.rs b/network-poc/native-node/src/inference.rs
index 3915d60..888debd 100644
--- a/network-poc/native-node/src/inference.rs
+++ b/network-poc/native-node/src/inference.rs
@@ -69,11 +69,13 @@ pub struct LlmEngine {
 
 impl LlmEngine {
     pub fn load() -> Result<Self, String> {
-        let device = Device::cuda_if_available(0).map_err(|e| format!("Device: {}", e))?;
-        let device_name = if device.is_cuda() { "CUDA" } else { "CPU" };
+        // Candle 0.8: RMS-norm does not support CUDA, so we run on the CPU.
+        // Native CPU is still ~10-20× faster than WASM (multi-threaded, no browser overhead).
+        let device = Device::Cpu;
+        let device_name = "CPU (native)";
         tracing::info!("LLM device: {}", device_name);
 
-        let dtype = if device.is_cuda() { DType::F16 } else { DType::F32 };
+        let dtype = DType::F32;
 
         tracing::info!("Ladataan Qwen2.5-Coder-0.5B-Instruct...");
         let api = Api::new().map_err(|e| format!("HF API: {}", e))?;