Native node CPU-moodi: Candle 0.8 RMS-norm ei tue CUDA:a

candle-core 0.8 ei sisällä RMS-norm CUDA-kerneliä → inferenssi epäonnistui. Vaihdettu CPU:ksi, joka on silti ~10-20× nopeampi kuin selaimen WASM.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-06 21:52:50 +03:00
parent d332b7e910
commit 9d7496157c
1 changed file with 5 additions and 3 deletions
--- a/network-poc/native-node/src/inference.rs
+++ b/network-poc/native-node/src/inference.rs
@@ -69,11 +69,13 @@ pub struct LlmEngine {
 impl LlmEngine {
    pub fn load() -> Result<Self, String> {
-        let device = Device::cuda_if_available(0).map_err(|e| format!("Device: {}", e))?;
+        // Candle 0.8: RMS-norm ei tue CUDA:a → käytetään CPU:ta
-        let device_name = if device.is_cuda() { "CUDA" } else { "CPU" };
+        // Natiivi CPU on silti ~10-20× nopeampi kuin WASM (multi-threaded, ei browser overhead)
+        let device = Device::Cpu;
+        let device_name = "CPU (native)";
        tracing::info!("LLM device: {}", device_name);
-        let dtype = if device.is_cuda() { DType::F16 } else { DType::F32 };
+        let dtype = DType::F32;
        tracing::info!("Ladataan Qwen2.5-Coder-0.5B-Instruct...");
        let api = Api::new().map_err(|e| format!("HF API: {}", e))?;