Ollama-integraatio: GPU-inferenssi NVIDIA/AMD/Apple, ei Candle-rajoitteita
- docker-compose: Ollama-container GPU:lla + persistent volume malleille
- native-node: Candle poistettu, kutsuu Ollaman HTTP API:a (async)
- Dockerfile: yksinkertaistettu, ei CUDA SDK:ta (Ollama hoitaa GPU:n)
- Tukee kaikkia malleja: qwen2.5-coder:1.5b/3b/7b/14b/32b
- OLLAMA_MODEL ympäristömuuttujalla vaihdetaan malli
- kpn models näyttää Ollama-mallit nopeustiedoilla

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,13 +1,9 @@
|
||||
FROM nvidia/cuda:12.6.3-devel-ubuntu24.04 AS builder
|
||||
FROM rust:slim AS builder
|
||||
|
||||
RUN apt-get update && apt-get install -y \
|
||||
curl pkg-config libssl-dev g++ libvulkan-dev \
|
||||
pkg-config libssl-dev g++ libvulkan-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Rust
|
||||
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
||||
ENV PATH="/root/.cargo/bin:${PATH}"
|
||||
|
||||
WORKDIR /app
|
||||
COPY Cargo.toml Cargo.lock ./
|
||||
COPY hub/Cargo.toml hub/Cargo.toml
|
||||
@@ -24,15 +20,15 @@ RUN mkdir -p hub/src node/src native-node/src cli/src \
|
||||
&& cargo build --release -p native-node 2>/dev/null || true
|
||||
|
||||
COPY native-node/src native-node/src
|
||||
# RTX 4090 = sm_89, RTX 3090 = sm_86, RTX 2080 = sm_75
|
||||
ENV CUDA_COMPUTE_CAP=89
|
||||
RUN cargo build --release -p native-node
|
||||
|
||||
FROM nvidia/cuda:12.6.3-runtime-ubuntu24.04
|
||||
FROM debian:bookworm-slim
|
||||
RUN apt-get update && apt-get install -y ca-certificates libvulkan1 && rm -rf /var/lib/apt/lists/*
|
||||
COPY --from=builder /app/target/release/native-node /usr/local/bin/native-node
|
||||
|
||||
ENV HUB_URL=ws://agentic-poc:3000/ws
|
||||
ENV OLLAMA_URL=http://ollama:11434
|
||||
ENV OLLAMA_MODEL=qwen2.5-coder:7b
|
||||
ENV ALLOCATED_GB=4
|
||||
|
||||
CMD ["native-node"]
|
||||
|
||||
Reference in New Issue
Block a user