{ ... }:
{
  # Ollama LLM server, accelerated on AMD GPUs via ROCm.
  services.ollama = {
    enable = true;
    acceleration = "rocm";

    # Listen on all interfaces and open the firewall port.
    # NOTE(review): this exposes the Ollama API to the local network with no
    # authentication — confirm that is intended.
    host = "0.0.0.0";
    openFirewall = true;

    environmentVariables = {
      # Flash Attention: improves memory efficiency and speeds up long context.
      OLLAMA_FLASH_ATTENTION = "1";
      # KV cache quantization: reduces VRAM footprint (q8_0 = 8-bit quantization).
      OLLAMA_KV_CACHE_TYPE = "q8_0";
    };

    # Models pulled automatically when the service starts.
    loadModels = [
      "qwen3:8b" # https://ollama.com/library/qwen3
    ];
  };
}