From 07e02215ab168f5b46b4fab4b932e86488ff701f Mon Sep 17 00:00:00 2001
From: Franck Cuny
Date: Fri, 2 Jan 2026 11:19:45 -0800
Subject: initial setup for ollama

---
Notes (ignored when the patch is applied):
  Adds profiles/llm.nix, a NixOS module that enables services.ollama with
  acceleration = "rocm", binds it to 0.0.0.0, opens the firewall, sets
  OLLAMA_FLASH_ATTENTION and OLLAMA_KV_CACHE_TYPE, and lists "qwen3:8b"
  under loadModels.
  NOTE(review): host = "0.0.0.0" together with openFirewall = true makes the
  service reachable from other machines -- confirm this exposure is intended.

 profiles/llm.nix | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 profiles/llm.nix

(limited to 'profiles')

diff --git a/profiles/llm.nix b/profiles/llm.nix
new file mode 100644
index 0000000..d5d02f4
--- /dev/null
+++ b/profiles/llm.nix
@@ -0,0 +1,18 @@
+{ ... }:
+{
+  services.ollama = {
+    enable = true;
+    acceleration = "rocm";
+    host = "0.0.0.0";
+    openFirewall = true;
+    environmentVariables = {
+      # Flash Attention: Improves memory efficiency and speeds up long context
+      OLLAMA_FLASH_ATTENTION = "1";
+      # KV cache quantization: Reduces VRAM footprint (q8_0 = 8-bit quantization)
+      OLLAMA_KV_CACHE_TYPE = "q8_0";
+    };
+    loadModels = [
+      "qwen3:8b" # https://ollama.com/library/qwen3
+    ];
+  };
+}
-- 
cgit v1.2.3