diff options
| author | Franck Cuny <franck@fcuny.net> | 2026-01-02 11:19:45 -0800 |
|---|---|---|
| committer | Franck Cuny <franck@fcuny.net> | 2026-01-02 11:19:45 -0800 |
| commit | 07e02215ab168f5b46b4fab4b932e86488ff701f (patch) | |
| tree | 13af76b3785097e704b4fbce565b9e5ab1e01546 /profiles/llm.nix | |
| parent | add procs to linux (diff) | |
| download | infra-07e02215ab168f5b46b4fab4b932e86488ff701f.tar.gz | |
initial setup for ollama
Diffstat (limited to '')
| -rw-r--r-- | profiles/llm.nix | 18 |
1 files changed, 18 insertions, 0 deletions
```diff
diff --git a/profiles/llm.nix b/profiles/llm.nix
new file mode 100644
index 0000000..d5d02f4
--- /dev/null
+++ b/profiles/llm.nix
@@ -0,0 +1,18 @@
+{ ... }:
+{
+  services.ollama = {
+    enable = true;
+    acceleration = "rocm";
+    host = "0.0.0.0";
+    openFirewall = true;
+    environmentVariables = {
+      # Flash Attention: Improves memory efficiency and speeds up long context
+      OLLAMA_FLASH_ATTENTION = "1";
+      # KV cache quantization: Reduces VRAM footprint (q8_0 = 8-bit quantization)
+      OLLAMA_KV_CACHE_TYPE = "q8_0";
+    };
+    loadModels = [
+      "qwen3:8b" # https://ollama.com/library/qwen3
+    ];
+  };
+}
```
