aboutsummaryrefslogtreecommitdiff
path: root/profiles/llm.nix
blob: d5d02f44fb0d5f27081e2266b1bf4b6d71a10b86 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# NixOS profile that enables the Ollama service with ROCm acceleration,
# tunes it through environment variables, and lists the models to load.
{ ... }:
let
  # Environment variables handed to the Ollama service.
  ollamaEnv = {
    # Flash Attention: improves memory efficiency and speeds up long contexts.
    OLLAMA_FLASH_ATTENTION = "1";
    # KV cache quantization: q8_0 (8-bit) reduces the VRAM footprint.
    OLLAMA_KV_CACHE_TYPE = "q8_0";
  };

  # Models passed to `services.ollama.loadModels`.
  preloadedModels = [
    "qwen3:8b" # https://ollama.com/library/qwen3
  ];
in
{
  services.ollama.enable = true;
  services.ollama.acceleration = "rocm";

  # NOTE(review): listening on all interfaces together with an opened firewall
  # port makes the service reachable from other hosts — confirm this exposure
  # is intended for the machines that import this profile.
  services.ollama.host = "0.0.0.0";
  services.ollama.openFirewall = true;

  services.ollama.environmentVariables = ollamaEnv;
  services.ollama.loadModels = preloadedModels;
}