{ ... }:
{
  services.ollama = {
    enable = true;
    # Use AMD ROCm GPU acceleration
    acceleration = "rocm";
    # Listen on all interfaces instead of only localhost
    host = "0.0.0.0";
    # Open the Ollama API port (11434 by default) in the firewall
    openFirewall = true;
    environmentVariables = {
      # Flash Attention: improves memory efficiency and speeds up long contexts
      OLLAMA_FLASH_ATTENTION = "1";
      # KV cache quantization: reduces VRAM footprint (q8_0 = 8-bit quantization)
      OLLAMA_KV_CACHE_TYPE = "q8_0";
    };
    # Models to pull automatically once the service has started
    loadModels = [
      "qwen3:8b" # https://ollama.com/library/qwen3
    ];
  };
}
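
# A minimal sketch of how this module might be wired into a NixOS system,
# assuming it is saved as ./ollama.nix (hypothetical path) alongside
# configuration.nix:
#
#   { ... }:
#   {
#     imports = [ ./ollama.nix ];
#   }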