aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFranck Cuny <franck@fcuny.net>2026-01-02 11:19:45 -0800
committerFranck Cuny <franck@fcuny.net>2026-01-02 11:19:45 -0800
commit07e02215ab168f5b46b4fab4b932e86488ff701f (patch)
tree13af76b3785097e704b4fbce565b9e5ab1e01546
parentadd procs to linux (diff)
downloadinfra-07e02215ab168f5b46b4fab4b932e86488ff701f.tar.gz
initial setup for ollama
-rw-r--r--machines/rivendell.nix1
-rw-r--r--profiles/llm.nix18
2 files changed, 19 insertions, 0 deletions
diff --git a/machines/rivendell.nix b/machines/rivendell.nix
index a2ca15b..f1e1dd5 100644
--- a/machines/rivendell.nix
+++ b/machines/rivendell.nix
@@ -13,6 +13,7 @@
../profiles/git-server.nix
../profiles/hardware/framework-desktop.nix
../profiles/home-manager.nix
+ ../profiles/llm.nix
../profiles/miniflux.nix
../profiles/monitoring.nix
../profiles/remote-unlock.nix
diff --git a/profiles/llm.nix b/profiles/llm.nix
new file mode 100644
index 0000000..d5d02f4
--- /dev/null
+++ b/profiles/llm.nix
@@ -0,0 +1,18 @@
+{ ... }:
+{
+ services.ollama = { # Ollama server settings applied to any machine importing this profile
+ enable = true;
+ acceleration = "rocm"; # selects the ROCm backend (presumably AMD GPU acceleration — confirm for this hardware)
+ host = "0.0.0.0"; # listen address: all IPv4 interfaces
+ openFirewall = true; # NOTE(review): with host = 0.0.0.0 this makes the API reachable from the network — confirm that is intended
+ environmentVariables = {
+ # Flash attention: improves memory efficiency and speeds up long-context inference.
+ OLLAMA_FLASH_ATTENTION = "1";
+ # KV cache quantization: q8_0 = 8-bit cache, reducing VRAM footprint (presumably requires flash attention above — confirm).
+ OLLAMA_KV_CACHE_TYPE = "q8_0";
+ };
+ loadModels = [ # models the module should make available (presumably pulled after the service starts — confirm)
+ "qwen3:8b" # https://ollama.com/library/qwen3
+ ];
+ };
+}