initial setup for ollama

author: Franck Cuny <franck@fcuny.net> 2026-01-02 11:19:45 -0800
committer: Franck Cuny <franck@fcuny.net> 2026-01-02 11:19:45 -0800
commit: 07e02215ab168f5b46b4fab4b932e86488ff701f (patch)
tree: 13af76b3785097e704b4fbce565b9e5ab1e01546
parent: add procs to linux (diff)
download: infra-07e02215ab168f5b46b4fab4b932e86488ff701f.tar.gz
2 files changed, 19 insertions, 0 deletions
diff --git a/machines/rivendell.nix b/machines/rivendell.nix
index a2ca15b..f1e1dd5 100644
--- a/machines/rivendell.nix
+++ b/machines/rivendell.nix
@@ -13,6 +13,7 @@
     ../profiles/git-server.nix
     ../profiles/hardware/framework-desktop.nix
     ../profiles/home-manager.nix
+    ../profiles/llm.nix
     ../profiles/miniflux.nix
     ../profiles/monitoring.nix
     ../profiles/remote-unlock.nix
diff --git a/profiles/llm.nix b/profiles/llm.nix
new file mode 100644
index 0000000..d5d02f4
--- /dev/null
+++ b/profiles/llm.nix
@@ -0,0 +1,18 @@
+{ ... }: # NixOS profile: enable and configure the Ollama service on the importing host
+{
+  services.ollama = {
+    enable = true;
+    acceleration = "rocm"; # select the ROCm (AMD GPU) acceleration backend
+    host = "0.0.0.0"; # bind to all IPv4 interfaces
+    openFirewall = true; # NOTE(review): together with host above the API is reachable from the network; confirm intended
+    environmentVariables = {
+      # Flash Attention: Improves memory efficiency and speeds up long context
+      OLLAMA_FLASH_ATTENTION = "1";
+      # KV cache quantization: Reduces VRAM footprint (q8_0 = 8-bit quantization)
+      OLLAMA_KV_CACHE_TYPE = "q8_0";
+    };
+    loadModels = [ # models requested from the module; presumably pulled once the service is up (verify)
+      "qwen3:8b" # https://ollama.com/library/qwen3
+    ];
+  };
+}
author	Franck Cuny <franck@fcuny.net>	2026-01-02 11:19:45 -0800
committer	Franck Cuny <franck@fcuny.net>	2026-01-02 11:19:45 -0800
commit	07e02215ab168f5b46b4fab4b932e86488ff701f (patch)
tree	13af76b3785097e704b4fbce565b9e5ab1e01546
parent	add procs to linux (diff)
download	infra-07e02215ab168f5b46b4fab4b932e86488ff701f.tar.gz