# NixOS module for a local LLM stack.
#
# Configures:
#   * services.ollama     - the Ollama server, its declared models and runtime
#                           environment variables.
#   * services.open-webui - the Open WebUI front end on port 21212.
#   * users/groups        - an `ollama` system user in the `render` and `video`
#                           groups.
#   * systemd.services    - low CPU/IO priority for the Ollama server and its
#                           model-loader unit.
#
# Arguments:
#   pkgs - the package set; used here only for `pkgs.ollama`.
#
# Reads `./vars.nix` (project-local) for the `primary_ollama` value used as the
# model storage location.
{ pkgs, ... }:
let
  vars = import ./vars.nix;
in
{
  services = {
    ollama = {
      enable = true;
      package = pkgs.ollama;

      # NOTE(review): presumably keeps the installed models in sync with
      # `loadModels` (removing undeclared ones) - confirm against the module docs.
      syncModels = true;
      loadModels = [
        "gemma3"
        "deepseek-r1:latest"
        "deepseek-r1:1.5b"
        "qwen3"
        #"qwen3-coder-next"
        "qwen3-coder"
        "lennyerik/zeta"
        "llama3.1:8b"
        "qwen2.5-coder:1.5b-base"
        "nomic-embed-text:latest"
      ];

      # Model directory comes from the shared vars file.
      models = vars.primary_ollama;

      environmentVariables = {
        # Ollama only reads the OLLAMA_-prefixed name; a bare `FLASH_ATTENTION`
        # is ignored. Flash attention must be on for the quantized KV cache
        # type below to take effect (see the Ollama FAQ).
        OLLAMA_FLASH_ATTENTION = "1";
        OLLAMA_KV_CACHE_TYPE = "q8_0";

        # Ollama memory configuration
        OLLAMA_MAX_LOADED_MODELS = "2";
        OLLAMA_MAX_QUEUE = "4";
        OLLAMA_NUM_PARALLEL = "2";

        # ROCm memory optimization
        #HIP_VISIBLE_DEVICES = "0";
        #ROCR_VISIBLE_DEVICES = "0";

        # context length for agents
        OLLAMA_CONTEXT_LENGTH = "64000";
      };

      openFirewall = true;
    };

    open-webui = {
      enable = true;
      port = 21212;
      openFirewall = true;
    };
  };

  # System account for Ollama with the `render` and `video` groups.
  # NOTE(review): presumably these groups grant GPU device access - confirm.
  users.users.ollama = {
    extraGroups = [
      "render"
      "video"
    ];
    group = "ollama";
    isSystemUser = true;
  };

  users.groups.ollama = { };

  systemd.services = {
    # Run the server at the lowest CPU niceness and lowest IO priority level.
    ollama.serviceConfig = {
      Nice = 19;
      IOSchedulingPriority = 7;
    };

    # Model downloads are background work: idle IO class, reduced CPU weight.
    ollama-model-loader.serviceConfig = {
      Nice = 19;
      CPUWeight = 50;
      IOSchedulingClass = "idle";
      IOSchedulingPriority = 7;
    };
  };
}