# NixOS module: local Ollama LLM server + Open WebUI front-end.
# Exposed on the LAN only (host binding + firewall), deliberately not behind
# the load balancer yet.
{ pkgs, ... }:
let
  # Host-specific values (e.g. model storage path) kept out of this module.
  vars = import ./vars.nix;
in
{
  services = {
    ollama = {
      enable = true;
      package = pkgs.ollama;
      # NOTE(review): `syncModels` is not a standard nixpkgs
      # `services.ollama` option — presumably supplied by a custom/overlay
      # module; confirm, otherwise evaluation will fail.
      syncModels = true;
      # Models pulled automatically at service start.
      loadModels = [
        "deepseek-r1:1.5b"
        "deepseek-r1:32b"
        "deepseek-r1:70b"
        "qwen3"
        #"qwen3-coder-next"
        "qwen3-coder-next"
        "lennyerik/zeta"
        "qwen2.5-coder:14b"
        "qwen2.5-coder:32b"
        "nomic-embed-text:latest"
        "llama4:scout"
        "mistral:7b"
        "minimax-m2.7:cloud"
        "lfm2:24b"
        "glm-4.7-flash"
      ];
      # Model storage location, defined per-host in vars.nix.
      models = vars.primary_ollama;
      environmentVariables = {
        # FIX: Ollama reads OLLAMA_FLASH_ATTENTION; the previous
        # `FLASH_ATTENTION` name was ignored by the server.
        OLLAMA_FLASH_ATTENTION = "1";
        OLLAMA_KV_CACHE_TYPE = "q8_0";
        # Ollama memory configuration
        OLLAMA_MAX_LOADED_MODELS = "3";
        OLLAMA_MAX_QUEUE = "512";
        OLLAMA_NUM_PARALLEL = "3";
        # ROCm memory optimization
        #HIP_VISIBLE_DEVICES = "0";
        #ROCR_VISIBLE_DEVICES = "0";
        # context length for agents
        OLLAMA_CONTEXT_LENGTH = "128000";
      };
      openFirewall = true;
      host = "0.0.0.0"; # don't want to make this available via load-balancer yet, so making it available on the local network
    };

    open-webui = {
      enable = true;
      port = 21212;
      openFirewall = true;
      host = "0.0.0.0"; # don't want to make this available via load-balancer yet, so making it available on the local network
    };
  };

  # Give the ollama service user access to GPU device nodes
  # (render/video groups are required for ROCm device files).
  users.users.ollama = {
    extraGroups = [ "render" "video" ];
    group = "ollama";
    isSystemUser = true;
  };
  users.groups.ollama = { };

  # Run the Ollama units at low CPU/IO priority so inference and model
  # downloads don't starve interactive workloads on this host.
  systemd.services = {
    ollama.serviceConfig = {
      Nice = 19;
      IOSchedulingPriority = 7;
    };
    ollama-model-loader.serviceConfig = {
      Nice = 19;
      CPUWeight = 50;
      IOSchedulingClass = "idle";
      IOSchedulingPriority = 7;
    };
  };
}