# NixOS module: local LLM stack.
#
# Enables the Ollama service with a declared set of models and tuning
# environment variables, enables Open WebUI on a fixed port, and declares an
# `ollama` system user/group with GPU-related supplementary groups.
{ pkgs, ... }:
{
  services = {
    ollama = {
      enable = true;
      package = pkgs.ollama;
      syncModels = true;

      # Models declared for the service.
      loadModels = [
        "gemma3"
        "deepseek-r1:latest"
        "deepseek-r1:1.5b"
        "qwen3"
        #"qwen3-coder-next"
        "qwen3-coder"
        "lennyerik/zeta"
        "llama3.1:8b"
        "qwen2.5-coder:1.5b-base"
        "nomic-embed-text:latest"
      ];

      environmentVariables = {
        # Ollama reads OLLAMA_FLASH_ATTENTION (not FLASH_ATTENTION). The
        # quantized K/V cache type below depends on flash attention being on,
        # so the un-prefixed name silently disabled both settings.
        OLLAMA_FLASH_ATTENTION = "1";
        OLLAMA_KV_CACHE_TYPE = "q8_0";

        # Ollama memory configuration
        OLLAMA_MAX_LOADED_MODELS = "2";
        OLLAMA_MAX_QUEUE = "4";
        OLLAMA_NUM_PARALLEL = "2";

        # ROCm memory optimization
        #HIP_VISIBLE_DEVICES = "0";
        #ROCR_VISIBLE_DEVICES = "0";

        # context length for agents
        OLLAMA_CONTEXT_LENGTH = "64000";
      };
    };

    open-webui = {
      enable = true;
      port = 21212;
    };
  };

  # System account and group named `ollama`, with membership in the `render`
  # and `video` groups.
  # NOTE(review): services.ollama.user / services.ollama.group are not set in
  # this module — confirm the service actually runs as this account.
  users.users.ollama = {
    extraGroups = [
      "render"
      "video"
    ];
    group = "ollama";
    isSystemUser = true;
  };

  users.groups.ollama = { };
}