diff --git a/systems/palatine-hill/ollama.nix b/systems/palatine-hill/ollama.nix
index bbeb41b..d9bc296 100644
--- a/systems/palatine-hill/ollama.nix
+++ b/systems/palatine-hill/ollama.nix
@@ -12,19 +12,17 @@ in
     package = pkgs.ollama;
     syncModels = true;
     loadModels = [
-      "deepseek-r1:latest"
       "deepseek-r1:1.5b"
       "deepseek-r1:32b"
+      "deepseek-r1:70b"
       "qwen3"
-      #"qwen3-coder-next"
-      "qwen3-coder"
+      "qwen3-coder-next"
       "lennyerik/zeta"
-      "qwen2.5-coder:1.5b-base"
       "qwen2.5-coder:14b"
+      "qwen2.5-coder:32b"
       "nomic-embed-text:latest"
-      "llama3.3:8b"
+      "llama4:scout"
       "mistral:7b"
-      "deepseek-v3.2:cloud"
       "minimax-m2.7:cloud"
       "lfm2:24b"
       "glm-4.7-flash"
@@ -34,16 +32,16 @@ in
       FLASH_ATTENTION = "1";
       OLLAMA_KV_CACHE_TYPE = "q8_0";
       # Ollama memory configuration
-      OLLAMA_MAX_LOADED_MODELS = "2";
-      OLLAMA_MAX_QUEUE = "4";
-      OLLAMA_NUM_PARALLEL = "2";
+      OLLAMA_MAX_LOADED_MODELS = "3";
+      OLLAMA_MAX_QUEUE = "512";
+      OLLAMA_NUM_PARALLEL = "3";
 
       # ROCm memory optimization
       #HIP_VISIBLE_DEVICES = "0";
       #ROCR_VISIBLE_DEVICES = "0";
 
-      # context length for agents
-      OLLAMA_CONTEXT_LENGTH = "64000";
+      # context length for agents; RAM/VRAM use scales with OLLAMA_NUM_PARALLEL * OLLAMA_CONTEXT_LENGTH
+      OLLAMA_CONTEXT_LENGTH = "128000";
     };
     openFirewall = true;
     host = "0.0.0.0"; # don't want to make this available via load-balancer yet, so making it available on the local network