From 67e4dc15e768e6596e6ccdbd32d87e5946cea0b9 Mon Sep 17 00:00:00 2001 From: ahuston-0 Date: Mon, 23 Mar 2026 23:00:22 -0400 Subject: [PATCH 1/3] llama 4 scout --- systems/palatine-hill/ollama.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/systems/palatine-hill/ollama.nix b/systems/palatine-hill/ollama.nix index bbeb41b..ceaa6be 100644 --- a/systems/palatine-hill/ollama.nix +++ b/systems/palatine-hill/ollama.nix @@ -22,7 +22,7 @@ in "qwen2.5-coder:1.5b-base" "qwen2.5-coder:14b" "nomic-embed-text:latest" - "llama3.3:8b" + "llama4:scout" "mistral:7b" "deepseek-v3.2:cloud" "minimax-m2.7:cloud" -- 2.53.0 From 3557b88d7cfebe682c9424b8aac14545bf9c7956 Mon Sep 17 00:00:00 2001 From: ahuston-0 Date: Mon, 23 Mar 2026 23:14:05 -0400 Subject: [PATCH 2/3] ollama --- systems/palatine-hill/ollama.nix | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/systems/palatine-hill/ollama.nix b/systems/palatine-hill/ollama.nix index ceaa6be..66207b8 100644 --- a/systems/palatine-hill/ollama.nix +++ b/systems/palatine-hill/ollama.nix @@ -12,19 +12,18 @@ in package = pkgs.ollama; syncModels = true; loadModels = [ - "deepseek-r1:latest" "deepseek-r1:1.5b" "deepseek-r1:32b" + "deepseek-r1:70b" "qwen3" #"qwen3-coder-next" - "qwen3-coder" + "qwen3-coder-next" "lennyerik/zeta" - "qwen2.5-coder:1.5b-base" "qwen2.5-coder:14b" + "qwen2.5-coder:32b" "nomic-embed-text:latest" "llama4:scout" "mistral:7b" - "deepseek-v3.2:cloud" "minimax-m2.7:cloud" "lfm2:24b" "glm-4.7-flash" @@ -34,9 +33,9 @@ in FLASH_ATTENTION = "1"; OLLAMA_KV_CACHE_TYPE = "q8_0"; # Ollama memory configuration - OLLAMA_MAX_LOADED_MODELS = "2"; - OLLAMA_MAX_QUEUE = "4"; - OLLAMA_NUM_PARALLEL = "2"; + OLLAMA_MAX_LOADED_MODELS = "3"; + OLLAMA_MAX_QUEUE = "512"; + OLLAMA_NUM_PARALLEL = "3"; # ROCm memory optimization #HIP_VISIBLE_DEVICES = "0"; -- 2.53.0 From de45a27860bc0ffb9483ec194f189c1a4bbc1cc9 Mon Sep 17 00:00:00 2001 From: ahuston-0 Date: Mon, 23 Mar 2026 23:17:14 -0400 Subject: [PATCH 3/3] extend context --- systems/palatine-hill/ollama.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/systems/palatine-hill/ollama.nix b/systems/palatine-hill/ollama.nix index 66207b8..d9bc296 100644 --- a/systems/palatine-hill/ollama.nix +++ b/systems/palatine-hill/ollama.nix @@ -42,7 +42,7 @@ in #ROCR_VISIBLE_DEVICES = "0"; # context length for agents - OLLAMA_CONTEXT_LENGTH = "64000"; + OLLAMA_CONTEXT_LENGTH = "128000"; }; openFirewall = true; host = "0.0.0.0"; # don't want to make this available via load-balancer yet, so making it available on the local network -- 2.53.0