From b5e45398d87f30df5539902febb3a5f4bb0ce2b5 Mon Sep 17 00:00:00 2001 From: ahuston-0 Date: Thu, 26 Mar 2026 20:50:24 -0400 Subject: [PATCH 1/2] 3 parallel models --- modules/kubernetes.nix | 25 +++++++++++++++ systems/palatine-hill/configuration.nix | 1 - systems/palatine-hill/ollama.nix | 2 +- users/alice/non-server.nix | 42 ++++++++++++++++++++++++- 4 files changed, 67 insertions(+), 3 deletions(-) diff --git a/modules/kubernetes.nix b/modules/kubernetes.nix index 9c959cf..fe269a0 100644 --- a/modules/kubernetes.nix +++ b/modules/kubernetes.nix @@ -49,5 +49,30 @@ # Enable containerd for Kubernetes virtualisation.containerd.enable = true; + # Enable kubelet + services.kubelet = { + enable = true; + extraFlags = { + "pod-infra-container-image" = "registry.k8s.io/pause:3.9"; + }; + }; + + # Enable kubeadm for cluster initialization + environment.etc."kubeadm.yaml".text = '' + apiVersion: kubeadm.k8s.io/v1beta3 + kind: InitConfiguration + localAPIEndpoint: + advertiseAddress: 127.0.0.1 + bindPort: 6443 + --- + apiVersion: kubeadm.k8s.io/v1beta3 + kind: ClusterConfiguration + clusterName: ${config.services.kubernetes.clusterName} + controlPlaneEndpoint: ${config.services.kubernetes.controlPlaneEndpoint} + networking: + serviceSubnet: 10.96.0.0/12 + podSubnet: 10.244.0.0/16 + dnsDomain: cluster.local + ''; }; } diff --git a/systems/palatine-hill/configuration.nix b/systems/palatine-hill/configuration.nix index 8dc4906..c41c3e8 100644 --- a/systems/palatine-hill/configuration.nix +++ b/systems/palatine-hill/configuration.nix @@ -17,7 +17,6 @@ ./minio.nix ./networking.nix ./nextcloud.nix - ./ollama.nix #./plex ./postgresql.nix ./samba.nix diff --git a/systems/palatine-hill/ollama.nix b/systems/palatine-hill/ollama.nix index 40ee0dc..b418d51 100644 --- a/systems/palatine-hill/ollama.nix +++ b/systems/palatine-hill/ollama.nix @@ -31,7 +31,7 @@ in FLASH_ATTENTION = "1"; OLLAMA_KV_CACHE_TYPE = "q8_0"; # Ollama memory configuration - OLLAMA_MAX_LOADED_MODELS = "2"; + OLLAMA_MAX_LOADED_MODELS = "3"; OLLAMA_MAX_QUEUE = "512"; OLLAMA_NUM_PARALLEL = "1"; diff --git a/users/alice/non-server.nix b/users/alice/non-server.nix index dfc8099..ad17959 100644 --- a/users/alice/non-server.nix +++ b/users/alice/non-server.nix @@ -85,7 +85,32 @@ #keep_alive = "15m"; available_models = [ { - name = "glm-4.7-flash"; + name = "deepseek-r1:1.5b"; + max_tokens = 128000; + keep_alive = "15m"; + } + { + name = "deepseek-r1:32b"; + max_tokens = 128000; + keep_alive = "15m"; + } + { + name = "deepseek-r1:70b"; + max_tokens = 128000; + keep_alive = "15m"; + } + { + name = "qwen3-coder-next"; + max_tokens = 128000; + keep_alive = "15m"; + } + { + name = "lennyerik/zeta"; + max_tokens = 128000; + keep_alive = "15m"; + } + { + name = "nomic-embed-text:latest"; max_tokens = 128000; keep_alive = "15m"; } @@ -94,6 +119,21 @@ max_tokens = 128000; keep_alive = "15m"; } + { + name = "glm-4.7-flash"; + max_tokens = 128000; + keep_alive = "15m"; + } + { + name = "nemotron-cascade-2:30b"; + max_tokens = 128000; + keep_alive = "15m"; + } + { + name = "magistral"; + max_tokens = 128000; + keep_alive = "15m"; + } ]; }; }; From 802bf1ca4c742ae3f6929c33103147b5300e0577 Mon Sep 17 00:00:00 2001 From: ahuston-0 Date: Thu, 26 Mar 2026 21:40:12 -0400 Subject: [PATCH 2/2] add ollama back to artemision --- modules/kubernetes.nix | 52 ++++++++++++++-------------- systems/artemision/configuration.nix | 16 +++++++++ systems/palatine-hill/ollama.nix | 3 +- users/alice/non-server.nix | 1 + 4 files changed, 45 insertions(+), 27 deletions(-) diff --git a/modules/kubernetes.nix b/modules/kubernetes.nix index fe269a0..d4c2365 100644 --- a/modules/kubernetes.nix +++ b/modules/kubernetes.nix @@ -46,33 +46,33 @@ kubernetes ]; - # Enable containerd for Kubernetes - virtualisation.containerd.enable = true; + ## Enable containerd for Kubernetes + #virtualisation.containerd.enable = true; - # Enable kubelet - services.kubelet = { - enable = true; - extraFlags = { - "pod-infra-container-image" = "registry.k8s.io/pause:3.9"; - }; - }; + ## Enable kubelet + #services.kubelet = { + # enable = true; + # extraFlags = { + # "pod-infra-container-image" = "registry.k8s.io/pause:3.9"; + # }; + #}; - # Enable kubeadm for cluster initialization - environment.etc."kubeadm.yaml".text = '' - apiVersion: kubeadm.k8s.io/v1beta3 - kind: InitConfiguration - localAPIEndpoint: - advertiseAddress: 127.0.0.1 - bindPort: 6443 - --- - apiVersion: kubeadm.k8s.io/v1beta3 - kind: ClusterConfiguration - clusterName: ${config.services.kubernetes.clusterName} - controlPlaneEndpoint: ${config.services.kubernetes.controlPlaneEndpoint} - networking: - serviceSubnet: 10.96.0.0/12 - podSubnet: 10.244.0.0/16 - dnsDomain: cluster.local - ''; + ## Enable kubeadm for cluster initialization + #environment.etc."kubeadm.yaml".text = '' + # apiVersion: kubeadm.k8s.io/v1beta3 + # kind: InitConfiguration + # localAPIEndpoint: + # advertiseAddress: 127.0.0.1 + # bindPort: 6443 + # --- + # apiVersion: kubeadm.k8s.io/v1beta3 + # kind: ClusterConfiguration + # clusterName: ${config.services.kubernetes.clusterName} + # controlPlaneEndpoint: ${config.services.kubernetes.controlPlaneEndpoint} + # networking: + # serviceSubnet: 10.96.0.0/12 + # podSubnet: 10.244.0.0/16 + # dnsDomain: cluster.local + #''; }; } diff --git a/systems/artemision/configuration.nix b/systems/artemision/configuration.nix index 172df93..cede5f2 100644 --- a/systems/artemision/configuration.nix +++ b/systems/artemision/configuration.nix @@ -1,6 +1,7 @@ { lib, pkgs, + config, ... }: { @@ -17,6 +18,7 @@ ./stylix.nix ./wifi.nix ./zerotier.nix + ../palatine-hill/ollama.nix ]; time.timeZone = "America/New_York"; @@ -39,6 +41,20 @@ sops.age.sshKeyPaths = [ "/etc/ssh/ssh_host_ed25519_key" ]; services = { + ollama = { + package = lib.mkForce pkgs.ollama-rocm; + models = lib.mkForce "${config.services.ollama.home}/models"; + loadModels = lib.mkForce [ + "deepseek-r1:1.5b" + "qwen3-coder-next" + "lennyerik/zeta" + "nomic-embed-text:latest" + "glm-4.7-flash" + "magistral" + "devstral-small-2" + "starcoder2:7b" + ]; + }; flatpak.enable = true; calibre-web = { # temp disable this diff --git a/systems/palatine-hill/ollama.nix b/systems/palatine-hill/ollama.nix index b418d51..d304d7e 100644 --- a/systems/palatine-hill/ollama.nix +++ b/systems/palatine-hill/ollama.nix @@ -25,11 +25,12 @@ in "nemotron-cascade-2:30b" "magistral" "devstral-small-2" + "starcoder2:15b" ]; models = vars.primary_ollama; environmentVariables = { FLASH_ATTENTION = "1"; - OLLAMA_KV_CACHE_TYPE = "q8_0"; + OLLAMA_KV_CACHE_TYPE = "q4_0"; # Ollama memory configuration OLLAMA_MAX_LOADED_MODELS = "3"; OLLAMA_MAX_QUEUE = "512"; diff --git a/users/alice/non-server.nix b/users/alice/non-server.nix index ad17959..2d78a60 100644 --- a/users/alice/non-server.nix +++ b/users/alice/non-server.nix @@ -134,6 +134,7 @@ max_tokens = 128000; keep_alive = "15m"; } + ]; }; };