diff --git a/modules/kubernetes.nix b/modules/kubernetes.nix index 9c959cf..d4c2365 100644 --- a/modules/kubernetes.nix +++ b/modules/kubernetes.nix @@ -46,8 +46,33 @@ kubernetes ]; - # Enable containerd for Kubernetes - virtualisation.containerd.enable = true; + ## Enable containerd for Kubernetes + #virtualisation.containerd.enable = true; + ## Enable kubelet + #services.kubelet = { + # enable = true; + # extraFlags = { + # "pod-infra-container-image" = "registry.k8s.io/pause:3.9"; + # }; + #}; + + ## Enable kubeadm for cluster initialization + #environment.etc."kubeadm.yaml".text = '' + # apiVersion: kubeadm.k8s.io/v1beta3 + # kind: InitConfiguration + # localAPIEndpoint: + # advertiseAddress: 127.0.0.1 + # bindPort: 6443 + # --- + # apiVersion: kubeadm.k8s.io/v1beta3 + # kind: ClusterConfiguration + # clusterName: ${config.services.kubernetes.clusterName} + # controlPlaneEndpoint: ${config.services.kubernetes.controlPlaneEndpoint} + # networking: + # serviceSubnet: 10.96.0.0/12 + # podSubnet: 10.244.0.0/16 + # dnsDomain: cluster.local + #''; }; } diff --git a/systems/artemision/configuration.nix b/systems/artemision/configuration.nix index 172df93..cede5f2 100644 --- a/systems/artemision/configuration.nix +++ b/systems/artemision/configuration.nix @@ -1,6 +1,7 @@ { lib, pkgs, + config, ... }: { @@ -17,6 +18,7 @@ ./stylix.nix ./wifi.nix ./zerotier.nix + ../palatine-hill/ollama.nix ]; time.timeZone = "America/New_York"; @@ -39,6 +41,20 @@ sops.age.sshKeyPaths = [ "/etc/ssh/ssh_host_ed25519_key" ]; services = { + ollama = { + package = lib.mkForce pkgs.ollama-rocm; + models = lib.mkForce "${config.services.ollama.home}/models"; + loadModels = lib.mkForce [ + "deepseek-r1:1.5b" + "qwen3-coder-next" + "lennyerik/zeta" + "nomic-embed-text:latest" + "glm-4.7-flash" + "magistral" + "devstral-small-2" + "starcoder2:7b" + ]; + }; flatpak.enable = true; calibre-web = { # temp disable this diff --git a/systems/palatine-hill/configuration.nix b/systems/palatine-hill/configuration.nix index 8dc4906..c41c3e8 100644 --- a/systems/palatine-hill/configuration.nix +++ b/systems/palatine-hill/configuration.nix @@ -17,7 +17,6 @@ ./minio.nix ./networking.nix ./nextcloud.nix - ./ollama.nix #./plex ./postgresql.nix ./samba.nix diff --git a/systems/palatine-hill/ollama.nix b/systems/palatine-hill/ollama.nix index 40ee0dc..d304d7e 100644 --- a/systems/palatine-hill/ollama.nix +++ b/systems/palatine-hill/ollama.nix @@ -25,13 +25,14 @@ in "nemotron-cascade-2:30b" "magistral" "devstral-small-2" + "starcoder2:15b" ]; models = vars.primary_ollama; environmentVariables = { FLASH_ATTENTION = "1"; - OLLAMA_KV_CACHE_TYPE = "q8_0"; + OLLAMA_KV_CACHE_TYPE = "q4_0"; # Ollama memory configuration - OLLAMA_MAX_LOADED_MODELS = "2"; + OLLAMA_MAX_LOADED_MODELS = "3"; OLLAMA_MAX_QUEUE = "512"; OLLAMA_NUM_PARALLEL = "1"; diff --git a/users/alice/non-server.nix b/users/alice/non-server.nix index dfc8099..2d78a60 100644 --- a/users/alice/non-server.nix +++ b/users/alice/non-server.nix @@ -85,7 +85,32 @@ #keep_alive = "15m"; available_models = [ { - name = "glm-4.7-flash"; + name = "deepseek-r1:1.5b"; + max_tokens = 128000; + keep_alive = "15m"; + } + { + name = "deepseek-r1:32b"; + max_tokens = 128000; + keep_alive = "15m"; + } + { + name = "deepseek-r1:70b"; + max_tokens = 128000; + keep_alive = "15m"; + } + { + name = "qwen3-coder-next"; + max_tokens = 128000; + keep_alive = "15m"; + } + { + name = "lennyerik/zeta"; + max_tokens = 128000; + keep_alive = "15m"; + } + { + name = "nomic-embed-text:latest"; max_tokens = 128000; keep_alive = "15m"; } @@ -94,6 +119,22 @@ max_tokens = 128000; keep_alive = "15m"; } + { + name = "glm-4.7-flash"; + max_tokens = 128000; + keep_alive = "15m"; + } + { + name = "nemotron-cascade-2:30b"; + max_tokens = 128000; + keep_alive = "15m"; + } + { + name = "magistral"; + max_tokens = 128000; + keep_alive = "15m"; + } + ]; }; };