3 parallel models

2026-03-26 20:50:24 -04:00
parent fde7963379
commit 9bd6525dd0
4 changed files with 67 additions and 3 deletions
--- a/modules/kubernetes.nix
+++ b/modules/kubernetes.nix
@@ -49,5 +49,30 @@
    # Turn on the containerd service; it is enabled here for use by Kubernetes.
    virtualisation.containerd.enable = true;
    # Kubelet: switch the service on and hand it one extra flag,
    # pod-infra-container-image, set to the registry.k8s.io pause image (tag 3.9).
    services.kubelet.enable = true;
    services.kubelet.extraFlags."pod-infra-container-image" = "registry.k8s.io/pause:3.9";
    # Provide a kubeadm configuration file as /etc/kubeadm.yaml for cluster
    # initialization. This only writes the file; nothing here invokes kubeadm.
    # The text holds two YAML documents separated by `---`:
    #   1. InitConfiguration    - local API endpoint (advertise address + bind port).
    #   2. ClusterConfiguration - cluster name and control-plane endpoint, both
    #      interpolated from config.services.kubernetes.*, plus the service subnet,
    #      pod subnet and DNS domain as fixed values.
    # NOTE(review): advertiseAddress is the loopback address - presumably meant for
    # a single-node setup; confirm before other nodes are expected to join.
    # NOTE(review): clusterName / controlPlaneEndpoint are assumed to be options
    # declared elsewhere in this module - verify they are always set.
    environment.etc."kubeadm.yaml".text = ''
      apiVersion: kubeadm.k8s.io/v1beta3
      kind: InitConfiguration
      localAPIEndpoint:
        advertiseAddress: 127.0.0.1
        bindPort: 6443
      ---
      apiVersion: kubeadm.k8s.io/v1beta3
      kind: ClusterConfiguration
      clusterName: ${config.services.kubernetes.clusterName}
      controlPlaneEndpoint: ${config.services.kubernetes.controlPlaneEndpoint}
      networking:
        serviceSubnet: 10.96.0.0/12
        podSubnet: 10.244.0.0/16
        dnsDomain: cluster.local
    '';
  };
 }
--- a/systems/palatine-hill/configuration.nix
+++ b/systems/palatine-hill/configuration.nix
@@ -17,7 +17,6 @@
    ./minio.nix
    ./networking.nix
    ./nextcloud.nix
    ./ollama.nix
    #./plex
    ./postgresql.nix
    ./samba.nix
--- a/systems/palatine-hill/ollama.nix
+++ b/systems/palatine-hill/ollama.nix
@@ -31,7 +31,7 @@ in
        FLASH_ATTENTION = "1";
        OLLAMA_KV_CACHE_TYPE = "q8_0";
        # Ollama memory configuration
-        OLLAMA_MAX_LOADED_MODELS = "2";
+        OLLAMA_MAX_LOADED_MODELS = "3";
        OLLAMA_MAX_QUEUE = "512";
        OLLAMA_NUM_PARALLEL = "1";
--- a/users/alice/non-server.nix
+++ b/users/alice/non-server.nix
@@ -85,7 +85,32 @@
            #keep_alive = "15m";
            available_models = [
              {
-                name = "glm-4.7-flash";
+                name = "deepseek-r1:1.5b";
                max_tokens = 128000;
                keep_alive = "15m";
              }
              {
                name = "deepseek-r1:32b";
                max_tokens = 128000;
                keep_alive = "15m";
              }
              {
                name = "deepseek-r1:70b";
                max_tokens = 128000;
                keep_alive = "15m";
              }
              {
                name = "qwen3-coder-next";
                max_tokens = 128000;
                keep_alive = "15m";
              }
              {
                name = "lennyerik/zeta";
                max_tokens = 128000;
                keep_alive = "15m";
              }
              {
                name = "nomic-embed-text:latest";
                max_tokens = 128000;
                keep_alive = "15m";
              }
@@ -94,6 +119,21 @@
                max_tokens = 128000;
                keep_alive = "15m";
              }
              {
                name = "glm-4.7-flash";
                max_tokens = 128000;
                keep_alive = "15m";
              }
              {
                name = "nemotron-cascade-2:30b";
                max_tokens = 128000;
                keep_alive = "15m";
              }
              {
                name = "magistral";
                max_tokens = 128000;
                keep_alive = "15m";
              }
            ];
          };
        };