ollama: trim model list, lower max loaded models and parallelism

2026-03-26 11:31:21 -04:00
parent de45a27860
commit 85721481b4
5 changed files with 49 additions and 54 deletions
--- a/systems/palatine-hill/configuration.nix
+++ b/systems/palatine-hill/configuration.nix
@@ -17,7 +17,6 @@
    ./minio.nix
    ./networking.nix
    ./nextcloud.nix
-    ./ollama.nix
    #./plex
    ./postgresql.nix
    ./samba.nix
@@ -26,10 +25,8 @@

  programs.git.lfs.enable = false;

-  nixpkgs.config = {
-    packageOverrides = pkgs: {
-      vaapiIntel = pkgs.vaapiIntel.override { enableHybridCodec = true; };
-    };
+  nixpkgs.config.packageOverrides = pkgs: {
+    vaapiIntel = pkgs.vaapiIntel.override { enableHybridCodec = true; };
  };

  boot = {
@@ -100,13 +97,6 @@
    smartd.enable = true;
    calibre-server.enable = false;

-    # Kubernetes example configuration
-    # To enable Kubernetes, uncomment the following:
-    # kubernetes = {
-    #   enable = true;
-    #   clusterName = "palatine-hill-cluster";
-    #   controlPlaneEndpoint = "localhost:6443";
-    # };
  };

  nix.gc.options = "--delete-older-than 150d";
--- a/systems/palatine-hill/ollama.nix
+++ b/systems/palatine-hill/ollama.nix
@@ -15,16 +15,11 @@ in
        "deepseek-r1:1.5b"
        "deepseek-r1:32b"
        "deepseek-r1:70b"
-        "qwen3"
-        #"qwen3-coder-next"
+        #"qwen3"
+        #"qwen3.5:latest"
        "qwen3-coder-next"
        "lennyerik/zeta"
-        "qwen2.5-coder:14b"
-        "qwen2.5-coder:32b"
        "nomic-embed-text:latest"
-        "llama4:scout"
-        "mistral:7b"
-        "minimax-m2.7:cloud"
        "lfm2:24b"
        "glm-4.7-flash"
      ];
@@ -33,9 +28,9 @@ in
        FLASH_ATTENTION = "1";
        OLLAMA_KV_CACHE_TYPE = "q8_0";
        # Ollama memory configuration
-        OLLAMA_MAX_LOADED_MODELS = "3";
+        OLLAMA_MAX_LOADED_MODELS = "2";
        OLLAMA_MAX_QUEUE = "512";
-        OLLAMA_NUM_PARALLEL = "3";
+        OLLAMA_NUM_PARALLEL = "1";

        # ROCm memory optimization
        #HIP_VISIBLE_DEVICES = "0";