add ollama back to artemision

2026-03-26 21:40:12 -04:00
parent 9bd6525dd0
commit cbc1a210dc
4 changed files with 45 additions and 27 deletions
--- a/systems/artemision/configuration.nix
+++ b/systems/artemision/configuration.nix
@@ -1,6 +1,7 @@
 {
  lib,
  pkgs,
+  config,
  ...
 }:
 {
@@ -17,6 +18,7 @@
    ./stylix.nix
    ./wifi.nix
    ./zerotier.nix
+    ../palatine-hill/ollama.nix
  ];

  time.timeZone = "America/New_York";
@@ -39,6 +41,20 @@
  sops.age.sshKeyPaths = [ "/etc/ssh/ssh_host_ed25519_key" ];

  services = {
+    ollama = {
+      package = lib.mkForce pkgs.ollama-rocm;
+      models = lib.mkForce "${config.services.ollama.home}/models";
+      loadModels = lib.mkForce [
+        "deepseek-r1:1.5b"
+        "qwen3-coder-next"
+        "lennyerik/zeta"
+        "nomic-embed-text:latest"
+        "glm-4.7-flash"
+        "magistral"
+        "devstral-small-2"
+        "starcoder2:7b"
+      ];
+    };
    flatpak.enable = true;
    calibre-web = {
      # temp disable this
--- a/systems/palatine-hill/ollama.nix
+++ b/systems/palatine-hill/ollama.nix
@@ -25,11 +25,12 @@ in
        "nemotron-cascade-2:30b"
        "magistral"
        "devstral-small-2"
+        "starcoder2:15b"
      ];
      models = vars.primary_ollama;
      environmentVariables = {
        FLASH_ATTENTION = "1";
-        OLLAMA_KV_CACHE_TYPE = "q8_0";
+        OLLAMA_KV_CACHE_TYPE = "q4_0";
        # Ollama memory configuration
        OLLAMA_MAX_LOADED_MODELS = "3";
        OLLAMA_MAX_QUEUE = "512";