{ pkgs, ... }:
let
  vars = import ./vars.nix;
in
{
  services = {
    ollama = {
      enable = true;
      package = pkgs.ollama;
      syncModels = true;
      # Models pulled automatically by the ollama-model-loader service.
      loadModels = [
        "deepseek-r1:1.5b"
        "deepseek-r1:32b"
        "deepseek-r1:70b"
        #"qwen3"
        #"qwen3.5:latest"
        "qwen3-coder-next"
        "lennyerik/zeta"
        "nomic-embed-text:latest"
        "lfm2:24b"
        "glm-4.7-flash"
        "nemotron-cascade-2:30b"
        "magistral"
        "devstral-small-2"
      ];
      models = vars.primary_ollama;
      environmentVariables = {
        # FIX: Ollama reads OLLAMA_FLASH_ATTENTION, not FLASH_ATTENTION.
        # The old name was silently ignored, which also made the quantized
        # KV cache setting below inert (it requires flash attention).
        OLLAMA_FLASH_ATTENTION = "1";
        # Quantize the KV cache to q8_0 to roughly halve its VRAM footprint.
        OLLAMA_KV_CACHE_TYPE = "q8_0";

        # Ollama memory configuration
        OLLAMA_MAX_LOADED_MODELS = "3";
        OLLAMA_MAX_QUEUE = "512";
        OLLAMA_NUM_PARALLEL = "1";

        # ROCm memory optimization
        #HIP_VISIBLE_DEVICES = "0";
        #ROCR_VISIBLE_DEVICES = "0";

        # context length for agents
        OLLAMA_CONTEXT_LENGTH = "128000";
      };
      openFirewall = true;
      host = "0.0.0.0"; # don't want to make this available via load-balancer yet, so making it available on the local network
    };

    open-webui = {
      enable = true;
      port = 21212;
      openFirewall = true;
      host = "0.0.0.0"; # don't want to make this available via load-balancer yet, so making it available on the local network
    };
  };

  # Give the ollama service user access to the GPU device nodes (ROCm needs
  # membership in "render" and "video").
  users.users.ollama = {
    extraGroups = [
      "render"
      "video"
    ];
    group = "ollama";
    isSystemUser = true;
  };
  users.groups.ollama = { };

  # De-prioritize ollama CPU/IO so inference never starves interactive workloads.
  systemd.services = {
    ollama.serviceConfig = {
      Nice = 19;
      # NOTE(review): no IOSchedulingClass is set here (the model loader below
      # uses "idle"); systemd defaults the class to best-effort when only a
      # priority is given — confirm this asymmetry is intentional.
      IOSchedulingPriority = 7;
    };
    ollama-model-loader.serviceConfig = {
      Nice = 19;
      CPUWeight = 50;
      IOSchedulingClass = "idle";
      IOSchedulingPriority = 7;
    };
  };
}