# NixOS module: local Ollama inference server + Open WebUI frontend.
# Both services are bound to 0.0.0.0 (LAN-reachable) with the firewall opened;
# neither is exposed through a load balancer yet.
{
  pkgs,
  ...
}:

let
  # Shared host variables; `primary_ollama` is the models location used below.
  vars = import ./vars.nix;
in

{
  services = {
    ollama = {
      enable = true;
      package = pkgs.ollama;
      syncModels = true;

      # Models pulled at activation time. Commented entries are kept for
      # easy re-enabling.
      loadModels = [
        "deepseek-r1:1.5b"
        "deepseek-r1:32b"
        "deepseek-r1:70b"
        #"qwen3"
        #"qwen3.5:latest"
        "qwen3-coder-next"
        "lennyerik/zeta"
        "nomic-embed-text:latest"
        "lfm2:24b"
        "glm-4.7-flash"
        "nemotron-cascade-2:30b"
        "magistral"
        "devstral-small-2"
      ];

      models = vars.primary_ollama;

      environmentVariables = {
        FLASH_ATTENTION = "1";
        # Quantized KV cache trades a little quality for a large VRAM saving.
        OLLAMA_KV_CACHE_TYPE = "q8_0";

        # Ollama memory configuration
        OLLAMA_MAX_LOADED_MODELS = "3";
        OLLAMA_MAX_QUEUE = "512";
        OLLAMA_NUM_PARALLEL = "1";

        # ROCm memory optimization
        #HIP_VISIBLE_DEVICES = "0";
        #ROCR_VISIBLE_DEVICES = "0";

        # context length for agents
        OLLAMA_CONTEXT_LENGTH = "128000";
      };

      openFirewall = true;
      host = "0.0.0.0"; # don't want to make this available via load-balancer yet, so making it available on the local network
    };

    open-webui = {
      enable = true;
      port = 21212;
      openFirewall = true;
      host = "0.0.0.0"; # don't want to make this available via load-balancer yet, so making it available on the local network
    };
  };

  # Run ollama as a dedicated system user with GPU device access
  # ("render"/"video" groups are required for /dev/dri and /dev/kfd).
  users.users.ollama = {
    extraGroups = [
      "render"
      "video"
    ];
    group = "ollama";
    isSystemUser = true;
  };

  users.groups.ollama = { };

  # Deprioritize inference and model downloads so interactive workloads on the
  # host stay responsive.
  systemd.services = {
    ollama.serviceConfig = {
      Nice = 19;
      # NOTE(review): IOSchedulingPriority only takes effect with the
      # best-effort or realtime I/O classes; systemd defaults the class to
      # best-effort when only a priority is set, but consider setting
      # IOSchedulingClass explicitly as done for ollama-model-loader below.
      IOSchedulingPriority = 7;
    };

    ollama-model-loader.serviceConfig = {
      Nice = 19;
      CPUWeight = 50;
      IOSchedulingClass = "idle";
      IOSchedulingPriority = 7;
    };
  };
}