Files
nix-dotfiles/systems/palatine-hill/ollama.nix

76 lines
1.8 KiB
Nix
Raw Normal View History

2026-03-19 23:17:17 -04:00
{
pkgs,
...
}:
2026-03-21 23:40:19 -04:00
let
# Values imported from the sibling vars.nix; this file only reads
# `primary_ollama` from it (assigned to `services.ollama.models` below).
vars = import ./vars.nix;
in
2026-03-19 23:17:17 -04:00
{
services = {
ollama = {
enable = true;
package = pkgs.ollama;
syncModels = true;
loadModels = [
"deepseek-r1:1.5b"
2026-03-23 21:37:02 -04:00
"deepseek-r1:32b"
2026-03-23 23:14:05 -04:00
"deepseek-r1:70b"
2026-03-26 11:31:21 -04:00
#"qwen3"
#"qwen3.5:latest"
2026-03-23 23:14:05 -04:00
"qwen3-coder-next"
2026-03-19 23:17:17 -04:00
"lennyerik/zeta"
"nomic-embed-text:latest"
2026-03-23 22:03:31 -04:00
"lfm2:24b"
"glm-4.7-flash"
2026-03-26 11:40:57 -04:00
"nemotron-cascade-2:30b"
"magistral"
2026-03-19 23:17:17 -04:00
];
2026-03-21 23:40:19 -04:00
models = vars.primary_ollama;
2026-03-19 23:17:17 -04:00
environmentVariables = {
# Ollama reads the OLLAMA_-prefixed variable; a bare FLASH_ATTENTION is ignored.
# Flash attention must be enabled for the quantized KV cache type to apply.
OLLAMA_FLASH_ATTENTION = "1";
OLLAMA_KV_CACHE_TYPE = "q8_0";
# Ollama memory configuration
2026-03-26 11:31:21 -04:00
OLLAMA_MAX_LOADED_MODELS = "2";
2026-03-23 23:14:05 -04:00
OLLAMA_MAX_QUEUE = "512";
2026-03-26 11:31:21 -04:00
OLLAMA_NUM_PARALLEL = "1";
2026-03-19 23:17:17 -04:00
# ROCm device selection (disabled): uncomment to restrict Ollama to GPU 0
#HIP_VISIBLE_DEVICES = "0";
#ROCR_VISIBLE_DEVICES = "0";
# context length for agents
2026-03-23 23:17:14 -04:00
OLLAMA_CONTEXT_LENGTH = "128000";
2026-03-19 23:17:17 -04:00
};
2026-03-21 23:40:19 -04:00
openFirewall = true;
host = "0.0.0.0"; # don't want to make this available via load-balancer yet, so making it available on the local network
2026-03-19 23:17:17 -04:00
};
# Open WebUI service: bound to all interfaces on port 21212, with the
# firewall opened for it.
open-webui = {
enable = true;
port = 21212;
2026-03-21 23:40:19 -04:00
openFirewall = true;
host = "0.0.0.0"; # don't want to make this available via load-balancer yet, so making it available on the local network
2026-03-19 23:17:17 -04:00
};
};
# Static system account and matching group named "ollama". The account is
# also a member of "render" and "video" (presumably for GPU device access;
# confirm against the host's device permissions).
users = {
  groups.ollama = { };
  users.ollama = {
    isSystemUser = true;
    group = "ollama";
    extraGroups = [
      "render"
      "video"
    ];
  };
};
2026-03-21 23:40:19 -04:00
# Scheduling overrides for the Ollama daemon: highest niceness value and
# I/O priority level 7.
systemd.services.ollama.serviceConfig = {
  IOSchedulingPriority = 7;
  Nice = 19;
};
# Scheduling overrides for the model-loader unit: same niceness, plus a
# CPU weight of 50 and the "idle" I/O scheduling class.
systemd.services.ollama-model-loader.serviceConfig = {
  IOSchedulingClass = "idle";
  IOSchedulingPriority = 7;
  CPUWeight = 50;
  Nice = 19;
};
2026-03-19 23:17:17 -04:00
}