Allow 3 concurrently loaded Ollama models

This commit is contained in:
2026-03-26 20:50:24 -04:00
parent fde7963379
commit 9bd6525dd0
4 changed files with 67 additions and 3 deletions

View File

@@ -49,5 +49,30 @@
# Run containerd, the container runtime intended for the Kubernetes
# components configured below.
virtualisation.containerd.enable = true;
# Enable the kubelet and pass it the `pod-infra-container-image` flag set to
# the pause:3.9 image.
# NOTE(review): stock NixOS exposes the kubelet as
# `services.kubernetes.kubelet` (with a string-valued `extraOpts`), not
# `services.kubelet.extraFlags` — confirm a module declaring this option
# exists elsewhere in the repo, otherwise evaluation should fail on an
# undefined option.
# NOTE(review): with a CRI runtime such as containerd the sandbox image is
# presumably chosen by the runtime's own configuration, and this kubelet flag
# is deprecated upstream — confirm it is still needed.
services.kubelet = {
enable = true;
extraFlags = {
"pod-infra-container-image" = "registry.k8s.io/pause:3.9";
};
};
# Generate /etc/kubeadm.yaml: an InitConfiguration document followed by a
# ClusterConfiguration document (both kubeadm.k8s.io/v1beta3). This block only
# writes the file; nothing here invokes kubeadm.
# clusterName and controlPlaneEndpoint are interpolated from
# config.services.kubernetes when the configuration is evaluated.
# NOTE(review): advertiseAddress is the loopback address 127.0.0.1 — confirm
# this is intended; other nodes could not reach an API server advertised there.
# NOTE(review): services.kubernetes.clusterName and
# services.kubernetes.controlPlaneEndpoint are not declared in the visible
# code — verify they exist as options in this configuration.
environment.etc."kubeadm.yaml".text = ''
apiVersion: kubeadm.k8s.io/v1beta3
kind: InitConfiguration
localAPIEndpoint:
advertiseAddress: 127.0.0.1
bindPort: 6443
---
apiVersion: kubeadm.k8s.io/v1beta3
kind: ClusterConfiguration
clusterName: ${config.services.kubernetes.clusterName}
controlPlaneEndpoint: ${config.services.kubernetes.controlPlaneEndpoint}
networking:
serviceSubnet: 10.96.0.0/12
podSubnet: 10.244.0.0/16
dnsDomain: cluster.local
'';
};
}

View File

@@ -17,7 +17,6 @@
./minio.nix
./networking.nix
./nextcloud.nix
./ollama.nix
#./plex
./postgresql.nix
./samba.nix

View File

@@ -31,7 +31,7 @@ in
FLASH_ATTENTION = "1";
OLLAMA_KV_CACHE_TYPE = "q8_0";
# Ollama memory configuration
# NOTE(review): OLLAMA_MAX_LOADED_MODELS presumably caps how many models may
# stay resident at once, while OLLAMA_NUM_PARALLEL (left at "1") presumably
# governs concurrent requests per model — raising the former does not make
# requests to a single model run in parallel. Confirm against the Ollama docs.
OLLAMA_MAX_LOADED_MODELS = "2";
OLLAMA_MAX_LOADED_MODELS = "3";
OLLAMA_MAX_QUEUE = "512";
OLLAMA_NUM_PARALLEL = "1";

View File

@@ -85,7 +85,32 @@
#keep_alive = "15m";
available_models = [
{
name = "glm-4.7-flash";
name = "deepseek-r1:1.5b";
max_tokens = 128000;
keep_alive = "15m";
}
# Model entries; each one sets max_tokens = 128000 and keep_alive = "15m".
{
name = "deepseek-r1:32b";
max_tokens = 128000;
keep_alive = "15m";
}
{
name = "deepseek-r1:70b";
max_tokens = 128000;
keep_alive = "15m";
}
{
name = "qwen3-coder-next";
max_tokens = 128000;
keep_alive = "15m";
}
{
name = "lennyerik/zeta";
max_tokens = 128000;
keep_alive = "15m";
}
# NOTE(review): the name suggests an embedding model; the 128000 max_tokens
# looks copied from the other entries — confirm the model's real context
# length and whether it belongs in this list.
{
name = "nomic-embed-text:latest";
max_tokens = 128000;
keep_alive = "15m";
}
@@ -94,6 +119,21 @@
max_tokens = 128000;
keep_alive = "15m";
}
# Further model entries, using the same max_tokens = 128000 and
# keep_alive = "15m" settings as the other entries in this list.
{
name = "glm-4.7-flash";
max_tokens = 128000;
keep_alive = "15m";
}
{
name = "nemotron-cascade-2:30b";
max_tokens = 128000;
keep_alive = "15m";
}
{
name = "magistral";
max_tokens = 128000;
keep_alive = "15m";
}
];
};
};