3 parallel models

This commit is contained in:
2026-03-26 20:50:24 -04:00
parent fde7963379
commit 9bd6525dd0
4 changed files with 67 additions and 3 deletions

View File

@@ -31,7 +31,7 @@ in
FLASH_ATTENTION = "1";
OLLAMA_KV_CACHE_TYPE = "q8_0";
# Ollama memory configuration
-OLLAMA_MAX_LOADED_MODELS = "2";
+OLLAMA_MAX_LOADED_MODELS = "3";
OLLAMA_MAX_QUEUE = "512";
OLLAMA_NUM_PARALLEL = "1";