3 parallel models
This commit is contained in:
@@ -17,7 +17,6 @@
|
||||
./minio.nix
|
||||
./networking.nix
|
||||
./nextcloud.nix
|
||||
./ollama.nix
|
||||
#./plex
|
||||
./postgresql.nix
|
||||
./samba.nix
|
||||
|
||||
@@ -31,7 +31,7 @@ in
|
||||
FLASH_ATTENTION = "1";
|
||||
OLLAMA_KV_CACHE_TYPE = "q8_0";
|
||||
# Ollama memory configuration
|
||||
OLLAMA_MAX_LOADED_MODELS = "2";
|
||||
OLLAMA_MAX_LOADED_MODELS = "3";
|
||||
OLLAMA_MAX_QUEUE = "512";
|
||||
OLLAMA_NUM_PARALLEL = "1";
|
||||
|
||||
|
||||
Reference in New Issue
Block a user