add ollama back to artemision
This commit is contained in:
@@ -25,11 +25,12 @@ in
|
||||
"nemotron-cascade-2:30b"
|
||||
"magistral"
|
||||
"devstral-small-2"
|
||||
"starcoder2:15b"
|
||||
];
|
||||
models = vars.primary_ollama;
|
||||
environmentVariables = {
|
||||
FLASH_ATTENTION = "1";
|
||||
OLLAMA_KV_CACHE_TYPE = "q8_0";
|
||||
OLLAMA_KV_CACHE_TYPE = "q4_0";
|
||||
# Ollama memory configuration
|
||||
OLLAMA_MAX_LOADED_MODELS = "3";
|
||||
OLLAMA_MAX_QUEUE = "512";
|
||||
|
||||
Reference in New Issue
Block a user