feature/fwupd #196
@@ -12,19 +12,18 @@ in
|
|||||||
package = pkgs.ollama;
|
package = pkgs.ollama;
|
||||||
syncModels = true;
|
syncModels = true;
|
||||||
loadModels = [
|
loadModels = [
|
||||||
"deepseek-r1:latest"
|
|
||||||
"deepseek-r1:1.5b"
|
"deepseek-r1:1.5b"
|
||||||
"deepseek-r1:32b"
|
"deepseek-r1:32b"
|
||||||
|
"deepseek-r1:70b"
|
||||||
"qwen3"
|
"qwen3"
|
||||||
#"qwen3-coder-next"
|
#"qwen3-coder-next"
|
||||||
"qwen3-coder"
|
"qwen3-coder-next"
|
||||||
"lennyerik/zeta"
|
"lennyerik/zeta"
|
||||||
"qwen2.5-coder:1.5b-base"
|
|
||||||
"qwen2.5-coder:14b"
|
"qwen2.5-coder:14b"
|
||||||
|
"qwen2.5-coder:32b"
|
||||||
"nomic-embed-text:latest"
|
"nomic-embed-text:latest"
|
||||||
"llama4:scout"
|
"llama4:scout"
|
||||||
"mistral:7b"
|
"mistral:7b"
|
||||||
"deepseek-v3.2:cloud"
|
|
||||||
"minimax-m2.7:cloud"
|
"minimax-m2.7:cloud"
|
||||||
"lfm2:24b"
|
"lfm2:24b"
|
||||||
"glm-4.7-flash"
|
"glm-4.7-flash"
|
||||||
@@ -34,9 +33,9 @@ in
|
|||||||
FLASH_ATTENTION = "1";
|
FLASH_ATTENTION = "1";
|
||||||
OLLAMA_KV_CACHE_TYPE = "q8_0";
|
OLLAMA_KV_CACHE_TYPE = "q8_0";
|
||||||
# Ollama memory configuration
|
# Ollama memory configuration
|
||||||
OLLAMA_MAX_LOADED_MODELS = "2";
|
OLLAMA_MAX_LOADED_MODELS = "3";
|
||||||
OLLAMA_MAX_QUEUE = "4";
|
OLLAMA_MAX_QUEUE = "512";
|
||||||
OLLAMA_NUM_PARALLEL = "2";
|
OLLAMA_NUM_PARALLEL = "3";
|
||||||
|
|
||||||
# ROCm memory optimization
|
# ROCm memory optimization
|
||||||
#HIP_VISIBLE_DEVICES = "0";
|
#HIP_VISIBLE_DEVICES = "0";
|
||||||
|
|||||||
Reference in New Issue
Block a user