ollama stuff
This commit is contained in:
@@ -15,16 +15,11 @@ in
|
||||
"deepseek-r1:1.5b"
|
||||
"deepseek-r1:32b"
|
||||
"deepseek-r1:70b"
|
||||
"qwen3"
|
||||
#"qwen3-coder-next"
|
||||
#"qwen3"
|
||||
#"qwen3.5:latest"
|
||||
"qwen3-coder-next"
|
||||
"lennyerik/zeta"
|
||||
"qwen2.5-coder:14b"
|
||||
"qwen2.5-coder:32b"
|
||||
"nomic-embed-text:latest"
|
||||
"llama4:scout"
|
||||
"mistral:7b"
|
||||
"minimax-m2.7:cloud"
|
||||
"lfm2:24b"
|
||||
"glm-4.7-flash"
|
||||
];
|
||||
@@ -33,9 +28,9 @@ in
|
||||
FLASH_ATTENTION = "1";
|
||||
OLLAMA_KV_CACHE_TYPE = "q8_0";
|
||||
# Ollama memory configuration
|
||||
OLLAMA_MAX_LOADED_MODELS = "3";
|
||||
OLLAMA_MAX_LOADED_MODELS = "2";
|
||||
OLLAMA_MAX_QUEUE = "512";
|
||||
OLLAMA_NUM_PARALLEL = "3";
|
||||
OLLAMA_NUM_PARALLEL = "1";
|
||||
|
||||
# ROCm memory optimization
|
||||
#HIP_VISIBLE_DEVICES = "0";
|
||||
|
||||
Reference in New Issue
Block a user