Added support for unloading an Ollama model when the client requests it.

This commit is contained in:
mrmarcus007
2025-11-14 16:05:15 +01:00
parent 8282740de4
commit 2fbc1ffa6f
@@ -314,6 +314,12 @@ class OllamaProxyHandler(http.server.SimpleHTTPRequestHandler):
self.send_error(500, f"Internal server error: {e}") self.send_error(500, f"Internal server error: {e}")
def _is_gpu_intensive_operation(self, path, request_data): def _is_gpu_intensive_operation(self, path, request_data):
if path == '/api/generate' and request_data.get('keep_alive') == 0:
logging.debug("Detected model unload request via /api/generate")
return False
if path == '/api/chat' and request_data.get('keep_alive') == 0 and request_data.get('messages') == []:
logging.debug("Detected model unload request via /api/chat")
return False
if path in ['/api/generate', '/api/chat', '/api/embeddings']: if path in ['/api/generate', '/api/chat', '/api/embeddings']:
return True return True
if path == '/api/load': if path == '/api/load':