Added support for unloading an Ollama model when the client requests it.

2025-11-14 16:05:15 +01:00
parent 8282740de4
commit 2fbc1ffa6f
1 changed file with 6 additions and 0 deletions
@@ -314,6 +314,12 @@ class OllamaProxyHandler(http.server.SimpleHTTPRequestHandler):
            self.send_error(500, f"Internal server error: {e}")
    def _is_gpu_intensive_operation(self, path, request_data):
        if path == '/api/generate' and request_data.get('keep_alive') == 0:
            logging.debug("Detected model unload request via /api/generate")
            return False
        if path == '/api/chat' and request_data.get('keep_alive') == 0 and request_data.get('messages') == []:
            logging.debug("Detected model unload request via /api/chat")
            return False
        if path in ['/api/generate', '/api/chat', '/api/embeddings']:
            return True
        if path == '/api/load':