Added support for unloading an Ollama model when the client requests it.
This commit is contained in:
@@ -314,6 +314,12 @@ class OllamaProxyHandler(http.server.SimpleHTTPRequestHandler):
|
|||||||
self.send_error(500, f"Internal server error: {e}")
|
self.send_error(500, f"Internal server error: {e}")
|
||||||
|
|
||||||
def _is_gpu_intensive_operation(self, path, request_data):
|
def _is_gpu_intensive_operation(self, path, request_data):
|
||||||
|
if path == '/api/generate' and request_data.get('keep_alive') == 0:
|
||||||
|
logging.debug("Detected model unload request via /api/generate")
|
||||||
|
return False
|
||||||
|
if path == '/api/chat' and request_data.get('keep_alive') == 0 and request_data.get('messages') == []:
|
||||||
|
logging.debug("Detected model unload request via /api/chat")
|
||||||
|
return False
|
||||||
if path in ['/api/generate', '/api/chat', '/api/embeddings']:
|
if path in ['/api/generate', '/api/chat', '/api/embeddings']:
|
||||||
return True
|
return True
|
||||||
if path == '/api/load':
|
if path == '/api/load':
|
||||||
|
|||||||
Reference in New Issue
Block a user