Source code for ollama_serve.main

"""
Core helpers for interacting with the Ollama server.
"""

import http.client
import json
import logging
import os
import shutil
import subprocess
import time
import warnings

HTTP_ERROR_STATUS = 400
DEFAULT_TIMEOUT = float(os.getenv("OLLAMA_SERVE_TIMEOUT", "0.2"))
DEFAULT_RETRIES = int(os.getenv("OLLAMA_SERVE_RETRIES", "1"))
DEFAULT_RETRY_DELAY = float(os.getenv("OLLAMA_SERVE_RETRY_DELAY", "0.2"))

LOGGER = logging.getLogger("ollama_serve")


def _resolve_retries(retries: int | None) -> int:
    if retries is None:
        return max(DEFAULT_RETRIES, 1)
    return max(retries, 1)


def _resolve_timeout(timeout: float | None) -> float:
    if timeout is None:
        return DEFAULT_TIMEOUT
    return timeout


def _resolve_retry_delay(retry_delay: float | None) -> float:
    if retry_delay is None:
        return DEFAULT_RETRY_DELAY
    return retry_delay


def is_ollama_running(
    host: str = "127.0.0.1",
    port: int = 11434,
    timeout: float | None = None,
    retries: int | None = None,
    retry_delay: float | None = None,
) -> bool:
    """
    Return True when an Ollama server responds on the given host/port.

    We use a lightweight HTTP request to the tags endpoint to avoid false
    positives when another service is bound to the same port.

    Args:
        host: Hostname or IP address to probe.
        port: Port to probe.
        timeout: Socket timeout in seconds.
        retries: Number of attempts before returning False.
        retry_delay: Sleep duration between retries in seconds.

    Returns:
        True when the Ollama tags endpoint responds; otherwise False.
    """
    attempts = _resolve_retries(retries)
    wait = _resolve_retry_delay(retry_delay)
    resolved_timeout = _resolve_timeout(timeout)
    for attempt in range(attempts):
        conn: http.client.HTTPConnection | None = None
        try:
            conn = http.client.HTTPConnection(host, port, timeout=resolved_timeout)
            conn.request("GET", "/api/tags")
            response = conn.getresponse()
            if response.status >= HTTP_ERROR_STATUS:
                return False
            payload = response.read()
            data = json.loads(payload)
            return isinstance(data, dict) and "models" in data
        except (OSError, http.client.HTTPException, json.JSONDecodeError):
            if attempt == attempts - 1:
                return False
            time.sleep(wait)
        finally:
            if conn is not None:
                conn.close()
    return False


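A minimal usage sketch (not part of the module source): probing a specific host and port with a more generous timeout and retry budget than the defaults. The host address below is a placeholder.

from ollama_serve.main import is_ollama_running

# Probe a remote machine, allowing 2 s per attempt and up to 3 attempts.
if is_ollama_running(host="10.0.0.5", port=11434, timeout=2.0, retries=3, retry_delay=0.5):
    print("Ollama responded on the tags endpoint")
else:
    print("No Ollama server reachable")
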
def run_ollama_server(
    host: str = "127.0.0.1",
    port: int = 11434,
    timeout: float | None = None,
    retries: int | None = None,
    retry_delay: float | None = None,
) -> bool:
    """
    Start the Ollama server when it is not already running.

    Args:
        host: Hostname or IP address to probe.
        port: Port to probe.
        timeout: Socket timeout in seconds.
        retries: Number of attempts before returning False.
        retry_delay: Sleep duration between retries in seconds.

    Returns:
        True when the server is already running or successfully started;
        False when Ollama is not installed or fails to start.
    """
    if is_ollama_running(
        host=host,
        port=port,
        timeout=timeout,
        retries=retries,
        retry_delay=retry_delay,
    ):
        return True
    if shutil.which("ollama") is None:
        warnings.warn(
            "Ollama does not appear to be installed or available on PATH. "
            "Install it from https://ollama.com/download and ensure the "
            "`ollama` command is accessible in this environment.",
            UserWarning,
            stacklevel=2,
        )
        return False
    LOGGER.info("Starting Ollama server.")
    subprocess.Popen(
        ["ollama", "serve"],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        close_fds=True,
    )
    return is_ollama_running(
        host=host,
        port=port,
        timeout=timeout,
        retries=retries,
        retry_delay=retry_delay,
    )


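Because the server is launched with a fire-and-forget Popen and probed again immediately, the default single attempt with a 0.2 s timeout can report False while `ollama serve` is still starting. A minimal sketch (not part of the module source) that gives a freshly launched server time to bind:

from ollama_serve.main import run_ollama_server

# Allow up to roughly 5 s (10 attempts with 0.5 s sleeps) for startup.
if run_ollama_server(retries=10, retry_delay=0.5):
    print("Ollama server is up")
else:
    print("Ollama could not be started")
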
def is_model_installed(
    model: str,
    host: str = "127.0.0.1",
    port: int = 11434,
    timeout: float | None = None,
    retries: int | None = None,
    retry_delay: float | None = None,
) -> bool:
    """
    Return True when the named model is present in Ollama.

    Args:
        model: Model name to look up (for example, "llama3" or "llama3:latest").
        host: Hostname or IP address to probe.
        port: Port to probe.
        timeout: Socket timeout in seconds.
        retries: Number of attempts before returning False.
        retry_delay: Sleep duration between retries in seconds.

    Returns:
        True when the model appears in the Ollama tags list; otherwise False.
    """
    attempts = _resolve_retries(retries)
    wait = _resolve_retry_delay(retry_delay)
    resolved_timeout = _resolve_timeout(timeout)
    for attempt in range(attempts):
        conn: http.client.HTTPConnection | None = None
        try:
            conn = http.client.HTTPConnection(host, port, timeout=resolved_timeout)
            conn.request("GET", "/api/tags")
            response = conn.getresponse()
            if response.status >= HTTP_ERROR_STATUS:
                return False
            payload = response.read()
            data = json.loads(payload)
            models = data.get("models", [])
            return any(
                isinstance(entry, dict) and entry.get("name") == model
                for entry in models
            )
        except (OSError, http.client.HTTPException, json.JSONDecodeError):
            if attempt == attempts - 1:
                return False
            time.sleep(wait)
        finally:
            if conn is not None:
                conn.close()
    return False


def install_model(
    model: str,
    host: str = "127.0.0.1",
    port: int = 11434,
    timeout: float | None = None,
    retries: int | None = None,
    retry_delay: float | None = None,
) -> bool:
    """
    Install a model if it is not already present in Ollama.

    Args:
        model: Model name to install (for example, "llama3" or "llama3:latest").
        host: Hostname or IP address to probe.
        port: Port to probe.
        timeout: Socket timeout in seconds.
        retries: Number of attempts before returning False.
        retry_delay: Sleep duration between retries in seconds.

    Returns:
        True when the model is already installed or installs successfully;
        otherwise False.
    """
    if is_model_installed(
        model,
        host=host,
        port=port,
        timeout=timeout,
        retries=retries,
        retry_delay=retry_delay,
    ):
        return True
    if not run_ollama_server(
        host=host,
        port=port,
        timeout=timeout,
        retries=retries,
        retry_delay=retry_delay,
    ):
        return False
    try:
        LOGGER.info("Pulling Ollama model: %s", model)
        subprocess.run(
            ["ollama", "pull", model],
            check=True,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except subprocess.CalledProcessError:
        return False
    return is_model_installed(
        model,
        host=host,
        port=port,
        timeout=timeout,
        retries=retries,
        retry_delay=retry_delay,
    )


def ensure_model_and_server_ready(
    model: str,
    host: str = "127.0.0.1",
    port: int = 11434,
    timeout: float | None = None,
    retries: int | None = None,
    retry_delay: float | None = None,
) -> bool:
    """
    Ensure the server is running and the requested model is available.

    Args:
        model: Model name to ensure is present (for example, "llama3:latest").
        host: Hostname or IP address to probe.
        port: Port to probe.
        timeout: Socket timeout in seconds.
        retries: Number of attempts before returning False.
        retry_delay: Sleep duration between retries in seconds.

    Returns:
        True when the server is running and the model is available;
        otherwise False.
    """
    if not run_ollama_server(
        host=host,
        port=port,
        timeout=timeout,
        retries=retries,
        retry_delay=retry_delay,
    ):
        return False
    if is_model_installed(
        model,
        host=host,
        port=port,
        timeout=timeout,
        retries=retries,
        retry_delay=retry_delay,
    ):
        return True
    return install_model(
        model,
        host=host,
        port=port,
        timeout=timeout,
        retries=retries,
        retry_delay=retry_delay,
    )
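
End-to-end sketch (not part of the module source): make sure the server and a model are ready, then send one prompt through the standard Ollama /api/generate endpoint using only the standard library. The model name and prompt below are placeholders.

import http.client
import json

from ollama_serve.main import ensure_model_and_server_ready

MODEL = "llama3:latest"  # placeholder model name

if ensure_model_and_server_ready(MODEL, retries=5, retry_delay=1.0):
    conn = http.client.HTTPConnection("127.0.0.1", 11434, timeout=120)
    try:
        # Non-streaming generate request; the reply JSON carries the text
        # in its "response" field.
        body = json.dumps({"model": MODEL, "prompt": "Say hello.", "stream": False})
        conn.request(
            "POST",
            "/api/generate",
            body=body,
            headers={"Content-Type": "application/json"},
        )
        reply = json.loads(conn.getresponse().read())
        print(reply.get("response"))
    finally:
        conn.close()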