Source code for femorph_solver.estimators._host

"""HostSpec — per-machine feature vector for the estimator.

Captures the hardware + software axes that move wall-time and peak
RSS on modal solves: CPU count, RAM, BLAS vendor / thread count,
MKL availability.  Extensible: the :class:`HostSpec` dataclass
carries an ``extras`` dict so future bench rows can ship any
field a retraining pass wants to grade as predictive without
breaking the loader.
"""

from __future__ import annotations

import contextlib
import os
import platform
from dataclasses import dataclass, field
from pathlib import Path


@dataclass(frozen=True)
class HostSpec:
    """Per-host feature vector — stable across solves on one box.

    The fields are what we can introspect cheaply from a running
    process + the kernel's ``/proc`` interfaces.  Missing fields use
    sensible defaults so a ``HostSpec.auto()`` call can't fail.
    """

    cpu_model: str = "unknown"
    n_cores_total: int = 1
    n_cores_affinity: int = 1
    ram_mb: float = 0.0
    os_name: str = "unknown"
    arch: str = "unknown"
    has_mkl: bool = False
    mkl_version: str = ""
    #: Whatever additional fields the TA-6 benchmark shipped that
    #: the current estimator doesn't use.  Keeping the payload
    #: round-trippable means later retrain passes can pick up
    #: extras as new features without schema migration.
    extras: dict[str, str] = field(default_factory=dict)

    @classmethod
    def auto(cls) -> HostSpec:
        """Introspect the current machine — never raises.

        Every branch has a graceful fallback; in a sandbox or on
        non-Linux kernels the values that can't be probed stay at
        their defaults instead of crashing the estimator.

        Returns
        -------
        HostSpec
            A spec populated with whatever could be probed; unprobeable
            fields keep the class defaults.
        """
        # CPU model: prefer /proc/cpuinfo (Linux), fall back to platform.
        cpu_model = "unknown"
        proc = Path("/proc/cpuinfo")
        if proc.is_file():
            with contextlib.suppress(OSError):
                for line in proc.read_text().splitlines():
                    if line.startswith("model name"):
                        cpu_model = line.split(":", 1)[1].strip()
                        break
        if cpu_model == "unknown":
            cpu_model = platform.processor() or "unknown"

        n_total = os.cpu_count() or 1
        try:
            # sched_getaffinity is Linux-only: AttributeError elsewhere,
            # OSError in some restricted containers.
            n_affinity = len(os.sched_getaffinity(0))
        except (AttributeError, OSError):
            n_affinity = n_total

        # RAM: /proc/meminfo first (kB -> MB), psutil as optional fallback.
        ram_mb = 0.0
        mem = Path("/proc/meminfo")
        if mem.is_file():
            with contextlib.suppress(OSError, ValueError):
                for line in mem.read_text().splitlines():
                    if line.startswith("MemTotal:"):
                        ram_mb = int(line.split()[1]) / 1024.0
                        break
        if ram_mb == 0.0:
            with contextlib.suppress(ImportError):
                import psutil  # noqa: PLC0415

                ram_mb = psutil.virtual_memory().total / (1024 * 1024)

        # MKL probe is best-effort: suppress *any* failure so auto()
        # keeps its never-raises contract.  (ImportError is a subclass
        # of Exception, so listing both — as the old code did — was
        # redundant; Exception alone covers it.)
        has_mkl = False
        mkl_version = ""
        with contextlib.suppress(Exception):
            from femorph_solver.report import _mkl_version  # noqa: PLC0415

            v = _mkl_version()
            has_mkl = v not in ("not loaded", "loaded (version string unavailable)")
            mkl_version = v if has_mkl else ""

        return cls(
            cpu_model=cpu_model,
            n_cores_total=n_total,
            n_cores_affinity=n_affinity,
            ram_mb=ram_mb,
            os_name=platform.system(),
            arch=platform.machine(),
            has_mkl=has_mkl,
            mkl_version=mkl_version,
        )

    def signature(self) -> str:
        """Stable string identifier for "same host" checks.

        Two rows with matching signatures can train each other's
        estimator coefficients; rows with different signatures go
        into the shared prior.  The signature intentionally ignores
        transient things like affinity or MKL patch level — those
        are fields the estimator can regress on, but they don't
        change the fundamental silicon.
        """
        # RAM is bucketed to whole GB so minor kernel-reserved-memory
        # differences between otherwise-identical boxes don't split them.
        return f"{self.cpu_model}|{self.n_cores_total}|{int(self.ram_mb / 1024)}G|{self.os_name}|{self.arch}"