"""Generalized-eigenvalue modal solve.
Solves ``K φ = ω² M φ`` for the lowest ``n_modes`` free-vibration modes
with Dirichlet BCs partitioned out. DOFs with zero diagonal stiffness (for
example the transverse DOFs of a pure axial truss) are dropped as
rigid-body modes — the canonical free-vibration default.
For ``n_free <= DENSE_CUTOFF`` we always use the dense LAPACK path
(:func:`scipy.linalg.eigh`). For larger systems the eigen backend is
configurable via the ``eigen_solver`` / ``linear_solver`` kwargs — see
:mod:`femorph_solver.solvers.eigen` and :mod:`femorph_solver.solvers.linear` for the
registered options.
References
----------
- Bathe, K.-J. *Finite Element Procedures*, 2nd ed., Prentice-Hall
(2014), §11. [structural eigenproblem ``K φ = ω² M φ`` and
discussion of subspace / Lanczos methods]
- Hughes, T. J. R. *The Finite Element Method: Linear Static and
Dynamic Finite Element Analysis*, §9. [free-vibration modal
analysis, M-orthonormalisation, rigid-body mode treatment]
- Wilkinson, J. H. *The Algebraic Eigenvalue Problem*, Oxford
(1965). [reference text for the symmetric generalized
eigenvalue problem solved by ``scipy.linalg.eigh`` on the
dense branch (LAPACK's DSYGVD)]
"""
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
from typing import TYPE_CHECKING
import numpy as np
import scipy.linalg as la
import scipy.sparse as sp
from .._threads import limit as _thread_limit
from .eigen import get_eigen_solver, list_eigen_solvers
if TYPE_CHECKING:
from femorph_solver.model import Model
DENSE_CUTOFF = 500
# Autotune heuristic thresholds (T4-P13). Clustered eigenvalues
# (common in rotor modes) converge faster under PRIMME's
# block-Davidson than ARPACK's Lanczos, but only once ``n_modes`` is
# large enough to amortise PRIMME's per-call overhead. Calibrated
# against ptr.cdb: at n_modes=50 PRIMME is ≥30% faster; below ~20 the
# two are within noise. Only applies when PRIMME is installed and
# the call is using the natural σ=0 regime (shift-invert with σ≠0
# stays on ARPACK — its complex-shift path is the incumbent).
_AUTOTUNE_PRIMME_MIN_MODES = 20
def _autotune_eigen_solver(n_modes: int, sigma: float) -> str:
"""Pick a registered eigen-backend name for ``eigen_solver="auto"``.
Heuristic:
1. ``sigma != 0`` → ``"arpack"`` (shift-invert incumbent).
2. ``n_modes > _AUTOTUNE_PRIMME_MIN_MODES`` AND PRIMME is installed →
``"primme"``.
3. Otherwise → ``"arpack"``.
The returned name is always a backend that :func:`get_eigen_solver`
can resolve — we check availability before naming PRIMME.
"""
if sigma != 0.0 or n_modes <= _AUTOTUNE_PRIMME_MIN_MODES:
return "arpack"
primme_available = any(
entry["name"] == "primme" and bool(entry["available"]) for entry in list_eigen_solvers()
)
return "primme" if primme_available else "arpack"
@dataclass
class ModalResult:
    """Result of a modal solve.

    Attributes
    ----------
    omega_sq : numpy.ndarray
        ``(n_modes,)`` eigenvalues ω² = (2π f)².
    frequency : numpy.ndarray
        ``(n_modes,)`` cyclic frequencies f [Hz].
    mode_shapes : numpy.ndarray
        ``(N, n_modes)`` DOF-indexed mode shapes — each column is one
        mode in the global DOF ordering produced by
        :meth:`femorph_solver.model.Model.dof_map`.
    free_mask : numpy.ndarray
        ``(N,)`` bool — DOFs kept in the eigenproblem after Dirichlet
        reduction.
    """

    omega_sq: np.ndarray
    frequency: np.ndarray
    mode_shapes: np.ndarray
    free_mask: np.ndarray

    def __repr__(self) -> str:
        # Default dataclass repr dumps the full mode-shape array — at
        # any realistic problem size that's a flood. Show the mode
        # count + frequency span instead.
        n_modes = int(self.frequency.shape[0])
        n_dof = int(self.mode_shapes.shape[0]) if self.mode_shapes.ndim == 2 else 0
        if n_modes:
            f_lo = float(self.frequency[0])
            f_hi = float(self.frequency[-1])
            return f"ModalResult(n_modes={n_modes}, N={n_dof}, f∈[{f_lo:.3f}, {f_hi:.3f}] Hz)"
        return f"ModalResult(n_modes=0, N={n_dof})"

    def save(
        self,
        path: str | Path,
        model: Model,
        *,
        unit_system: str = "SI",
        extra_field_data: dict[str, np.ndarray] | None = None,
    ) -> Path:
        """Serialise this modal result to a disk-backed ``.pv`` file.

        Re-projects the DOF-indexed :attr:`mode_shapes` onto per-node
        ``(n_points, n_dof_per_node)`` arrays using the model's
        :meth:`~femorph_solver.model.Model.dof_map`, then hands off to
        :func:`~femorph_solver.result.modal.write_modal_result`.

        Parameters
        ----------
        path : str or pathlib.Path
            Destination file (``.pv`` extension conventional).
        model : femorph_solver.model.Model
            The model whose K/M matrices this result was computed
            from — supplies the mesh geometry and DOF layout
            needed to reshape ``mode_shapes``.
        unit_system : str
            Unit-system label forwarded to the material table and the
            result writer (default ``"SI"``).
        extra_field_data : mapping, optional
            Extra ``field_data`` entries (e.g. solver statistics).

        Returns
        -------
        pathlib.Path
            Canonicalised absolute path to the written file.

        Notes
        -----
        After saving, load the file back via
        :func:`femorph_solver.result.read`; the resulting object is a
        :class:`~femorph_solver.result.modal.ModalResult` (disk-backed)
        with the same frequencies and per-mode displacements plus the
        full plotting / animation surface.
        """
        from femorph_solver.result._material import MaterialTable
        from femorph_solver.result.modal import write_modal_result

        grid = model.grid
        dof_map = model.dof_map()
        node_nums = np.asarray(grid.point_data["ansys_node_num"], dtype=np.int64)
        node_to_row = {int(n): i for i, n in enumerate(node_nums)}
        n_modes = int(self.mode_shapes.shape[1])
        # Output: (n_modes, n_points, 6). Canonical DOFs are
        # UX, UY, UZ, ROTX, ROTY, ROTZ — we keep all six slots so
        # beam / shell rotations round-trip alongside translations.
        mode_arr = np.zeros((n_modes, grid.n_points, 6), dtype=np.float64)
        # Vectorised scatter: every DOF row maps to one (point, comp)
        # cell of mode_arr. Loop is only over the three-digit mode
        # count; inner scatter is fully numpy.
        node_row = np.fromiter(
            (node_to_row[int(n)] for n in dof_map[:, 0]),
            dtype=np.int64,
            count=dof_map.shape[0],
        )
        comp = dof_map[:, 1].astype(np.int64)
        for k in range(n_modes):
            mode_arr[k, node_row, comp] = self.mode_shapes[:, k]
        material_table = MaterialTable.from_model(model, unit_system=str(unit_system))
        return write_modal_result(
            path,
            grid,
            self.frequency,
            mode_arr,
            omega_sq=self.omega_sq,
            extra_field_data=extra_field_data,
            material_table=material_table,
            unit_system=unit_system,
        )
def _bc_reduce(
K: sp.csr_array,
M: sp.csr_array,
prescribed: dict[int, float],
) -> tuple[sp.csr_array, sp.csr_array, np.ndarray]:
"""Return ``(K_ff, M_ff, free_mask)`` — BC-reduced K / M plus the
boolean mask of free DOFs.
Does the row-and-column-slicing in a single row-parallel pass via
``_core.csr_submatrix_free`` when available, falling back to
scipy's fancy-index path otherwise. Callers that know they're
done with the full K / M can release their own refs after this
returns — the reduced matrices have their own arrays (no aliasing
back into K / M's storage).
"""
N = K.shape[0]
mask_fixed = np.zeros(N, dtype=bool)
for idx in prescribed:
mask_fixed[int(idx)] = True
diag = K.diagonal()
scale = float(np.max(np.abs(diag))) if diag.size else 1.0
tol_pivot = 1e-12 * scale if scale > 0 else 1e-30
mask_fixed |= np.abs(diag) <= tol_pivot
free = ~mask_fixed
n_free_dofs = int(free.sum())
try:
from femorph_solver import _core as _core_ext # noqa: PLC0415
except ImportError: # pragma: no cover
_core_ext = None
if _core_ext is not None and hasattr(_core_ext, "csr_submatrix_free"):
full_to_free = np.full(N, -1, dtype=np.int32)
full_to_free[free] = np.arange(n_free_dofs, dtype=np.int32)
K_csr = K.tocsr()
M_csr = M.tocsr()
ip_k, ix_k, d_k = _core_ext.csr_submatrix_free(
np.ascontiguousarray(K_csr.indptr, dtype=np.int32),
np.ascontiguousarray(K_csr.indices, dtype=np.int32),
np.ascontiguousarray(K_csr.data, dtype=np.float64),
full_to_free,
n_free_dofs,
)
# K and M share element connectivity + the same ``full_to_free``
# mask when built from the same FE model, so their reduced
# sparsity is usually bit-identical. Reuse K's
# ``(new_indptr, new_indices)`` and only pull filtered data for
# M via the ``_data`` variant — saves one Pass-1 row-count and
# one ``int32[nnz]`` allocation (~26 MB on PTR ``M_ff``).
#
# Guarded by an explicit ``indptr`` / ``indices`` equality check
# because callers can pass K / M with different patterns (unit
# tests sometimes do — e.g. K diagonal + M sparse). Falls back
# to the full two-pass call otherwise, and on older ``_core``
# builds that predate the ``_data`` binding.
can_share_pattern = (
hasattr(_core_ext, "csr_submatrix_free_data")
and K_csr.indptr.shape == M_csr.indptr.shape
and K_csr.indices.shape == M_csr.indices.shape
and np.array_equal(K_csr.indptr, M_csr.indptr)
and np.array_equal(K_csr.indices, M_csr.indices)
)
if can_share_pattern:
d_m = _core_ext.csr_submatrix_free_data(
np.ascontiguousarray(M_csr.indptr, dtype=np.int32),
np.ascontiguousarray(M_csr.indices, dtype=np.int32),
np.ascontiguousarray(M_csr.data, dtype=np.float64),
full_to_free,
n_free_dofs,
)
ip_m, ix_m = ip_k, ix_k
else:
ip_m, ix_m, d_m = _core_ext.csr_submatrix_free(
np.ascontiguousarray(M_csr.indptr, dtype=np.int32),
np.ascontiguousarray(M_csr.indices, dtype=np.int32),
np.ascontiguousarray(M_csr.data, dtype=np.float64),
full_to_free,
n_free_dofs,
)
# Skip scipy's csr_array(...) constructor because it runs
# ``check_format`` which copies both the data and column-index
# arrays into fresh buffers (on a 46 k-DOF / 6 M-nnz PTR K_ff
# that's ~90 MB of transient peak RSS per call — called twice
# here, once for K and once for M). ``_core.csr_submatrix_free``
# already guarantees the output is valid CSR: sorted column
# indices per row, int32 + float64, contiguous. Build the
# csr_array by assigning to the private attributes directly so
# scipy skips the validation + copy, matching the same trick
# ``Model._assemble`` uses for its scatter output.
def _make_csr(data, indices, indptr, n):
out = sp.csr_array.__new__(sp.csr_array)
out._shape = (n, n)
out.indptr = indptr
out.indices = indices
out.data = data
out._maxprint = 50
return out
K_ff = _make_csr(d_k, ix_k, ip_k, n_free_dofs)
M_ff = _make_csr(d_m, ix_m, ip_m, n_free_dofs)
else:
K_csc = K.tocsc()
M_csc = M.tocsc()
K_ff = K_csc[free][:, free]
M_ff = M_csc[free][:, free]
return K_ff, M_ff, free
def solve_modal_reduced(
    K_ff: sp.csr_array,
    M_ff: sp.csr_array,
    free_mask: np.ndarray,
    n_modes: int = 10,
    *,
    eigen_solver: str = "arpack",
    linear_solver: str = "auto",
    sigma: float = 0.0,
    tol: float = 1e-12,
    thread_limit: int | None = None,
    mixed_precision: bool | None = False,
    v0: np.ndarray | None = None,
) -> ModalResult:
    """Solve the eigenproblem directly on pre-reduced ``K_ff`` / ``M_ff``.

    Skips the full-matrix BC slicing that :func:`solve_modal` does, so
    the caller can build ``K_ff`` / ``M_ff`` via :func:`_bc_reduce`,
    release the original ``K`` / ``M`` (Model cache + their own refs),
    then call this to run the eigensolve with a peak RSS roughly
    ``K_ff + M_ff + triu + MKL factor`` instead of the default path's
    ``K + M + K_ff + M_ff + triu + MKL factor``.

    Parameters mirror :func:`solve_modal`; ``free_mask`` is the boolean
    free-DOF mask returned by :func:`_bc_reduce` and is used both to
    size the full-system mode shapes and to scatter the reduced
    eigenvectors back into global DOF ordering.

    Raises
    ------
    ValueError
        If ``free_mask`` selects no DOFs, or ``v0`` matches neither the
        full nor the reduced system length.
    """
    n_free = int(free_mask.sum())
    if n_free == 0:
        raise ValueError("no free DOFs remain after BCs — nothing to solve")
    n_req = min(n_modes, n_free)
    # Resolve "auto" up front so both the kwargs plumbing below and
    # any diagnostics refer to a concrete registered backend name.
    resolved_backend = (
        _autotune_eigen_solver(n_req, sigma) if eigen_solver == "auto" else eigen_solver
    )
    N = int(free_mask.shape[0])
    with _thread_limit(thread_limit):
        if n_free <= DENSE_CUTOFF:
            # la.eigh defaults to reading the lower triangle — if K_ff was
            # assembled as upper-triangular only (``femorph_triu=True``),
            # the lower half is zeros and the dense path reads garbage.
            # Flip to ``UPLO="U"`` so LAPACK reads the stored triu.
            #
            # M_ff may also be triu-only now; la.eigh's generalized
            # eigenproblem signature treats M's triangle independently
            # via the same ``lower`` kwarg on recent scipy. To stay
            # robust across scipy versions we densify M symmetrically
            # ourselves when it's triu-stored.
            k_triu = bool(getattr(K_ff, "femorph_triu", False))
            m_triu = bool(getattr(M_ff, "femorph_triu", False))
            K_dense = K_ff.toarray()
            M_dense = M_ff.toarray()
            if m_triu:
                # Symmetrize in-place: M <- M + M.T - diag(M).
                diag = np.diag(M_dense).copy()
                M_dense += M_dense.T
                np.fill_diagonal(M_dense, diag)
            w, v = la.eigh(K_dense, M_dense, lower=(not k_triu))
            # la.eigh returns the full ascending spectrum; keep only
            # the requested lowest modes.
            w = w[:n_req]
            v = v[:, :n_req]
        else:
            Solver = get_eigen_solver(resolved_backend)
            solve_kwargs: dict[str, object] = {
                "sigma": sigma,
                "linear_solver": linear_solver,
                "tol": tol,
            }
            # Only ArpackSolver plumbs ``mixed_precision`` + ``v0``
            # through — other backends don't accept either.
            if resolved_backend == "arpack":
                solve_kwargs["mixed_precision"] = mixed_precision
                if v0 is not None:
                    # ARPACK expects the full-system-length starting
                    # vector on the reduced space, so project via the
                    # free mask if the caller passed a full-system v0.
                    v0_flat = np.ascontiguousarray(v0, dtype=np.float64).ravel()
                    if v0_flat.size == free_mask.shape[0]:
                        v0_flat = v0_flat[free_mask]
                    elif v0_flat.size != K_ff.shape[0]:
                        raise ValueError(
                            f"v0 length {v0_flat.size} matches neither the full "
                            f"system ({free_mask.shape[0]}) nor the reduced "
                            f"system ({K_ff.shape[0]})"
                        )
                    solve_kwargs["v0"] = v0_flat
            w, v = Solver.solve(K_ff, M_ff, n_req, **solve_kwargs)
    # Clamp tiny negative eigenvalues (numerical noise around zero
    # modes) so the sqrt below stays real.
    w_clean = np.where(w > 0, w, 0.0)
    # Scatter reduced eigenvectors back to full-system DOF ordering.
    phi = np.zeros((N, n_req), dtype=np.float64)
    phi[free_mask] = v
    return ModalResult(
        omega_sq=w_clean,
        frequency=np.sqrt(w_clean) / (2.0 * np.pi),
        mode_shapes=phi,
        free_mask=free_mask,
    )
def solve_modal(
    K: sp.csr_array,
    M: sp.csr_array,
    prescribed: dict[int, float],
    n_modes: int = 10,
    *,
    eigen_solver: str = "arpack",
    linear_solver: str = "auto",
    sigma: float = 0.0,
    tol: float = 1e-12,
    thread_limit: int | None = None,
    mixed_precision: bool | None = False,
    v0: np.ndarray | None = None,
) -> ModalResult:
    """Solve the generalized eigenproblem ``K φ = ω² M φ``.

    Parameters
    ----------
    K, M : scipy.sparse.csr_array
        Global stiffness and mass matrices, same shape.
    prescribed : mapping ``{global_dof_index: 0}``
        Indices to drop from the eigenproblem (clamped / zero-stiffness DOFs).
    n_modes : int
        Number of lowest modes to return.
    eigen_solver : str
        Name of a registered eigen backend (``"arpack"``, ``"lobpcg"``,
        ``"primme"``, ``"dense"``), or ``"auto"`` to let the solver
        pick based on problem size. The autotune prefers PRIMME for
        ``n_modes > 20`` when the package is installed (faster on the
        clustered eigenvalue spectra typical of rotor modal analyses);
        it falls back to ARPACK otherwise. See
        :func:`femorph_solver.solvers.eigen.list_eigen_solvers`.
    linear_solver : str
        Name of the registered linear backend used by ARPACK's
        shift-invert step. Pass ``"auto"`` (the default) to pick the
        fastest available SPD direct solver (pardiso → cholmod → umfpack
        → superlu). See :func:`femorph_solver.solvers.linear.list_linear_solvers`
        for the full registry.
    sigma : float
        Shift for shift-invert mode; ``0.0`` (default) targets the
        lowest modes.
    tol : float
        Convergence tolerance passed to the sparse eigensolver.
        Default ``1e-12`` — tight enough that the returned frequencies
        agree with a full machine-epsilon (``tol=0``) solve to at least
        13 significant digits on typical modal problems, while saving
        ~25 % of ARPACK iterations. Pass ``0.0`` to force ARPACK to
        converge to full machine precision; pass a looser value
        (``1e-10``, ``1e-8``) when wall-time matters more than the last
        couple of digits.
    thread_limit : int or None, optional
        Cap on BLAS / OpenMP threads used inside the dense and sparse
        eigenpaths. ``None`` (default) defers to the global limit set
        via :func:`femorph_solver.set_thread_limit` or the
        ``FEMORPH_SOLVER_NUM_THREADS`` environment variable; pass an
        explicit integer to override for a single call.
    mixed_precision : bool or None, optional
        Forwarded to the ARPACK backend only.
    v0 : numpy.ndarray or None, optional
        Starting vector for ARPACK's Lanczos iteration. Default is
        ``None`` — ARPACK seeds from a random vector. Pass a previous
        mode shape (full-system length ``N`` or reduced-system length
        ``n_free``) to warm-start a parametric sweep — ARPACK's
        convergence iterations typically drop 1.5–3× when the new
        eigenvectors are close to the old ones (e.g. material
        sensitivity studies, mistuning identification). Ignored by
        non-ARPACK eigen backends.
    """
    # Partition out Dirichlet / zero-stiffness DOFs, then delegate the
    # eigensolve to the reduced-matrix entry point.
    K_ff, M_ff, free = _bc_reduce(K, M, prescribed)
    return solve_modal_reduced(
        K_ff,
        M_ff,
        free,
        n_modes=n_modes,
        eigen_solver=eigen_solver,
        linear_solver=linear_solver,
        sigma=sigma,
        tol=tol,
        thread_limit=thread_limit,
        mixed_precision=mixed_precision,
        v0=v0,
    )