# Source code for femorph_solver.solvers.modal

"""Generalized-eigenvalue modal solve.

Solves ``K φ = ω² M φ`` for the lowest ``n_modes`` free-vibration modes
with Dirichlet BCs partitioned out. DOFs with zero diagonal stiffness (for
example the transverse DOFs of a pure axial truss) are dropped as
rigid-body modes — the canonical free-vibration default.

For ``n_free <= DENSE_CUTOFF`` we always use the dense LAPACK path
(:func:`scipy.linalg.eigh`).  For larger systems the eigen backend is
configurable via the ``eigen_solver`` / ``linear_solver`` kwargs — see
:mod:`femorph_solver.solvers.eigen` and :mod:`femorph_solver.solvers.linear` for the
registered options.

References
----------
- Bathe, K.-J.  *Finite Element Procedures*, 2nd ed., Prentice-Hall
  (2014), §11.  [structural eigenproblem ``K φ = ω² M φ`` and
  discussion of subspace / Lanczos methods]
- Hughes, T. J. R.  *The Finite Element Method: Linear Static and
  Dynamic Finite Element Analysis*, §9.  [free-vibration modal
  analysis, M-orthonormalisation, rigid-body mode treatment]
- Wilkinson, J. H.  *The Algebraic Eigenvalue Problem*, Oxford
  (1965).  [reference text for the symmetric generalized
  eigenvalue problem solved by ``scipy.linalg.eigh`` on the
  dense branch (LAPACK's DSYGVD)]
"""

from __future__ import annotations

from dataclasses import dataclass
from pathlib import Path
from typing import TYPE_CHECKING

import numpy as np
import scipy.linalg as la
import scipy.sparse as sp

from .._threads import limit as _thread_limit
from .eigen import get_eigen_solver

if TYPE_CHECKING:
    from femorph_solver.model import Model

DENSE_CUTOFF = 500


# Autotune always picks ``"arpack"`` for the simple modal path.  PRIMME
# remains registered (so :func:`get_eigen_solver("primme")` and the
# cyclic-modal solver — which provides its own ``OPinv`` and gets the
# real win from PRIMME's JDQMR algorithm — keep working).  But the
# autotune doesn't hand the simple wrapper a PRIMME call:
#
# * Without an ``OPinv``, PRIMME's Davidson path falls over with
#   error -41 ("matvec failed") on large / ill-conditioned K — the
#   500k-DOF clamped PBS rotor is the canary.
# * ARPACK + a sparse direct solver (``mkl_direct`` /
#   ``pardiso`` / ``mumps``) already beats MAPDL Block Lanczos by
#   25 % on that workload (see :doc:`/best-practices/pbs-rotor-comparison`),
#   so the marginal speedup PRIMME could provide on a narrow size
#   band (10k–100k DOFs, when Davidson does converge) doesn't
#   justify exposing the matvec-error footgun to autotune callers.
#
# Replicating the cyclic-path's OPinv-passing wrapper inside the
# simple modal entry point would buy 10–20 % over ARPACK on that
# narrow band at the cost of duplicating the linear-backend
# plumbing already encapsulated in ``solve_modal_reduced``.  Not
# worth it; revisit if a workload surfaces where ARPACK is the
# bottleneck.
def _autotune_eigen_solver(n_modes: int, sigma: float, n_dofs: int = 0) -> str:
    """Pick a registered eigen-backend name for ``eigen_solver="auto"``.

    Parameters
    ----------
    n_modes, sigma, n_dofs
        Retained for API stability; currently unused.  See
        :doc:`/best-practices/pbs-rotor-comparison` for the rationale
        behind the always-ARPACK choice.

    Returns
    -------
    str
        Always ``"arpack"`` from 2026-05 onward.
    """
    del n_modes, sigma, n_dofs
    return "arpack"


[docs] @dataclass(frozen=True, slots=True) class ModalResult: """Result of a modal solve. Attributes ---------- omega_sq : numpy.ndarray ``(n_modes,)`` eigenvalues ω² = (2π f)². frequency : numpy.ndarray ``(n_modes,)`` cyclic frequencies f [Hz]. mode_shapes : numpy.ndarray ``(N, n_modes)`` DOF-indexed mode shapes — each column is one mode in the global DOF ordering produced by :meth:`femorph_solver.model.Model.dof_map`. free_mask : numpy.ndarray ``(N,)`` bool — DOFs kept in the eigenproblem after Dirichlet reduction. """ omega_sq: np.ndarray frequency: np.ndarray mode_shapes: np.ndarray free_mask: np.ndarray def __repr__(self) -> str: # Default dataclass repr dumps the full mode-shape array — at # any realistic problem size that's a flood. Show the mode # count + frequency span instead. n_modes = int(self.frequency.shape[0]) n_dof = int(self.mode_shapes.shape[0]) if self.mode_shapes.ndim == 2 else 0 if n_modes: f_lo = float(self.frequency[0]) f_hi = float(self.frequency[-1]) return f"ModalResult(n_modes={n_modes}, N={n_dof}, f∈[{f_lo:.3f}, {f_hi:.3f}] Hz)" return f"ModalResult(n_modes=0, N={n_dof})"
[docs] def save( self, path: str | Path, model: Model, *, unit_system: str = "SI", extra_field_data: dict[str, np.ndarray] | None = None, ) -> Path: """Serialise this modal result to a disk-backed ``.pv`` file. Re-projects the DOF-indexed :attr:`mode_shapes` onto per-node ``(n_points, n_dof_per_node)`` arrays using the model's :meth:`~femorph_solver.model.Model.dof_map`, then hands off to :func:`~femorph_solver.result.modal.write_modal_result`. Parameters ---------- path : str or pathlib.Path Destination file (``.pv`` extension conventional). model : femorph_solver.model.Model The model whose K/M matrices this result was computed from — supplies the mesh geometry and DOF layout needed to reshape ``mode_shapes``. extra_field_data : mapping, optional Extra ``field_data`` entries (e.g. solver statistics). Returns ------- pathlib.Path Canonicalised absolute path to the written file. Notes ----- After saving, load the file back via :func:`femorph_solver.result.read`; the resulting object is a :class:`~femorph_solver.result.modal.ModalResultDisk` (disk-backed) with the same frequencies and per-mode displacements plus the full plotting / animation surface. """ from femorph_solver.result._material import MaterialTable from femorph_solver.result.modal import write_modal_result grid = model.grid dof_map = model.dof_map() node_nums = np.asarray(grid.point_data["ansys_node_num"], dtype=np.int64) node_to_row = {int(n): i for i, n in enumerate(node_nums)} n_modes = int(self.mode_shapes.shape[1]) # Output: (n_modes, n_points, 6). Canonical DOFs are # UX, UY, UZ, ROTX, ROTY, ROTZ — we keep all six slots so # beam / shell rotations round-trip alongside translations. mode_arr = np.zeros((n_modes, grid.n_points, 6), dtype=np.float64) # Vectorised scatter: every DOF row maps to one (point, comp) # cell of mode_arr. Loop is only over the three-digit mode # count; inner scatter is fully numpy. 
node_row = np.fromiter( (node_to_row[int(n)] for n in dof_map[:, 0]), dtype=np.int64, count=dof_map.shape[0], ) comp = dof_map[:, 1].astype(np.int64) for k in range(n_modes): mode_arr[k, node_row, comp] = self.mode_shapes[:, k] material_table = MaterialTable.from_model(model, unit_system=str(unit_system)) return write_modal_result( path, grid, self.frequency, mode_arr, omega_sq=self.omega_sq, extra_field_data=extra_field_data, material_table=material_table, unit_system=unit_system, )
def _bc_reduce(
    K: sp.csr_array,
    M: sp.csr_array,
    prescribed: dict[int, float],
) -> tuple[sp.csr_array, sp.csr_array, np.ndarray]:
    """Return ``(K_ff, M_ff, free_mask)`` — BC-reduced K / M plus the
    boolean mask of free DOFs.

    Does the row-and-column-slicing in a single row-parallel pass via
    ``_core.csr_submatrix_free`` when available, falling back to scipy's
    fancy-index path otherwise.

    Callers that know they're done with the full K / M can release their
    own refs after this returns — the reduced matrices have their own
    arrays (no aliasing back into K / M's storage).
    """
    N = K.shape[0]
    # Start from the explicitly prescribed (Dirichlet) DOF indices.
    mask_fixed = np.zeros(N, dtype=bool)
    for idx in prescribed:
        mask_fixed[int(idx)] = True
    # Also drop DOFs with numerically-zero diagonal stiffness (e.g. the
    # transverse DOFs of a pure axial truss).  The tolerance is relative
    # to the largest diagonal entry so the cut scales with unit system.
    diag = K.diagonal()
    scale = float(np.max(np.abs(diag))) if diag.size else 1.0
    tol_pivot = 1e-12 * scale if scale > 0 else 1e-30
    mask_fixed |= np.abs(diag) <= tol_pivot
    # Auto-pin DOFs with zero diagonal mass — these can't participate
    # in K φ = ω² M φ (no inertia → mode shape there is unconstrained
    # by the eigenproblem). MAPDL solves these decks by automatically
    # treating massless DOFs as constrained; we do the same.
    m_diag = M.diagonal()
    m_scale = float(np.max(np.abs(m_diag))) if m_diag.size else 1.0
    m_tol = 1e-12 * m_scale if m_scale > 0 else 1e-30
    mask_fixed |= np.abs(m_diag) <= m_tol
    free = ~mask_fixed
    # Rigid-body / unanchored-chain detection on M_ff (vm52 fails
    # the Cholesky factor of M with 'leading minor not positive
    # definite' on a zero-eigen-value mode of M). Run only when M_ff
    # actually has a zero diagonal in the still-free set — otherwise
    # the eigensolver can handle near-singular M itself, and the
    # null-space pin can over-constrain real loaded DOFs (caught the
    # vm_msc_vg_1_4 plate validation half-deflecting).
    # NOTE(review): the detection described above is not implemented in
    # this function — confirm whether it moved elsewhere or the comment
    # is stale.
    n_free_dofs = int(free.sum())
    try:
        from femorph_solver import _core as _core_ext  # noqa: PLC0415
    except ImportError:  # pragma: no cover
        _core_ext = None
    if _core_ext is not None and hasattr(_core_ext, "csr_submatrix_free"):
        # Fast path: map full-system DOF index -> reduced row index
        # (-1 marks a dropped DOF), then let the extension slice rows
        # and columns in one pass.
        full_to_free = np.full(N, -1, dtype=np.int32)
        full_to_free[free] = np.arange(n_free_dofs, dtype=np.int32)
        K_csr = K.tocsr()
        M_csr = M.tocsr()
        ip_k, ix_k, d_k = _core_ext.csr_submatrix_free(
            np.ascontiguousarray(K_csr.indptr, dtype=np.int32),
            np.ascontiguousarray(K_csr.indices, dtype=np.int32),
            np.ascontiguousarray(K_csr.data, dtype=np.float64),
            full_to_free,
            n_free_dofs,
        )
        # K and M share element connectivity + the same ``full_to_free``
        # mask when built from the same FE model, so their reduced
        # sparsity is usually bit-identical. Reuse K's
        # ``(new_indptr, new_indices)`` and only pull filtered data for
        # M via the ``_data`` variant — saves one Pass-1 row-count and
        # one ``int32[nnz]`` allocation (~26 MB on PTR ``M_ff``).
        #
        # Guarded by an explicit ``indptr`` / ``indices`` equality check
        # because callers can pass K / M with different patterns (unit
        # tests sometimes do — e.g. K diagonal + M sparse). Falls back
        # to the full two-pass call otherwise, and on older ``_core``
        # builds that predate the ``_data`` binding.
        can_share_pattern = (
            hasattr(_core_ext, "csr_submatrix_free_data")
            and K_csr.indptr.shape == M_csr.indptr.shape
            and K_csr.indices.shape == M_csr.indices.shape
            and np.array_equal(K_csr.indptr, M_csr.indptr)
            and np.array_equal(K_csr.indices, M_csr.indices)
        )
        if can_share_pattern:
            d_m = _core_ext.csr_submatrix_free_data(
                np.ascontiguousarray(M_csr.indptr, dtype=np.int32),
                np.ascontiguousarray(M_csr.indices, dtype=np.int32),
                np.ascontiguousarray(M_csr.data, dtype=np.float64),
                full_to_free,
                n_free_dofs,
            )
            ip_m, ix_m = ip_k, ix_k
        else:
            ip_m, ix_m, d_m = _core_ext.csr_submatrix_free(
                np.ascontiguousarray(M_csr.indptr, dtype=np.int32),
                np.ascontiguousarray(M_csr.indices, dtype=np.int32),
                np.ascontiguousarray(M_csr.data, dtype=np.float64),
                full_to_free,
                n_free_dofs,
            )
        # Skip scipy's csr_array(...) constructor because it runs
        # ``check_format`` which copies both the data and column-index
        # arrays into fresh buffers (on a 46 k-DOF / 6 M-nnz PTR K_ff
        # that's ~90 MB of transient peak RSS per call — called twice
        # here, once for K and once for M). ``_core.csr_submatrix_free``
        # already guarantees the output is valid CSR: sorted column
        # indices per row, int32 + float64, contiguous. Build the
        # csr_array by assigning to the private attributes directly so
        # scipy skips the validation + copy, matching the same trick
        # ``Model._assemble`` uses for its scatter output.
        def _make_csr(data, indices, indptr, n):
            # Bypass csr_array.__init__ entirely; relies on scipy's
            # private ``_shape`` / ``_maxprint`` attributes, so this is
            # scipy-version-sensitive by design (see comment above).
            out = sp.csr_array.__new__(sp.csr_array)
            out._shape = (n, n)
            out.indptr = indptr
            out.indices = indices
            out.data = data
            out._maxprint = 50
            return out

        K_ff = _make_csr(d_k, ix_k, ip_k, n_free_dofs)
        M_ff = _make_csr(d_m, ix_m, ip_m, n_free_dofs)
    else:
        # Portable fallback: CSC is efficient for the column slice;
        # two fancy-index passes (rows, then columns).
        K_csc = K.tocsc()
        M_csc = M.tocsc()
        K_ff = K_csc[free][:, free]
        M_ff = M_csc[free][:, free]
    return K_ff, M_ff, free
def solve_modal_reduced(
    K_ff: sp.csr_array,
    M_ff: sp.csr_array,
    free_mask: np.ndarray,
    n_modes: int = 10,
    *,
    eigen_solver: str = "auto",
    linear_solver: str = "auto",
    sigma: float = 0.0,
    tol: float = 1e-12,
    thread_limit: int | None = None,
    mixed_precision: bool | None = False,
    v0: np.ndarray | None = None,
) -> ModalResult:
    """Solve the eigenproblem directly on pre-reduced ``K_ff`` / ``M_ff``.

    Skips the full-matrix BC slicing that :func:`solve_modal` does, so
    the caller can build ``K_ff`` / ``M_ff`` via :func:`_bc_reduce`,
    release the original ``K`` / ``M`` (Model cache + their own refs),
    then call this to run the eigensolve with a peak RSS roughly
    ``K_ff + M_ff + triu + MKL factor`` instead of the default path's
    ``K + M + K_ff + M_ff + triu + MKL factor``.

    Parameters
    ----------
    K_ff, M_ff : scipy.sparse.csr_array
        BC-reduced stiffness / mass matrices (``n_free x n_free``).
    free_mask : numpy.ndarray
        ``(N,)`` bool mask of free DOFs; mode shapes are scattered back
        into the full-length ``N`` vector through it.
    n_modes : int
        Number of lowest modes requested (capped at ``n_free``).

    Returns
    -------
    ModalResult
        Eigenvalues, frequencies and full-length mode shapes.
    """
    n_free = int(free_mask.sum())
    if n_free == 0:
        # All DOFs constrained — degenerate but legitimate (deck author
        # pinned every DOF; modal extraction returns the empty mode
        # space). Common in the verification corpus where the user
        # writes ``D,ALL,ALL`` and forgets to release the active DOF.
        N_total = int(free_mask.shape[0])
        return ModalResult(
            omega_sq=np.zeros(0, dtype=np.float64),
            frequency=np.zeros(0, dtype=np.float64),
            mode_shapes=np.zeros((N_total, 0), dtype=np.float64),
            free_mask=free_mask,
        )
    n_req = min(n_modes, n_free)
    resolved_backend = (
        _autotune_eigen_solver(n_req, sigma, n_dofs=n_free)
        if eigen_solver == "auto"
        else eigen_solver
    )
    N = int(free_mask.shape[0])
    with _thread_limit(thread_limit):
        if n_free <= DENSE_CUTOFF:
            # la.eigh defaults to reading the lower triangle — if K_ff was
            # assembled as upper-triangular only (``femorph_triu=True``),
            # the lower half is zeros and the dense path reads garbage.
            # Flip to ``UPLO="U"`` so LAPACK reads the stored triu.
            #
            # M_ff may also be triu-only now; la.eigh's generalized
            # eigenproblem signature treats M's triangle independently
            # via the same ``lower`` kwarg on recent scipy.  To stay
            # robust across scipy versions we densify M symmetrically
            # ourselves when it's triu-stored.
            k_triu = bool(getattr(K_ff, "femorph_triu", False))
            m_triu = bool(getattr(M_ff, "femorph_triu", False))
            K_dense = K_ff.toarray()
            if m_triu:
                M_dense = M_ff.toarray()
                # Symmetrize in-place: M <- M + M.T - diag(M).
                diag = np.diag(M_dense).copy()
                M_dense += M_dense.T
                np.fill_diagonal(M_dense, diag)
            else:
                M_dense = M_ff.toarray()
            try:
                w, v = la.eigh(K_dense, M_dense, lower=(not k_triu))
            except np.linalg.LinAlgError:
                # M_ff is not positive-definite — usually a deck where
                # some structural DOFs have zero inertia and the
                # auto-pin path didn't catch them (M[i,i] non-zero but
                # the column has rank deficiency). Add a tiny ε I to
                # M so the Cholesky factor of B succeeds; the phantom
                # modes go to ω² ≈ trace(K) / ε (very high frequency)
                # and don't contaminate the lowest-n_req returned.
                trace_k = float(np.abs(np.diag(K_dense)).sum())
                eps = 1e-14 * max(trace_k, 1.0) / max(K_dense.shape[0], 1)
                M_dense = M_dense + eps * np.eye(M_dense.shape[0])
                w, v = la.eigh(K_dense, M_dense, lower=(not k_triu))
            # eigh returns eigenvalues in ascending order — keep the
            # lowest n_req and their vectors.
            w = w[:n_req]
            v = v[:, :n_req]
        else:
            Solver = get_eigen_solver(resolved_backend)
            solve_kwargs: dict[str, object] = {
                "sigma": sigma,
                "linear_solver": linear_solver,
                "tol": tol,
            }
            # Only ArpackSolver plumbs ``mixed_precision`` + ``v0``
            # through — other backends don't accept either.
            if resolved_backend == "arpack":
                solve_kwargs["mixed_precision"] = mixed_precision
                if v0 is not None:
                    # ARPACK expects the full-system-length starting
                    # vector on the reduced space, so project via the
                    # free mask if the caller passed a full-system v0.
                    v0_flat = np.ascontiguousarray(v0, dtype=np.float64).ravel()
                    if v0_flat.size == free_mask.shape[0]:
                        v0_flat = v0_flat[free_mask]
                    elif v0_flat.size != K_ff.shape[0]:
                        raise ValueError(
                            f"v0 length {v0_flat.size} matches neither the full "
                            f"system ({free_mask.shape[0]}) nor the reduced "
                            f"system ({K_ff.shape[0]})"
                        )
                    solve_kwargs["v0"] = v0_flat
            w, v = Solver.solve(K_ff, M_ff, n_req, **solve_kwargs)
    # Clamp small negative eigenvalues (numerical noise on rigid-body /
    # near-zero modes) to 0 so the sqrt below stays real.
    w_clean = np.where(w > 0, w, 0.0)
    # Scatter the reduced eigenvectors back to full system length.
    phi = np.zeros((N, n_req), dtype=np.float64)
    phi[free_mask] = v
    return ModalResult(
        omega_sq=w_clean,
        frequency=np.sqrt(w_clean) / (2.0 * np.pi),
        mode_shapes=phi,
        free_mask=free_mask,
    )
def solve_modal(
    K: sp.csr_array,
    M: sp.csr_array,
    prescribed: dict[int, float],
    n_modes: int = 10,
    *,
    eigen_solver: str = "auto",
    linear_solver: str = "auto",
    sigma: float = 0.0,
    tol: float = 1e-12,
    thread_limit: int | None = None,
    mixed_precision: bool | None = False,
    v0: np.ndarray | None = None,
    subprocess_isolated: bool | str = False,
) -> ModalResult:
    """Solve the generalized eigenproblem ``K φ = ω² M φ``.

    Parameters
    ----------
    K, M : scipy.sparse.csr_array
        Global stiffness and mass matrices, same shape.
    prescribed : mapping ``{global_dof_index: 0}``
        Indices to drop from the eigenproblem (clamped /
        zero-stiffness DOFs).
    n_modes : int
        Number of lowest modes to return.
    eigen_solver : str
        Name of a registered eigen backend (``"arpack"``, ``"lobpcg"``,
        ``"primme"``, ``"dense"``), or ``"auto"`` to let the solver
        pick based on problem size.  As of 2026-05 the autotune always
        picks ARPACK shift-invert at sigma=0 - it dominates the
        cross-solver bench on every PBS rotor stage we ship, and the
        earlier PRIMME-when-n_modes>20 heuristic flipped wins between
        the backends within measurement noise.  PRIMME remains opt-in
        via ``eigen_solver="primme"``; on clustered modal spectra
        (cyclic-symmetry rotors with many close-paired eigenvalues)
        its block-Davidson can still beat ARPACK by ~1.5x once the
        basis is warmed up, so pass it explicitly when you've benched
        it on your problem.  See
        :func:`femorph_solver.solvers.eigen.list_eigen_solvers` for the
        full registry.
    linear_solver : str
        Name of the registered linear backend used by ARPACK's
        shift-invert step.  ``"auto"`` (the default) picks the fastest
        available SPD direct solver (pardiso → cholmod → umfpack →
        superlu).  See
        :func:`femorph_solver.solvers.linear.list_linear_solvers` for
        the full registry.
    tol : float
        Convergence tolerance passed to the sparse eigensolver.
        Default ``1e-12`` — tight enough that returned frequencies
        agree with a full machine-epsilon (``tol=0``) solve to at
        least 13 significant digits on typical modal problems, while
        saving ~25 % of ARPACK iterations.  Pass ``0.0`` to force full
        machine precision; pass a looser value (``1e-10``, ``1e-8``)
        when wall-time matters more than the last couple of digits.
    thread_limit : int or None, optional
        Cap on BLAS / OpenMP threads used inside the dense and sparse
        eigenpaths.  ``None`` (default) defers to the global limit set
        via :func:`femorph_solver.set_thread_limit` or the
        ``FEMORPH_SOLVER_NUM_THREADS`` environment variable; an
        explicit integer overrides it for this single call.
    v0 : numpy.ndarray or None, optional
        Starting vector for ARPACK's Lanczos iteration.  ``None``
        (default) seeds from a random vector.  Passing a previous mode
        shape (full-system length ``N`` or reduced length ``n_free``)
        warm-starts a parametric sweep — ARPACK's convergence
        iterations typically drop 1.5–3× when the new eigenvectors are
        close to the old ones (material sensitivity studies, mistuning
        identification).  Ignored by non-ARPACK eigen backends.
    subprocess_isolated : bool or ``"auto"``, optional
        Strict thread isolation via subprocess dispatch.

        * ``False`` (default) — in-process solve.  BLAS / MUMPS / MKL
          pools may pile up across the per-pool ``OMP_NUM_THREADS``
          cap; total live thread count typically reaches 2–3× the cap
          on real workloads.  Use a CPU-affinity pin (option 1, via
          ``FEMORPH_SOLVER_AFFINITY=N``) for a strict *core* cap
          without the IPC cost.
        * ``True`` — always spawn a fresh Python subprocess for the
          solve.  The child inherits the parent's env (so
          ``OMP_NUM_THREADS`` etc. take effect at BLAS init time, not
          after) but starts with no pre-spawned pools.  Total live
          threads = N exactly.  Costs ~0.5–5 s IPC round-trip
          depending on K.nnz.
        * ``"auto"`` — subprocess iff ``n_dofs >= 50_000``; smaller
          problems stay in-process.  Tuned so IPC overhead lands below
          25 % of typical solve time on real meshes.

        **Why** ``False`` **is the default** (and not ``"auto"``): the
        bench in ``doc/source/best-practices/isolation-mode-bench.rst``
        showed that on a *quiet* / dedicated host, in-process and
        subprocess land within 1 % wall-time of each other; the spawn
        + sparse-spill cost cancels out the freed-up thread-pile-up
        overhead because there's enough spare capacity to absorb the
        extra threads.  Forcing subprocess by default would (a) pay
        the IPC cost on every solve for no win on dedicated hosts,
        (b) interfere with notebook / interactive ``print`` semantics
        from inside the spawned child, and (c) make the obvious code
        path (``solve_modal(K, M, prescribed)``) less obvious.
        Setting ``"auto"`` in code that mixes solve sizes is the
        recommended opt-in for shared / multi-tenant hosts where the
        isolation win is measurable (~1.4× faster on the PBS-rotor
        cyclic bench under load).  See
        :mod:`femorph_solver.solvers._subprocess`.
    """
    K_ff, M_ff, free = _bc_reduce(K, M, prescribed)

    # Everything after BC reduction is delegated to
    # ``solve_modal_reduced`` — either in-process or through the
    # subprocess dispatcher — so assemble the shared kwargs once.
    reduced_kwargs: dict[str, object] = {
        "n_modes": n_modes,
        "eigen_solver": eigen_solver,
        "linear_solver": linear_solver,
        "sigma": sigma,
        "tol": tol,
        "thread_limit": thread_limit,
        "mixed_precision": mixed_precision,
        "v0": v0,
    }

    # Subprocess-isolated dispatch — see the ``_subprocess`` module
    # docstring for the why.  ``"auto"`` skips the subprocess on
    # micro-solves where IPC overhead would dominate; ``True`` always
    # spawns; ``False`` (default) is the in-process incumbent.
    if subprocess_isolated:
        from ._subprocess import maybe_run_in_subprocess  # noqa: PLC0415

        return maybe_run_in_subprocess(
            "femorph_solver.solvers.modal",
            "solve_modal_reduced",
            args=(K_ff, M_ff, free),
            kwargs=reduced_kwargs,
            n_dofs=int(K_ff.shape[0]),
            isolated=subprocess_isolated,
        )

    return solve_modal_reduced(K_ff, M_ff, free, **reduced_kwargs)