"""Generalized-eigenvalue modal solve.
Solves ``K φ = ω² M φ`` for the lowest ``n_modes`` free-vibration modes
with Dirichlet BCs partitioned out. DOFs with zero diagonal stiffness (for
example the transverse DOFs of a pure axial truss) are dropped as
rigid-body modes — the canonical free-vibration default.
For ``n_free <= DENSE_CUTOFF`` we always use the dense LAPACK path
(:func:`scipy.linalg.eigh`). For larger systems the eigen backend is
configurable via the ``eigen_solver`` / ``linear_solver`` kwargs — see
:mod:`femorph_solver.solvers.eigen` and :mod:`femorph_solver.solvers.linear` for the
registered options.
References
----------
- Bathe, K.-J. *Finite Element Procedures*, 2nd ed., Prentice-Hall
(2014), §11. [structural eigenproblem ``K φ = ω² M φ`` and
discussion of subspace / Lanczos methods]
- Hughes, T. J. R. *The Finite Element Method: Linear Static and
Dynamic Finite Element Analysis*, §9. [free-vibration modal
analysis, M-orthonormalisation, rigid-body mode treatment]
- Wilkinson, J. H. *The Algebraic Eigenvalue Problem*, Oxford
(1965). [reference text for the symmetric generalized
eigenvalue problem solved by ``scipy.linalg.eigh`` on the
dense branch (LAPACK's DSYGVD)]
"""
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
from typing import TYPE_CHECKING
import numpy as np
import scipy.linalg as la
import scipy.sparse as sp
from .._threads import limit as _thread_limit
from .eigen import get_eigen_solver
if TYPE_CHECKING:
from femorph_solver.model import Model
DENSE_CUTOFF = 500
# Autotune always picks ``"arpack"`` for the simple modal path. PRIMME
# remains registered (so :func:`get_eigen_solver("primme")` and the
# cyclic-modal solver — which provides its own ``OPinv`` and gets the
# real win from PRIMME's JDQMR algorithm — keep working). But the
# autotune doesn't hand the simple wrapper a PRIMME call:
#
# * Without an ``OPinv``, PRIMME's Davidson path falls over with
# error -41 ("matvec failed") on large / ill-conditioned K — the
# 500k-DOF clamped PBS rotor is the canary.
# * ARPACK + a sparse direct solver (``mkl_direct`` /
# ``pardiso`` / ``mumps``) already beats MAPDL Block Lanczos by
# 25 % on that workload (see :doc:`/best-practices/pbs-rotor-comparison`),
# so the marginal speedup PRIMME could provide on a narrow size
# band (10k–100k DOFs, when Davidson does converge) doesn't
# justify exposing the matvec-error footgun to autotune callers.
#
# Replicating the cyclic-path's OPinv-passing wrapper inside the
# simple modal entry point would buy 10–20 % over ARPACK on that
# narrow band at the cost of duplicating the linear-backend
# plumbing already encapsulated in ``solve_modal_reduced``. Not
# worth it; revisit if a workload surfaces where ARPACK is the
# bottleneck.
def _autotune_eigen_solver(n_modes: int, sigma: float, n_dofs: int = 0) -> str:
"""Pick a registered eigen-backend name for ``eigen_solver="auto"``.
Parameters
----------
n_modes, sigma, n_dofs
Retained for API stability; currently unused. See
:doc:`/best-practices/pbs-rotor-comparison` for the rationale
behind the always-ARPACK choice.
Returns
-------
str
Always ``"arpack"`` from 2026-05 onward.
"""
del n_modes, sigma, n_dofs
return "arpack"
[docs]
@dataclass(frozen=True, slots=True)
class ModalResult:
"""Result of a modal solve.
Attributes
----------
omega_sq : numpy.ndarray
``(n_modes,)`` eigenvalues ω² = (2π f)².
frequency : numpy.ndarray
``(n_modes,)`` cyclic frequencies f [Hz].
mode_shapes : numpy.ndarray
``(N, n_modes)`` DOF-indexed mode shapes — each column is one
mode in the global DOF ordering produced by
:meth:`femorph_solver.model.Model.dof_map`.
free_mask : numpy.ndarray
``(N,)`` bool — DOFs kept in the eigenproblem after Dirichlet
reduction.
"""
omega_sq: np.ndarray
frequency: np.ndarray
mode_shapes: np.ndarray
free_mask: np.ndarray
def __repr__(self) -> str:
# Default dataclass repr dumps the full mode-shape array — at
# any realistic problem size that's a flood. Show the mode
# count + frequency span instead.
n_modes = int(self.frequency.shape[0])
n_dof = int(self.mode_shapes.shape[0]) if self.mode_shapes.ndim == 2 else 0
if n_modes:
f_lo = float(self.frequency[0])
f_hi = float(self.frequency[-1])
return f"ModalResult(n_modes={n_modes}, N={n_dof}, f∈[{f_lo:.3f}, {f_hi:.3f}] Hz)"
return f"ModalResult(n_modes=0, N={n_dof})"
[docs]
def save(
self,
path: str | Path,
model: Model,
*,
unit_system: str = "SI",
extra_field_data: dict[str, np.ndarray] | None = None,
) -> Path:
"""Serialise this modal result to a disk-backed ``.pv`` file.
Re-projects the DOF-indexed :attr:`mode_shapes` onto per-node
``(n_points, n_dof_per_node)`` arrays using the model's
:meth:`~femorph_solver.model.Model.dof_map`, then hands off to
:func:`~femorph_solver.result.modal.write_modal_result`.
Parameters
----------
path : str or pathlib.Path
Destination file (``.pv`` extension conventional).
model : femorph_solver.model.Model
The model whose K/M matrices this result was computed
from — supplies the mesh geometry and DOF layout
needed to reshape ``mode_shapes``.
extra_field_data : mapping, optional
Extra ``field_data`` entries (e.g. solver statistics).
Returns
-------
pathlib.Path
Canonicalised absolute path to the written file.
Notes
-----
After saving, load the file back via
:func:`femorph_solver.result.read`; the resulting object is a
:class:`~femorph_solver.result.modal.ModalResultDisk` (disk-backed)
with the same frequencies and per-mode displacements plus the
full plotting / animation surface.
"""
from femorph_solver.result._material import MaterialTable
from femorph_solver.result.modal import write_modal_result
grid = model.grid
dof_map = model.dof_map()
node_nums = np.asarray(grid.point_data["ansys_node_num"], dtype=np.int64)
node_to_row = {int(n): i for i, n in enumerate(node_nums)}
n_modes = int(self.mode_shapes.shape[1])
# Output: (n_modes, n_points, 6). Canonical DOFs are
# UX, UY, UZ, ROTX, ROTY, ROTZ — we keep all six slots so
# beam / shell rotations round-trip alongside translations.
mode_arr = np.zeros((n_modes, grid.n_points, 6), dtype=np.float64)
# Vectorised scatter: every DOF row maps to one (point, comp)
# cell of mode_arr. Loop is only over the three-digit mode
# count; inner scatter is fully numpy.
node_row = np.fromiter(
(node_to_row[int(n)] for n in dof_map[:, 0]),
dtype=np.int64,
count=dof_map.shape[0],
)
comp = dof_map[:, 1].astype(np.int64)
for k in range(n_modes):
mode_arr[k, node_row, comp] = self.mode_shapes[:, k]
material_table = MaterialTable.from_model(model, unit_system=str(unit_system))
return write_modal_result(
path,
grid,
self.frequency,
mode_arr,
omega_sq=self.omega_sq,
extra_field_data=extra_field_data,
material_table=material_table,
unit_system=unit_system,
)
def _bc_reduce(
K: sp.csr_array,
M: sp.csr_array,
prescribed: dict[int, float],
) -> tuple[sp.csr_array, sp.csr_array, np.ndarray]:
"""Return ``(K_ff, M_ff, free_mask)`` — BC-reduced K / M plus the
boolean mask of free DOFs.
Does the row-and-column-slicing in a single row-parallel pass via
``_core.csr_submatrix_free`` when available, falling back to
scipy's fancy-index path otherwise. Callers that know they're
done with the full K / M can release their own refs after this
returns — the reduced matrices have their own arrays (no aliasing
back into K / M's storage).
"""
N = K.shape[0]
mask_fixed = np.zeros(N, dtype=bool)
for idx in prescribed:
mask_fixed[int(idx)] = True
diag = K.diagonal()
scale = float(np.max(np.abs(diag))) if diag.size else 1.0
tol_pivot = 1e-12 * scale if scale > 0 else 1e-30
mask_fixed |= np.abs(diag) <= tol_pivot
# Auto-pin DOFs with zero diagonal mass — these can't participate
# in K φ = ω² M φ (no inertia → mode shape there is unconstrained
# by the eigenproblem). MAPDL solves these decks by automatically
# treating massless DOFs as constrained; we do the same.
m_diag = M.diagonal()
m_scale = float(np.max(np.abs(m_diag))) if m_diag.size else 1.0
m_tol = 1e-12 * m_scale if m_scale > 0 else 1e-30
mask_fixed |= np.abs(m_diag) <= m_tol
free = ~mask_fixed
# Rigid-body / unanchored-chain detection on M_ff (vm52 fails
# the Cholesky factor of M with 'leading minor not positive
# definite' on a zero-eigen-value mode of M). Run only when M_ff
# actually has a zero diagonal in the still-free set — otherwise
# the eigensolver can handle near-singular M itself, and the
# null-space pin can over-constrain real loaded DOFs (caught the
# vm_msc_vg_1_4 plate validation half-deflecting).
n_free_dofs = int(free.sum())
try:
from femorph_solver import _core as _core_ext # noqa: PLC0415
except ImportError: # pragma: no cover
_core_ext = None
if _core_ext is not None and hasattr(_core_ext, "csr_submatrix_free"):
full_to_free = np.full(N, -1, dtype=np.int32)
full_to_free[free] = np.arange(n_free_dofs, dtype=np.int32)
K_csr = K.tocsr()
M_csr = M.tocsr()
ip_k, ix_k, d_k = _core_ext.csr_submatrix_free(
np.ascontiguousarray(K_csr.indptr, dtype=np.int32),
np.ascontiguousarray(K_csr.indices, dtype=np.int32),
np.ascontiguousarray(K_csr.data, dtype=np.float64),
full_to_free,
n_free_dofs,
)
# K and M share element connectivity + the same ``full_to_free``
# mask when built from the same FE model, so their reduced
# sparsity is usually bit-identical. Reuse K's
# ``(new_indptr, new_indices)`` and only pull filtered data for
# M via the ``_data`` variant — saves one Pass-1 row-count and
# one ``int32[nnz]`` allocation (~26 MB on PTR ``M_ff``).
#
# Guarded by an explicit ``indptr`` / ``indices`` equality check
# because callers can pass K / M with different patterns (unit
# tests sometimes do — e.g. K diagonal + M sparse). Falls back
# to the full two-pass call otherwise, and on older ``_core``
# builds that predate the ``_data`` binding.
can_share_pattern = (
hasattr(_core_ext, "csr_submatrix_free_data")
and K_csr.indptr.shape == M_csr.indptr.shape
and K_csr.indices.shape == M_csr.indices.shape
and np.array_equal(K_csr.indptr, M_csr.indptr)
and np.array_equal(K_csr.indices, M_csr.indices)
)
if can_share_pattern:
d_m = _core_ext.csr_submatrix_free_data(
np.ascontiguousarray(M_csr.indptr, dtype=np.int32),
np.ascontiguousarray(M_csr.indices, dtype=np.int32),
np.ascontiguousarray(M_csr.data, dtype=np.float64),
full_to_free,
n_free_dofs,
)
ip_m, ix_m = ip_k, ix_k
else:
ip_m, ix_m, d_m = _core_ext.csr_submatrix_free(
np.ascontiguousarray(M_csr.indptr, dtype=np.int32),
np.ascontiguousarray(M_csr.indices, dtype=np.int32),
np.ascontiguousarray(M_csr.data, dtype=np.float64),
full_to_free,
n_free_dofs,
)
# Skip scipy's csr_array(...) constructor because it runs
# ``check_format`` which copies both the data and column-index
# arrays into fresh buffers (on a 46 k-DOF / 6 M-nnz PTR K_ff
# that's ~90 MB of transient peak RSS per call — called twice
# here, once for K and once for M). ``_core.csr_submatrix_free``
# already guarantees the output is valid CSR: sorted column
# indices per row, int32 + float64, contiguous. Build the
# csr_array by assigning to the private attributes directly so
# scipy skips the validation + copy, matching the same trick
# ``Model._assemble`` uses for its scatter output.
def _make_csr(data, indices, indptr, n):
out = sp.csr_array.__new__(sp.csr_array)
out._shape = (n, n)
out.indptr = indptr
out.indices = indices
out.data = data
out._maxprint = 50
return out
K_ff = _make_csr(d_k, ix_k, ip_k, n_free_dofs)
M_ff = _make_csr(d_m, ix_m, ip_m, n_free_dofs)
else:
K_csc = K.tocsc()
M_csc = M.tocsc()
K_ff = K_csc[free][:, free]
M_ff = M_csc[free][:, free]
return K_ff, M_ff, free
def solve_modal_reduced(
    K_ff: sp.csr_array,
    M_ff: sp.csr_array,
    free_mask: np.ndarray,
    n_modes: int = 10,
    *,
    eigen_solver: str = "auto",
    linear_solver: str = "auto",
    sigma: float = 0.0,
    tol: float = 1e-12,
    thread_limit: int | None = None,
    mixed_precision: bool | None = False,
    v0: np.ndarray | None = None,
) -> ModalResult:
    """Solve the eigenproblem directly on pre-reduced ``K_ff`` / ``M_ff``.

    Skips the full-matrix BC slicing that :func:`solve_modal` performs:
    the caller builds ``K_ff`` / ``M_ff`` via :func:`_bc_reduce`, drops
    its references to the full ``K`` / ``M`` (including the Model
    cache), then calls this so the eigensolve peaks at roughly
    ``K_ff + M_ff + triu + MKL factor`` of RSS instead of the default
    path's ``K + M + K_ff + M_ff + triu + MKL factor``.
    """
    n_free = int(free_mask.sum())
    N = int(free_mask.shape[0])
    if n_free == 0:
        # Every DOF constrained — degenerate but legitimate (deck author
        # wrote ``D,ALL,ALL`` and forgot to release the active DOF, a
        # common shape in the verification corpus). Return the empty
        # mode space.
        return ModalResult(
            omega_sq=np.zeros(0, dtype=np.float64),
            frequency=np.zeros(0, dtype=np.float64),
            mode_shapes=np.zeros((N, 0), dtype=np.float64),
            free_mask=free_mask,
        )
    n_req = min(n_modes, n_free)
    if eigen_solver == "auto":
        resolved_backend = _autotune_eigen_solver(n_req, sigma, n_dofs=n_free)
    else:
        resolved_backend = eigen_solver
    with _thread_limit(thread_limit):
        if n_free <= DENSE_CUTOFF:
            # la.eigh reads a single triangle. If K_ff was assembled
            # upper-triangular only (``femorph_triu=True``) the lower
            # half is zeros, so point LAPACK at the stored triu via
            # ``lower=False``.
            read_lower = not bool(getattr(K_ff, "femorph_triu", False))
            K_dense = K_ff.toarray()
            M_dense = M_ff.toarray()
            if bool(getattr(M_ff, "femorph_triu", False)):
                # M may be triu-only too; recent scipy handles M's
                # triangle via the same ``lower`` kwarg, but to stay
                # robust across scipy versions we symmetrize densely
                # ourselves: M <- M + M.T - diag(M), in place.
                d = np.diag(M_dense).copy()
                M_dense += M_dense.T
                np.fill_diagonal(M_dense, d)
            try:
                w, v = la.eigh(K_dense, M_dense, lower=read_lower)
            except np.linalg.LinAlgError:
                # M_ff not positive-definite — typically a deck where
                # some structural DOFs carry zero inertia and the
                # auto-pin missed them (M[i,i] non-zero but the column
                # is rank-deficient). Regularize with a tiny ε I so the
                # Cholesky factor succeeds; the phantom modes land at
                # ω² ≈ trace(K) / ε — far above the lowest n_req and
                # never returned.
                trace_k = float(np.abs(np.diag(K_dense)).sum())
                eps = 1e-14 * max(trace_k, 1.0) / max(K_dense.shape[0], 1)
                M_dense = M_dense + eps * np.eye(M_dense.shape[0])
                w, v = la.eigh(K_dense, M_dense, lower=read_lower)
            w = w[:n_req]
            v = v[:, :n_req]
        else:
            Solver = get_eigen_solver(resolved_backend)
            solve_kwargs: dict[str, object] = {
                "sigma": sigma,
                "linear_solver": linear_solver,
                "tol": tol,
            }
            if resolved_backend == "arpack":
                # Only ArpackSolver accepts ``mixed_precision`` / ``v0``;
                # other backends would reject the kwargs.
                solve_kwargs["mixed_precision"] = mixed_precision
                if v0 is not None:
                    # ARPACK wants the starting vector on the reduced
                    # space; project via the free mask when the caller
                    # handed in a full-system-length v0.
                    v0_flat = np.ascontiguousarray(v0, dtype=np.float64).ravel()
                    if v0_flat.size == free_mask.shape[0]:
                        v0_flat = v0_flat[free_mask]
                    elif v0_flat.size != K_ff.shape[0]:
                        raise ValueError(
                            f"v0 length {v0_flat.size} matches neither the full "
                            f"system ({free_mask.shape[0]}) nor the reduced "
                            f"system ({K_ff.shape[0]})"
                        )
                    solve_kwargs["v0"] = v0_flat
            w, v = Solver.solve(K_ff, M_ff, n_req, **solve_kwargs)
    # Clamp tiny negative eigenvalues (numerical noise on rigid-body-ish
    # modes) to zero before the sqrt.
    omega_sq = np.where(w > 0, w, 0.0)
    phi = np.zeros((N, n_req), dtype=np.float64)
    phi[free_mask] = v
    return ModalResult(
        omega_sq=omega_sq,
        frequency=np.sqrt(omega_sq) / (2.0 * np.pi),
        mode_shapes=phi,
        free_mask=free_mask,
    )
def solve_modal(
    K: sp.csr_array,
    M: sp.csr_array,
    prescribed: dict[int, float],
    n_modes: int = 10,
    *,
    eigen_solver: str = "auto",
    linear_solver: str = "auto",
    sigma: float = 0.0,
    tol: float = 1e-12,
    thread_limit: int | None = None,
    mixed_precision: bool | None = False,
    v0: np.ndarray | None = None,
    subprocess_isolated: bool | str = False,
) -> ModalResult:
    """Solve the generalized eigenproblem ``K φ = ω² M φ``.

    Parameters
    ----------
    K, M : scipy.sparse.csr_array
        Global stiffness and mass matrices, same shape.
    prescribed : mapping ``{global_dof_index: 0}``
        Indices to drop from the eigenproblem (clamped / zero-stiffness
        DOFs).
    n_modes : int
        Number of lowest modes to return.
    eigen_solver : str
        Name of a registered eigen backend (``"arpack"``, ``"lobpcg"``,
        ``"primme"``, ``"dense"``), or ``"auto"`` to let the solver pick
        based on problem size. As of 2026-05 the autotune always picks
        ARPACK shift-invert at sigma=0 — it dominates the cross-solver
        bench on every PBS rotor stage we ship, and the prior
        PRIMME-when-n_modes>20 heuristic flipped wins between the two
        backends in the noise. PRIMME stays opt-in via
        ``eigen_solver="primme"``; on clustered modal spectra
        (cyclic-symmetry rotors with many close-paired eigenvalues) its
        block-Davidson can still beat ARPACK by ~1.5x once the basis is
        warmed up, so pass it explicitly when you've benched it on your
        problem. See
        :func:`femorph_solver.solvers.eigen.list_eigen_solvers` for the
        full registry.
    linear_solver : str
        Name of the registered linear backend used by ARPACK's
        shift-invert step. ``"auto"`` (the default) picks the fastest
        available SPD direct solver (pardiso → cholmod → umfpack →
        superlu). See
        :func:`femorph_solver.solvers.linear.list_linear_solvers` for
        the full registry.
    sigma : float
        Shift for the shift-invert transform; default ``0.0`` targets
        the lowest modes.
    tol : float
        Convergence tolerance passed to the sparse eigensolver. Default
        ``1e-12`` — tight enough that returned frequencies agree with a
        full machine-epsilon (``tol=0``) solve to at least 13
        significant digits on typical modal problems while saving ~25 %
        of ARPACK iterations. Pass ``0.0`` for full machine precision;
        pass a looser value (``1e-10``, ``1e-8``) when wall-time matters
        more than the last couple of digits.
    thread_limit : int or None, optional
        Cap on BLAS / OpenMP threads inside the dense and sparse
        eigenpaths. ``None`` (default) defers to the global limit set
        via :func:`femorph_solver.set_thread_limit` or the
        ``FEMORPH_SOLVER_NUM_THREADS`` environment variable; pass an
        explicit integer to override for a single call.
    mixed_precision : bool or None, optional
        Forwarded to ARPACK's shift-invert step; ignored by other
        backends.
    v0 : numpy.ndarray or None, optional
        Starting vector for ARPACK's Lanczos iteration. Default ``None``
        seeds from a random vector. Pass a previous mode shape
        (full-system length ``N`` or reduced-system length ``n_free``)
        to warm-start a parametric sweep — convergence iterations
        typically drop 1.5–3× when the new eigenvectors are close to the
        old ones (material sensitivity studies, mistuning
        identification). Ignored by non-ARPACK eigen backends.
    subprocess_isolated : bool or ``"auto"``, optional
        Strict thread isolation via subprocess dispatch.

        * ``False`` (default) — in-process solve. BLAS / MUMPS / MKL
          pools may pile up past the per-pool ``OMP_NUM_THREADS`` cap;
          total live threads typically reach 2–3× the cap on real
          workloads. Use a CPU-affinity pin
          (``FEMORPH_SOLVER_AFFINITY=N``) for a strict *core* cap
          without the IPC cost.
        * ``True`` — always spawn a fresh Python subprocess. The child
          inherits the parent's env (so ``OMP_NUM_THREADS`` etc. take
          effect at BLAS init time) but starts with no pre-spawned
          pools; total live threads = N exactly. Costs ~0.5–5 s IPC
          round-trip depending on K.nnz.
        * ``"auto"`` — subprocess iff ``n_dofs >= 50_000``; smaller
          problems stay in-process, tuned so IPC overhead lands below
          25 % of typical solve time on real meshes.

        ``False`` stays the default (not ``"auto"``) because the bench
        in ``doc/source/best-practices/isolation-mode-bench.rst`` showed
        in-process and subprocess within 1 % wall-time on a quiet /
        dedicated host — spawn + sparse-spill cost cancels the freed-up
        thread-pile-up overhead. Defaulting to subprocess would (a) pay
        the IPC cost on every solve for no win on dedicated hosts,
        (b) interfere with notebook / interactive ``print`` semantics
        from the spawned child, and (c) make the obvious code path less
        obvious. Opt in with ``"auto"`` on shared / multi-tenant hosts,
        where the isolation win is measurable (~1.4× faster on the
        PBS-rotor cyclic bench under load). See
        :mod:`femorph_solver.solvers._subprocess`.
    """
    K_ff, M_ff, free = _bc_reduce(K, M, prescribed)
    # Everything downstream of the BC reduction takes the same options,
    # whether it runs here or in a spawned child — build them once.
    reduced_kwargs: dict[str, object] = {
        "n_modes": n_modes,
        "eigen_solver": eigen_solver,
        "linear_solver": linear_solver,
        "sigma": sigma,
        "tol": tol,
        "thread_limit": thread_limit,
        "mixed_precision": mixed_precision,
        "v0": v0,
    }
    # Subprocess-isolated dispatch — see the ``_subprocess`` module
    # docstring for the why. ``"auto"`` skips the subprocess on
    # micro-solves where IPC would dominate; ``True`` always spawns;
    # ``False`` (default) stays in-process.
    if subprocess_isolated:
        from ._subprocess import maybe_run_in_subprocess  # noqa: PLC0415
        return maybe_run_in_subprocess(
            "femorph_solver.solvers.modal",
            "solve_modal_reduced",
            args=(K_ff, M_ff, free),
            kwargs=reduced_kwargs,
            n_dofs=int(K_ff.shape[0]),
            isolated=subprocess_isolated,
        )
    return solve_modal_reduced(K_ff, M_ff, free, **reduced_kwargs)