Source code for SALib.analyze.enhanced_hdmr

import math
import warnings
from typing import Dict, Tuple
from types import MethodType
from itertools import combinations as comb, product
from collections import defaultdict, namedtuple

import numpy as np
from pandas import DataFrame as df
from numpy.linalg import det, pinv, matrix_rank
from scipy.linalg import svd, LinAlgError, solve
from scipy import stats, special

from . import common_args
from ..util import read_param_file, ResultDict

__all__ = ["analyze", "cli_parse", "cli_action"]



[docs]
def analyze(
    problem: Dict,
    X: np.ndarray,
    Y: np.ndarray,
    max_order: int = 2,
    poly_order: int = 3,
    bootstrap: int = 20,
    subset: int = None,
    max_iter: int = 100,
    l2_penalty: float = 0.01,
    alpha: float = 0.95,
    extended_base: bool = True,
    print_to_console: bool = False,
    return_emulator: bool = False,
    seed: int = None,
) -> Dict:
    """Compute global sensitivity indices using the meta-modeling technique
    known as High-Dimensional Model Representation (HDMR).

    Introduction
    ------------
    HDMR itself is not a sensitivity analysis method but a surrogate modeling
    approach. It constructs a map of relationship between sets of high
    dimensional inputs and output system variables [1]. This I/O relation can
    be constructed using different basis functions (orthonormal polynomials,
    splines, etc.). The model decomposition can be expressed as

    .. math::
        \\tilde{y} \\approx \\widehat{y} &= f_0 + \\sum_{i=1}^{d} f_i(x_i) +
              \\sum_{i=1}^{d-1} \\sum_{j=i+1}^{d} f_{ij} (x_{ij}) +
              \\sum_{i=1}^{d-2} \\sum_{j=i+1}^{d-1}

              \\sum_{j+1}^{d} f_{ijk} (x_{ijk}) + \\epsilon \\

        \\widehat{y} &= f_0 + \\sum_{u \\subseteq \\{1, 2, ..., d \\}}^{2^n - 1}
          f_u + \\epsilon

    where :math:`u` represents any subset including an empty set. There is a
    unique decomposition regardless of correlation among the input variables
    under the following condition.

    .. math::
        \\forall v \\subseteq u, \\forall g_v: \\int
        f_u (x_u) g_v (x_v) w(\\bm(x)) d\\bm(x) = 0

    This condition implies that a component function is only required to be
    orthogonal to all nested lower order component functions whose variables
    are a subset of its variables. For example, :math:`f_{ijk} (x_i, x_j, x_k )`
    is only required to be orthogonal to :math:`f_i(x_i), f_j(x_j), f_k (x_k),
    f_{ij}(x_i, x_j), f_{ik}(x_i, x_k),` and :math:`f_{jk} (x_j, x_k)`.
    Please keep in mind that this condition is only satisfied when `extended_base`
    is set to `True`.

    HDMR becomes extremely useful when the computational cost of obtaining
    sufficient Monte Carlo samples are prohibitive, as may be the case with
    Sobol's method. It uses least-square regression to reduce the required
    number of samples and thus the number of function (model) evaluations.
    Another advantage of this method is that it can account for correlation
    among the model input. Unlike other variance-based methods, the main
    effects are the combination of structural (uncorrelated) and correlated
    contributions.

    Covariance Decomposition
    ------------------------
    Variance-based sensitivity analysis methods employ a decomposition approach
    to assess the contributions of input sets towards the variance observed in
    the model's output. This method uses the same technique while also considering
    the influence of correlation in the decomposition of output variance.The
    following equation ilustrates how correlation plays a role in variance
    decomposition.

    .. math::
        Var[y] = \\sum_{u=1}^{2^n - 1} Var[f_u] +
            \\sum_{u=1}^{2^n - 1} Cov \\left[f_u, \\sum_{v \\neq u} f_v \\right]

    The first component on the right hand side of the equation depicts the
    uncorrelated contribution to the overall variance, while the subsequent
    component signifies the associated contribution of a specific component
    function in correlation with other component functions. In this method,
    we used `Sa` and `Sb` to represent uncorrelated contribution and
    correlated contribution.

    This method uses as input

    - a N x d matrix of N different d-vectors of model inputs (factors/parameters)
    - a N x 1 vector of corresponding model outputs

    Notes
    -----
    Compatible with:
        all samplers

    Sets an `emulate` method allowing re-use of the emulator.

    Examples
    --------
    .. code-block:: python
        :linenos:

        sp = ProblemSpec({
            'names': ['X1', 'X2', 'X3'],
            'bounds': [[-np.pi, np.pi]] * 3,
            'outputs': ['Y']
        })

        (sp.sample_saltelli(2048)
            .evaluate(Ishigami.evaluate)
            .analyze_enhanced_hdmr()
        )

        sp.emulate()

    Parameters
    ----------
    problem : dict
        The problem definition
    X : numpy.matrix
        The NumPy matrix containing the model inputs, N rows by d columns
    Y : numpy.array
        The NumPy array containing the model outputs for each row of X
    max_order : int (1-3, default: 2)
        Maximum HDMR expansion order
    poly_order : int (1-10, default: 3)
        Maximum polynomial order
    bootstrap : int (1-100, default: 20)
        Number of bootstrap iterations
    subset : int (300-N, default: N/2)
        Number of bootstrap samples. Will be set to length of `Y` if `K` is set to 1.
    max_iter : int (1-1000, default: 100)
        Max iterations backfitting. Not used if extended_base is `True`
    l2_penalty : float (0-10, default: 0.01)
        Regularization term
    alpha : float (0.5-1, default: 0.95)
        Confidence interval for F-test
    extended_base : bool (default: True)
        Extends base matrix if `True`. This guarantees the hierarchical orthogonality
    print_to_console : bool (default: False)
        Prints results directly to console (default: False)
    return_emulator: bool (default: False)
        Attaches emulate method to the Si if `True`
    seed : int (default: None)
        Seed to generate a random number

    Returns
    -------
    Si : ResultDict,
        -"Sa" : Sensitivity index (uncorrelated contribution)
        -"Sa_conf" : Statistical confidence interval of `Sa`
        -"Sb" : Sensitivity index (correlated contribution)
        -"Sb_conf" : Statistical confidence interval of `Sb`
        -"S" : Sensitivity index (total contribution)
        -"S_conf" : Statistical confidence interval of `S`
        -"ST" : Total Sensitivity indexes of features/inputs
        -"ST_conf" : Statistical confidence interval of `ST`
        -"Signf" : Signigicancy for each bootstrap iteration
        -"Term" : Component name
        -emulate() : Emulator method when return_emulator is set to `True`

    References
    ----------
    1. Rabitz, H. and Aliş, Ö.F.,
       General foundations of high dimensional model representations,
       Journal of Mathematical Chemistry 25, 197-233 (1999)
       https://doi.org/10.1023/A:1019188517934

    2. Genyuan Li, H. Rabitz, P.E. Yelvington, O.O. Oluwole, F. Bacon,
       C.E. Kolb, and J. Schoendorf,
       "Global Sensitivity Analysis for Systems with Independent and/or
       Correlated Inputs",
       Journal of Physical Chemistry A, Vol. 114 (19), pp. 6022 - 6032, 2010,
       https://doi.org/10.1021/jp9096919

    3. Gao, Y., Sahin, A., & Vrugt, J. A. (2023)
       Probabilistic sensitivity analysis with dependent variables:
       Covariance-based decomposition of hydrologic models.
       Water Resources Research, 59, e2022WR032834.
       https://doi.org/10.1029/2022WR032834
    """
    # Random Seed
    if seed:
        np.random.seed(seed)

    # Check arguments
    Y, problem, subset, max_iter = _check_args(
        problem,
        X,
        Y,
        max_order,
        poly_order,
        bootstrap,
        subset,
        max_iter,
        l2_penalty,
        alpha,
        extended_base,
    )
    # Instantiate Core Parameters
    hdmr, Si = _core_params(
        problem,
        *X.shape,
        np.mean(Y),
        poly_order,
        max_order,
        bootstrap,
        subset,
        extended_base,
    )
    # Calculate HDMR Basis Matrix
    b_m = _basis_matrix(X, hdmr)
    # Functional ANOVA decomposition
    Si, hdmr = _fanova(b_m, hdmr, Si, Y, bootstrap, max_iter, l2_penalty, alpha)
    # HDMR finalize
    Si = _finalize(hdmr, Si, alpha, return_emulator)
    # Print results to console
    if print_to_console:
        _print(Si)

    return Si



def _check_args(
    problem,
    X,
    Y,
    max_order,
    poly_order,
    bootstrap,
    subset,
    max_iter,
    l2_penalty,
    alpha,
    extended_base,
):
    """Validates all parameters to ensure that they are within the limits"""
    # Make sure Y, output array, is a matrix
    Y = Y.reshape(-1, 1)

    # Get dimensions of input-output
    N, d = X.shape
    y_row = Y.shape[0]

    # If parameter names are not defined
    if "names" not in problem:
        problem["names"] = [f"x_{i}" for i in range(d)]

    # If parameter bounds are not defined
    if "bounds" not in problem:
        problem["bounds"] = [[X[:, i].min(), X[:, i].max()] for i in range(d)]

    # If parameter num_vars are not defined
    if "num_vars" not in problem:
        problem["num_vars"] = d

    # If the length of 'num_vars' in ProblemSpec != Columns in X matrix
    msg = "Problem definition must be consistent with the number of dimension in `X`"
    if "num_vars" in problem and problem["num_vars"] != d:
        raise ValueError(msg)

    # If the length of 'names' in ProblemSpec != Columns in X matrix
    if "names" in problem and len(problem["names"]) != d:
        raise ValueError(msg)

    # If the length of 'bounds' in ProblemSpec != Columns in X matrix
    if "bounds" in problem and len(problem["bounds"]) != d:
        raise ValueError(msg)

    # Now check input-output mismatch
    if d == 1:
        raise RuntimeError(
            "Matrix X contains only a single column: No point to do"
            " sensitivity analysis when d = 1."
        )

    if N < 300:
        raise RuntimeError(
            f"Number of samples in the input matrix X, {N}, is insufficient."
            "Need at least 300."
        )

    if N != y_row:
        raise ValueError(
            f"Dimension mismatch. The number of outputs ({y_row}) should match"
            f" number of samples ({N})"
        )

    if max_order not in (1, 2, 3):
        raise ValueError(
            f"'max_order' key of options should be an integer with values of"
            f" 1, 2 or 3, got {max_order}"
        )

    # Important next check for max_order - as max_order relates to d
    if (d == 2) and (max_order > 2):
        max_order = 2
        warnings.warn("max_order is set to 2 due to lack of third input factor")

    if poly_order not in np.arange(1, 11):
        raise ValueError(
            "'poly_order' key of options should be an integer between 1 to 10."
        )

    if bootstrap not in np.arange(1, 101):
        raise ValueError("'bootstrap' should be an integer between 1 to 100.")

    if (bootstrap == 1) and (subset != y_row):
        subset = y_row
        warnings.warn(f"subset is set to {y_row} due to no bootstrap")

    if subset is None:
        subset = y_row // 2
    elif subset not in np.arange(300, N + 1):
        raise ValueError(
            f"'subset' should be an integer between 300 and {N}, "
            f"the number of rows matrix X."
        )

    if alpha < 0.5 or alpha > 1.0:
        raise ValueError("'alpha' should be a float between 0.5 to 1.0")

    if extended_base:
        max_iter = None
    else:
        if max_iter not in np.arange(100, 1000):
            raise ValueError("'max_iter' should be between 100 and 1000")

    if l2_penalty < 0.0 or l2_penalty > 10:
        raise ValueError("'l2_penalty' should be in between 0 and 10")

    return Y, problem, subset, max_iter


def _core_params(
    problem: Dict,
    N: int,
    d: int,
    f0: float,
    poly_order: int,
    max_order: int,
    bootstrap: int,
    subset: int,
    extended_base: bool,
) -> Tuple[namedtuple, ResultDict]:
    """This function establishes the core parameters of an HDMR
    (High Dimensional Model Representation) expansion and returns
    them in a namedtuple an ResultDict datatype. These parameters
    are used across all functions and procedures related to HDMR.

    Parameters
    ----------
    problem : Dict
        Problem definition
    N : int
        Number of samples in input matrix `X`.
    d : int
        Dimensionality of the problem.
    f0 : float
        Zero-th component function
    poly_order : int
        Polynomial order to be used to calculate orthonormal polynomial.
    max_order : int
        Maximum functional ANOVA expansion order.
    bootstrap : int
        Number of iteration to be used in bootstrap.
    subset : int
        Number of samples to be used in bootstrap.
    extended_base : bool
        Whether to use extended basis matrix or not.

    Returns
    -------
    hdmr : namedtuple
       Core parameters of hdmr expansion

    Si : ResultDict
        Sensitivity Indices

    HDMR Attributes
    ---------------
    N : int
        Number of samples in input matrix `X`.
    d : int
        Dimensionality of the problem.
    max_order : int
        Maximum functional ANOVA expansion order.
    ext_base : bool
        Whether to use extended basis matrix or not.
    subset : int
        Number of samples to be used in bootstrap.
    p_o : int
        Polynomial order to be used to calculated orthonormal polynomial.
    nc1 : int
        Number of component functions in 1st order.
    nc2 : int
        Number of component functions in 2nd order.
    nc3 int
        Number of component functions in 3rd order.
    nc_t : int
        Total number of component functions.
    nt1 : int
        Total number of terms(columns) for a given 1st order component function
    nt2 : int
        Total number of terms(columns) for a given 2nd order component function
    nt3 : int
        Total number of terms(columns) for a given 3rd order component function
    tnt1 : int
        Total number of terms(columns) for all 1st order component functions
    tnt2 : int
        Total number of terms(columns) for all 2nd order component functions
    tnt3 : int
        Total number of terms(columns) for all 3rd order component functions
    a_tnt : int
        All terms (columns) in a hdmr expansion
    x : numpy.array
        Solution of hdmr expansion
    idx : numpy.array
        Indexes of subsamples to be used for bootstrap
    beta : numpy.array
        Arrangement of second-order component functions
    gamma : numpy.array
        Arrangement of third-order component functions
    f0 : float
        Zero-th component function

    Si Keys
    -------
    - "S" : numpy.array
        Sensitivity index (total contribution)
    - "S_conf" : numpy.array
        Statistical confidence interval of `S`
    - "S_sum" : numpy.array
        Sum of sensitivity indexes (total contribution)
    - "S_sum_conf" : numpy.array
        Statistical confidence interval of sum of `S`
    - "Sa" : numpy.array
        Sensitivity index (uncorrelated contribution)
    - "Sa_conf" : numpy.array
        Statistical confidence interval of `Sa`
    - "Sa_sum" : numpy.array
        Sum of sensitivity indexes (uncorrelated contribution)
    - "Sa_sum_conf" : numpy.array
        Statistical confidence interval of sum of `Sa`
    - "Sb" : numpy.array
        Sensitivity index (correlated contribution)
    - "Sb_conf" : numpy.array
        Statistical confidence interval of `Sb`
    - "Sb_sum" : numpy.array
        Sum of sensitivity indexes (correlated contribution)
    - "Sb_sum_conf" : numpy.array
        Statistical confidence interval of sum of `Sb`
    - "ST" : numpy.array
        Total Sensitivity indexes of features/inputs
    - "ST_conf" : numpy.array
        Statistical confidence interval of `ST`
    - "Signf" : numpy.array
        Signigicancy for each bootstrap iteration
    - "Term" : numpy.array
        Component name
    """

    cp = defaultdict(int)
    cp["n_comp_func"], cp["n_coeff"] = [0] * 3, [0] * 3
    cp["n_comp_func"][0] = d
    cp["n_coeff"][0] = poly_order

    if max_order > 1:
        cp["n_comp_func"][1] = math.comb(d, 2)
        cp["n_coeff"][1] = poly_order**2
        if extended_base:
            cp["n_coeff"][1] += 2 * poly_order

    if max_order == 3:
        cp["n_comp_func"][2] = math.comb(d, 3)
        cp["n_coeff"][2] = poly_order**3
        if extended_base:
            cp["n_coeff"][2] += 3 * poly_order + 3 * poly_order**2

    # Setup Bootstrap (if bootstrap > 1)
    idx = (
        np.arange(0, N).reshape(-1, 1)
        if bootstrap == 1
        else np.argsort(np.random.rand(N, bootstrap), axis=0)[:subset]
    )

    CoreParams = namedtuple(
        "CoreParams",
        [
            "N",
            "d",
            "max_order",
            "ext_base",
            "subset",
            "p_o",
            "nc1",
            "nc2",
            "nc3",
            "nc_t",
            "nt1",
            "nt2",
            "nt3",
            "tnt1",
            "tnt2",
            "tnt3",
            "a_tnt",
            "x",
            "idx",
            "beta",
            "gamma",
            "f0",
        ],
    )

    n_comp_func = cp["n_comp_func"]
    n_coeff = cp["n_coeff"]
    hdmr = CoreParams(
        N,
        d,
        max_order,
        extended_base,
        subset,
        poly_order,
        n_comp_func[0],
        n_comp_func[1],
        n_comp_func[2],
        sum(n_comp_func),
        n_coeff[0],
        n_coeff[1],
        n_coeff[2],
        n_coeff[0] * n_comp_func[0],
        n_coeff[1] * n_comp_func[1],
        n_coeff[2] * n_comp_func[2],
        n_coeff[0] * n_comp_func[0]
        + n_coeff[1] * n_comp_func[1]
        + n_coeff[2] * n_comp_func[2],
        np.zeros(
            (
                n_coeff[0] * n_comp_func[0]
                + n_coeff[1] * n_comp_func[1]
                + n_coeff[2] * n_comp_func[2],
                bootstrap,
            )
        ),
        idx,
        np.array(list(comb(range(d), 2))),
        np.array(list(comb(range(d), 3))),  # Returns empty list when d < 3
        f0,
    )

    # Create Sensitivity Indices Result Dictionary
    keys = (
        "Sa",
        "Sa_conf",
        "Sb",
        "Sb_conf",
        "S",
        "S_conf",
        "Signf",
        "Sa_sum",
        "Sa_sum_conf",
        "Sb_sum",
        "Sb_sum_conf",
        "S_sum",
        "S_sum_conf",
    )
    Si = ResultDict(
        (
            (k, np.zeros((hdmr.nc_t, bootstrap)))
            if k in ("S", "Sa", "Sb", "Signf")
            else (k, np.zeros(hdmr.nc_t))
        )
        for k in keys
    )
    Si["Term"] = problem["names"]
    Si["ST"] = np.full(hdmr.nc_t, np.nan)
    Si["ST_conf"] = np.full(hdmr.nc_t, np.nan)

    # Generate index column for printing results
    if max_order > 1:
        for i in range(hdmr.nc2):
            Si["Term"].extend(
                [
                    "/".join(
                        [
                            problem["names"][hdmr.beta[i, 0]],
                            problem["names"][hdmr.beta[i, 1]],
                        ]
                    )
                ]
            )

    if max_order == 3:
        for i in range(hdmr.nc3):
            Si["Term"].extend(
                [
                    "/".join(
                        [
                            problem["names"][hdmr.gamma[i, 0]],
                            problem["names"][hdmr.gamma[i, 1]],
                            problem["names"][hdmr.gamma[i, 2]],
                        ]
                    )
                ]
            )

    return (hdmr, Si)


def _basis_matrix(X, hdmr):
    """The basis matrix represents the foundation of the component functions.
    It is constructed using orthonormal polynomials for each input variable,
    ensuring that it captures the data optimally. The component functions are
    formed by linearly combining the columns of this matrix.

    Parameters
    ----------
    X : numpy.array
        Model input matrix
    hdmr : namedtuple
        Core parameters of hdmr expansion

    Returns
    -------
    b_m : numpy.array
        Basis matrix
    """
    # Compute normalized X-values
    X_n = (X - np.tile(X.min(0), (X.shape[0], 1))) / np.tile(
        (X.max(0)) - X.min(0), (X.shape[0], 1)
    )

    # Compute Orthonormal Polynomial Coefficients
    coeff = _orth_poly_coeff(X_n, hdmr)

    # Initialize Basis Matrix
    b_m = np.zeros((X.shape[0], hdmr.a_tnt))

    # First order columns of basis matrix
    col = 0
    for i, j in product(range(hdmr.d), range(hdmr.p_o)):
        b_m[:, col] = np.polyval(coeff[j, : j + 2, i], X_n[:, i])
        col += 1

    # Second order columns of basis matrix
    if hdmr.max_order > 1:
        for i, j in _prod(range(0, hdmr.d - 1), range(1, hdmr.d)):
            if hdmr.ext_base:
                b_m[:, col : col + hdmr.p_o] = b_m[:, i * hdmr.p_o : (i + 1) * hdmr.p_o]
                col += hdmr.p_o

                b_m[:, col : col + hdmr.p_o] = b_m[:, j * hdmr.p_o : (j + 1) * hdmr.p_o]
                col += hdmr.p_o

            for k1, k2 in _prod(
                range(i * hdmr.p_o, (i + 1) * hdmr.p_o),
                range(j * hdmr.p_o, (j + 1) * hdmr.p_o),
            ):
                b_m[:, col] = np.multiply(b_m[:, k1], b_m[:, k2])
                col += 1

    # Third order columns of basis matrix
    if hdmr.max_order == 3:
        for i, j, k in _prod(
            range(0, hdmr.d - 2), range(1, hdmr.d - 1), range(2, hdmr.d)
        ):
            if hdmr.ext_base:
                b_m[:, col : col + hdmr.p_o] = b_m[:, i * hdmr.p_o : (i + 1) * hdmr.p_o]
                col += hdmr.p_o

                b_m[:, col : col + hdmr.p_o] = b_m[:, j * hdmr.p_o : (j + 1) * hdmr.p_o]
                col += hdmr.p_o

                b_m[:, col : col + hdmr.p_o] = b_m[:, k * hdmr.p_o : (k + 1) * hdmr.p_o]
                col += hdmr.p_o

                p_o_2 = hdmr.p_o**2
                b_m[:, col : col + p_o_2] = b_m[
                    :,
                    hdmr.tnt1
                    + (2 * hdmr.nt1) * (i + 1)
                    + i * p_o_2 : hdmr.tnt1
                    + (i + 1) * (p_o_2 + 2 * hdmr.nt1),
                ]
                col += p_o_2

                b_m[:, col : col + p_o_2] = b_m[
                    :,
                    hdmr.tnt1
                    + (2 * hdmr.nt1) * (j + 1)
                    + j * p_o_2 : hdmr.tnt1
                    + (j + 1) * (p_o_2 + 2 * hdmr.nt1),
                ]
                col += p_o_2
                b_m[:, col : col + p_o_2] = b_m[
                    :,
                    hdmr.tnt1
                    + (2 * hdmr.nt1) * (k + 1)
                    + k * p_o_2 : hdmr.tnt1
                    + (k + 1) * (p_o_2 + 2 * hdmr.nt1),
                ]
                col += p_o_2

            for l1, l2, l3 in _prod(
                range(i * hdmr.p_o, (i + 1) * hdmr.p_o),
                range(j * hdmr.p_o, (j + 1) * hdmr.p_o),
                range(k * hdmr.p_o, (k + 1) * hdmr.p_o),
            ):
                b_m[:, col] = np.multiply(
                    np.multiply(b_m[:, l1], b_m[:, l2]), b_m[:, l3]
                )
                col += 1

    return b_m


def _orth_poly_coeff(X, hdmr):
    """Calculates the coefficients of orthonormal polynomials based on a given
    input matrix `X`

    Parameters
    ----------
    X : numpy.array
        Normalized Input matrix `X`
    hdmr : namedtuple
        Core parameters of hdmr expansion

    Returns
    -------
    coeff : numpy.array
        Orthonormal polynomial coefficients from highes degree to constant term
          with trailing zeros

    Notes
    ----------
    Please see the reference below

    .. [1] Szegő, G. 1975. . Orthogonal Polynomials. American Mathematical Society.
    """
    p_o_1 = hdmr.p_o + 1
    M = np.zeros((p_o_1, p_o_1, hdmr.d))
    for i in range(hdmr.d):
        k = 0
        for j in range(p_o_1):
            for z in range(p_o_1):
                M[j, z, i] = sum(X[:, i] ** k) / X.shape[0]
                k += 1
            k = j + 1

    coeff = np.zeros((hdmr.p_o, p_o_1, hdmr.d))
    for i, j in product(range(hdmr.d), range(hdmr.p_o)):
        z = range(j + 2)
        for k in z:
            z__k = list(z)
            z__k.pop(k)
            det_ij = det(M[: j + 1, : j + 1, i]) * det(M[: j + 2, : j + 2, i])
            coeff[j, j + 1 - k, i] = (
                (-1) ** (j + k + 1) * det(M[: j + 1, z__k, i]) / np.sqrt(det_ij)
            )

    return coeff


def _prod(*args):
    """Generator that returns unique cartesian product of given tuple arguments

    Parameters
    ----------
    *args : List[Tuple]
        Variable length argument list.

    Yields
    ------
    Tuple
        A non-duplicate numbers in a tuple.
    """
    seen = set()
    for prod in product(*args):
        prod_set = frozenset(prod)
        if len(prod_set) != len(prod):
            continue
        if prod_set not in seen:
            seen.add(prod_set)
            yield prod


def _fanova(b_m, hdmr, Si, Y, bootstrap, max_iter, l2_penalty, alpha):
    """The functional ANOVA decomposition offers two main approaches:
    the extended base approach and the non-extended base approach. These
    approaches follow the guidelines presented in [1] and [2]. The
    extended base approach provides additional information to ensure
    hierarchical orthogonality.

    Parameters
    ----------
    b_m : numpy.array
        Basis matrix
    hdmr : namedtuple
        Core parameters of hdmr expansion
    Si : ResultDict
        Sensitivity Indices
    Y : numpy.array
        Model output
    bootstrap : int
        Number of iteration to be used in bootstrap
    max_iter : int
        Maximum number of iteration used in backfitting algorithm
    l2_penalty : float
        Penalty term for ridge regression
    alpha : float
        Significant level

    Returns
    -------
    Si : ResultDict
        Sensitivity Indices
    hdmr : namedtuple
        Core parameters of hdmr expansion

    Notes
    -----
    .. [1] Li, G., Rabitz, H., Yelvington, P., Oluwole, O., Bacon, F., Kolb, C.,
        and Schoendorf, J. 2010. Global Sensitivity Analysis for Systems with
        Independent and/or Correlated Inputs. The Journal of Physical Chemistry A,
        114(19), p.6022-6032.
    .. [2] Li, G., Rabitz, H. General formulation of HDMR component functions with
        independent and correlated variables. J Math Chem 50, 99–130 (2012).
        https://doi.org/10.1007/s10910-011-9898-0
    """
    for t in range(bootstrap):
        # Extract model output for a corresponding bootstrap iteration
        Y_idx = Y[hdmr.idx[:, t], 0]
        # Subtract mean from it
        Y_idx -= np.mean(Y_idx)

        if hdmr.ext_base:
            cost = _cost_matrix(b_m[hdmr.idx[:, t], :], hdmr)
            hdmr.x[:, t] = _d_morph(
                b_m[hdmr.idx[:, t], :], cost, Y_idx, bootstrap, hdmr
            )
        else:
            Y_res = _first_order(
                b_m[hdmr.idx[:, t], : hdmr.tnt1], Y_idx, max_iter, l2_penalty, hdmr, t
            )
            if hdmr.max_order > 1:
                Y_res = _second_order(
                    b_m[hdmr.idx[:, t], hdmr.tnt1 : hdmr.tnt1 + hdmr.tnt2],
                    Y_res,
                    max_iter,
                    l2_penalty,
                    hdmr,
                    t,
                )
            if hdmr.max_order == 3:
                _third_order(
                    b_m[hdmr.idx[:, t], hdmr.tnt1 + hdmr.tnt2 :],
                    Y_res,
                    l2_penalty,
                    hdmr,
                )

        # Calculate component functions
        Y_e = _comp_func(b_m[hdmr.idx[:, t], :], hdmr, t)
        # Test significancy
        Si["Signf"][:, t] = _f_test(Y_idx, Y_e, alpha, hdmr)
        # Sensitivity Analysis
        Si["S"][:, t], Si["Sa"][:, t], Si["Sb"][:, t] = _ancova(Y_idx, Y_e, hdmr)

    return Si, hdmr


def _cost_matrix(b_m, hdmr):
    """The cost matrix stores information about hierarchical orthogonality.
    It is structured in a way that ensures orthogonality between component
    functions that are hierarchically related.

    Parameters
    ----------
    b_m : numpy.array
        Basis matrix
    hdmr : namedtuple
        Core parameters of hdmr expansion

    Returns
    -------
    cost : numpy.array
        Cost matrix
    """
    cost = np.zeros((hdmr.a_tnt, hdmr.a_tnt))

    range_2nd_1 = lambda x: range(  # noqa: E731
        hdmr.tnt1 + (x) * hdmr.nt2, hdmr.tnt1 + (x + 1) * hdmr.nt2
    )
    range_2nd_2 = lambda x: range(  # noqa: E731
        hdmr.tnt1 + (x) * hdmr.nt2, hdmr.tnt1 + (x) * hdmr.nt2 + hdmr.p_o * 2
    )
    range_3rd_1 = lambda x: range(  # noqa: E731
        hdmr.tnt1 + hdmr.tnt2 + (x) * hdmr.nt3,
        hdmr.tnt1 + hdmr.tnt2 + (x + 1) * hdmr.nt3,
    )
    range_3rd_2 = lambda x: range(  # noqa: E731
        hdmr.tnt1 + hdmr.tnt2 + (x) * hdmr.nt3,
        hdmr.tnt1 + hdmr.tnt2 + (x) * hdmr.nt3 + 3 * hdmr.p_o + 3 * hdmr.p_o**2,
    )

    if hdmr.max_order > 1:
        sr_i = np.mean(b_m, axis=0, keepdims=True)
        sr_ij = np.zeros((2 * hdmr.p_o + 1, hdmr.nt2))
        ct = 0
        for _ in _prod(range(0, hdmr.d - 1), range(1, hdmr.d)):
            sr_ij[0, :] = sr_i[0, range_2nd_1(ct)]
            sr_ij[1:, :] = (
                b_m[:, range_2nd_2(ct)].T @ b_m[:, range_2nd_1(ct)]
            ) / hdmr.subset
            cost[np.ix_(range_2nd_1(ct), range_2nd_1(ct))] = sr_ij.T @ sr_ij
            ct += 1
    if hdmr.max_order == 3:
        sr_ijk = np.zeros((3 * hdmr.p_o + 3 * hdmr.p_o**2 + 1, hdmr.nt3))
        ct = 0
        for _ in _prod(range(0, hdmr.d - 2), range(1, hdmr.d - 1), range(2, hdmr.d)):
            sr_ijk[0, :] = sr_i[0, range_3rd_1(ct)]
            sr_ijk[1:, :] = (
                b_m[:, range_3rd_2(ct)].T @ b_m[:, range_3rd_1(ct)] / hdmr.subset
            )
            cost[np.ix_(range_3rd_1(ct), range_3rd_1(ct))] = sr_ijk.T @ sr_ijk
            ct += 1

    return cost


def _d_morph(b_m, cost, Y_idx, subset, hdmr):
    """D-Morph Regression finds the best solution that aligns with the cost
    matrix. Cost matrix in this case represents the hierarchical orthogonality
    between component functions.

    Parameters
    ----------
    b_m : numpy.array
        Basis matrix for all component functions
    cost : numpy.array
        Cost matrix that satisfies hierarchical orthogonality
    Y_idx : numpy.array
        Model output for a single bootstrap iteration
    subset : int
        Number of subsamples
    hdmr : namedtuple
        Core parameters of hdmr expansion

    Returns
    -------
    soltn : numpy.array
        D-MORPH solution

    Notes
    -----
    Detailed information about D-Morph Regression can be found at
    .. [1] Li, G., Rey-de-Castro, R. & Rabitz, H. D-MORPH regression for modeling
        with fewer unknown parameters than observation data.
        J Math Chem 50, 1747–1764 (2012). https://doi.org/10.1007/s10910-012-0004-z
    """
    # Left Matrix Multiplication with the transpose of cost matrix
    a = (b_m.T @ b_m) / subset  # LHS
    b = (b_m.T @ Y_idx) / subset  # RHS
    try:
        # Pseudo-Inverse of LHS
        a_pinv = pinv(a, hermitian=True)
        rank = matrix_rank(a)
        # Least-Square Solution
        x = a_pinv @ b
        # Projection Matrix
        pr = np.eye(hdmr.a_tnt) - (a_pinv @ a)
        pb = pr @ cost
        U, _, Vh = svd(pb)
    except LinAlgError:
        raise LinAlgError("D-Morph: Pseudo-Inverse did not converge")

    nullity = min(b_m.shape) - rank
    V = Vh.T
    U = np.delete(U, range(0, nullity), axis=1)
    V = np.delete(V, range(0, nullity), axis=1)

    # D-Morph Regression Solution
    soltn = V @ pinv(U.T @ V) @ U.T @ x

    return soltn


def _first_order(b_m1, Y_idx, max_iter, l2_penalty, hdmr, t):
    """Sequential determination of first order component functions.
    First, it computes component functions via ridge regression, i.e.
    fitting model inputs/features to the model output. Later, it takes
    advantage of backfitting algorithm to satisfy hierarchical orthogonality.
    Backfitting algorithm does not guarantee the hierarchical orthogonality.
    We suggest to set extended base option to `True` for those who want to
    guaranteed functional ANOVA expansion.

    Parameters
    ----------
    b_m1 : numpy.array
        Basis matrix for first-order component functions
    Y_idx : numpy.array
        Model output for a single bootstrap iteration
    max_iter : int
        Maximum number of iteration used in backfitting algorithm
    l2_penalty : float
        Penalty term for ridge regression
    hdmr : namedtuple
        Core parameters of hdmr expansion
    t : int
        bootstrap iteration

    Returns
    -------
    Y_res : numpy.array
        Residual model output
    """
    # Temporary first order component matrix
    Y_i = np.empty((hdmr.subset, hdmr.nc1))
    # Initialize iter
    iter = 0
    # To increase readibility
    n1 = hdmr.nt1
    # L2 Penalty
    lambda_eye = l2_penalty * np.identity(n1)
    for i in range(hdmr.nc1):
        try:
            # Left hand side
            a = (
                b_m1[:, i * n1 : n1 * (i + 1)].T @ b_m1[:, i * n1 : n1 * (i + 1)]
            ) / hdmr.subset
            # Adding L2 Penalty (Ridge Regression)
            a += lambda_eye
            # Right hand side
            b = (b_m1[:, i * n1 : n1 * (i + 1)].T @ Y_idx) / hdmr.subset
            # Solution
            hdmr.x[i * n1 : n1 * (i + 1), t] = solve(a, b)
            # Component functions
            Y_i[:, i] = (
                b_m1[:, i * n1 : n1 * (i + 1)] @ hdmr.x[i * n1 : n1 * (i + 1), t]
            )
        except LinAlgError:
            raise LinAlgError(
                "First Order: Least-square regression did not converge."
                "Try increasing L2 penalty term"
            )

    # Backfitting method
    var_old = np.square(hdmr.x[: hdmr.tnt1, t])
    z_t = list(range(hdmr.d))
    while True:
        for i in range(hdmr.d):
            z = z_t[:]
            z.remove(i)
            Y_res = Y_idx - np.sum(Y_i[:, z], axis=1)
            # Left hand side
            a = (
                b_m1[:, i * n1 : n1 * (i + 1)].T @ b_m1[:, i * n1 : n1 * (i + 1)]
            ) / hdmr.subset
            # Right hand side
            b = (b_m1[:, i * n1 : n1 * (i + 1)].T @ Y_res) / hdmr.subset
            # Solution
            hdmr.x[i * n1 : n1 * (i + 1), t] = solve(a, b)
            # Component functions
            Y_i[:, i] = (
                b_m1[:, i * n1 : n1 * (i + 1)] @ hdmr.x[i * n1 : n1 * (i + 1), t]
            )

        var_max = np.absolute(var_old - np.square(hdmr.x[: hdmr.tnt1, t])).max()
        iter += 1

        if (var_max < 1e-4) or (iter > max_iter):
            break

        var_old = np.square(hdmr.x[: hdmr.tnt1, t])

    return Y_idx - np.sum(Y_i, axis=1)


def _second_order(b_m2, Y_res, max_iter, l2_penalty, hdmr, t):
    """Sequential determination of second-order component functions.
    First, it computes component functions via ridge regression, i.e.
    fitting model inputs/features to the model output. Later, it takes
    advantage of backfitting algorithm to satisfy hierarchical orthogonality.
    Backfitting algorithm does not guarantee the hierarchical orthogonality.
    We suggest to set extended base option to `True` for those who want to
    guaranteed functional ANOVA expansion.

    Parameters
    ----------
    b_m2 : numpy.array
        Basis matrix for second-order component functions
    Y_res : numpy.array
        Residual model output
    max_iter : int
        Maximum number of iteration used in backfitting algorithm
    l2_penalty : float
        Penalty term for ridge regression
    hdmr : namedtuple
        Core parameters of hdmr expansion
    t : int
        bootstrap iteration

    Returns
    -------
    Y_res : numpy.array
        Residual model output
    """
    # Temporary second order component matrix
    Y_ij = np.empty((hdmr.subset, hdmr.nc2))
    # To increase readibility
    n2 = hdmr.nt2
    # Initialize iteration counter
    iter = 0
    # L2 Penalty
    lambda_eye = l2_penalty * np.identity(n2)
    for i in range(hdmr.nc2):
        try:
            # Left hand side
            a = (
                b_m2[:, i * n2 : n2 * (i + 1)].T @ b_m2[:, i * n2 : n2 * (i + 1)]
            ) / hdmr.subset
            # Adding L2 Penalty (Ridge Regression)
            a += lambda_eye
            # Right hand side
            b = (b_m2[:, i * n2 : n2 * (i + 1)].T @ Y_res) / hdmr.subset
            # Solution
            hdmr.x[hdmr.tnt1 + i * n2 : hdmr.tnt1 + n2 * (i + 1), t] = solve(a, b)
            # Component functions
            Y_ij[:, i] = (
                b_m2[:, i * n2 : n2 * (i + 1)]
                @ hdmr.x[hdmr.tnt1 + i * n2 : hdmr.tnt1 + n2 * (i + 1), t]
            )
        except LinAlgError:
            raise LinAlgError(
                "Second Order: Least-square regression did not converge."
                "Try increasing L2 penalty term"
            )

    var_old = np.square(hdmr.x[hdmr.tnt1 : hdmr.tnt1 + hdmr.tnt2, t])
    # Backfitting method
    while True:
        for i in range(hdmr.nc2):
            z = list(range(hdmr.nc2))
            z.remove(i)
            Y_r = Y_res - np.sum(Y_ij[:, z], axis=1)
            # Left hand side
            a = (
                b_m2[:, i * n2 : n2 * (i + 1)].T @ b_m2[:, i * n2 : n2 * (i + 1)]
            ) / hdmr.subset
            # Right hand side
            b = (b_m2[:, i * n2 : n2 * (i + 1)].T @ Y_r) / hdmr.subset
            # Solution
            hdmr.x[hdmr.tnt1 + i * n2 : hdmr.tnt1 + n2 * (i + 1), t] = solve(a, b)
            # Component functions
            Y_ij[:, i] = (
                b_m2[:, i * n2 : n2 * (i + 1)]
                @ hdmr.x[hdmr.tnt1 + i * n2 : hdmr.tnt1 + n2 * (i + 1), t]
            )

        var_max = np.absolute(
            var_old - np.square(hdmr.x[hdmr.tnt1 : hdmr.tnt1 + hdmr.tnt2, t])
        ).max()
        iter += 1

        if (var_max < 1e-4) or (iter > max_iter):
            break

        var_old = np.square(hdmr.x[hdmr.tnt1 : hdmr.tnt1 + hdmr.tnt2, t])

    return Y_res - np.sum(Y_ij, axis=1)


def _third_order(b_m3, Y_res, l2_penalty, hdmr, t):
    """Sequential determination of third-order component functions.
    it computes component functions via ridge regression, i.e.
    fitting model inputs/features to the model output.

    Parameters
    ----------
    b_m3 : numpy.array
        Basis matrix for third-order component functions
    Y_res : numpy.array
        Residual model output
    l2_penalty : float
        Penalty term for ridge regression
    hdmr : namedtuple
        Core parameters of hdmr expansion
    t : int
        bootstrap iteration

    Notes
    -----
    Backfitting algorithm is not used here because it may be
    unstable when residual model, Y_res, is close to arrays of zero.
    """
    # To increase readibility
    n3 = hdmr.nt3
    # L2 Penalty
    lambda_eye = l2_penalty * np.identity(n3)
    for i in range(hdmr.nc3):
        try:
            # Left hand side
            a = (
                b_m3[:, i * n3 : n3 * (i + 1)].T @ b_m3[:, i * n3 : n3 * (i + 1)]
            ) / hdmr.subset
            # Adding L2 Penalty (Ridge Regression)
            a += lambda_eye
            # Right hand side
            b = (b_m3[:, i * n3 : n3 * (i + 1)].T @ Y_res) / hdmr.subset
            # Solution
            hdmr.x[
                hdmr.tnt1 + hdmr.tnt2 + i * n3 : hdmr.tnt1 + hdmr.tnt2 + n3 * (i + 1), t
            ] = solve(a, b)
        except LinAlgError:
            raise LinAlgError(
                "Third Order: Least-square regression did not converge."
                "Try increasing L2 penalty term"
            )


def _comp_func(b_m, hdmr, t=None, emulator=None):
    """Computes the component function based on basis matrix and the solution

    Parameters
    ----------
    b_m : numpy.array
        Basis matrix
    hdmr : namedtuple
        Core parameters of hdmr expansion
    t : int
        Bootstrap iteration
    emulator : bool
        Whether it is called by emulator or not

    Returns
    -------
    Y_e : numpy.array
        Emualated model output for each components
    """
    Y_t = np.zeros((b_m.shape[0], hdmr.a_tnt))
    Y_e = np.zeros((b_m.shape[0], hdmr.nc_t))

    # Temporary matrix
    if emulator:  # Use average of solutions if it is called by emulator
        Y_t = np.multiply(b_m, np.tile(hdmr.x.mean(axis=1), [b_m.shape[0], 1]))
    else:  # Use the t-th solution if it is called by fanova
        Y_t = np.multiply(b_m, np.tile(hdmr.x[:, t], [b_m.shape[0], 1]))

    # First order component functions
    for i in range(hdmr.nc1):
        Y_e[:, i] = np.sum(Y_t[:, i * hdmr.p_o : (i + 1) * hdmr.p_o], axis=1)

    # Second order component functions
    if hdmr.max_order > 1:
        for i in range(hdmr.nc2):
            Y_e[:, hdmr.nc1 + i] = np.sum(
                Y_t[:, hdmr.tnt1 + (i) * hdmr.nt2 : hdmr.tnt1 + (i + 1) * hdmr.nt2],
                axis=1,
            )

    # Third order component functions
    if hdmr.max_order == 3:
        for i in range(hdmr.nc3):
            Y_e[:, hdmr.nc1 + hdmr.nc2 + i] = np.sum(
                Y_t[
                    :,
                    hdmr.tnt1
                    + hdmr.tnt2
                    + (i) * hdmr.nt3 : hdmr.tnt1
                    + hdmr.tnt2
                    + (i + 1) * hdmr.nt3,
                ],
                axis=1,
            )

    return Y_e


def _ancova(Y_idx, Y_e, hdmr):
    """Analysis of Covariance. It calculates uncorrelated and correlated contribution
    to the model output variance

    Parameters
    ----------
    Y_idx : numpy.array
        Model output for a single bootstrap iteration
    Y_e : numpy.array
        Emulated output
    hdmr : namedtuple
        Core parameters of hdmr expansion

    Returns
    -------
    S : numpy.array
        Sensitivity index (total contribution)
    Sa : numpy.array
        Sensitivity index (uncorrelated contribution)
    Sb : numpy.array
        Sensitivity index (correlated contribution)

    Notes
    -----
    Please see the reference below

    .. [1] Li, G., Rabitz, H., Yelvington, P., Oluwole, O., Bacon, F., Kolb, C., and
           Schoendorf, J. 2010.
           Global Sensitivity Analysis for Systems with Independent and/or
           Correlated Inputs.
           The Journal of Physical Chemistry A, 114(19), p.6022-6032.
    """
    # Initialize sensitivity indices
    S = np.zeros(hdmr.nc_t)
    Sa = np.zeros(hdmr.nc_t)
    Sb = np.zeros(hdmr.nc_t)

    # Compute the sum of all Y_em terms
    Y_sum = np.sum(Y_e, axis=1)

    # Total Variance
    tot_v = np.var(Y_idx)

    # Analysis of covariance
    for j in range(hdmr.nc_t):
        # Covariance matrix of jth term of Y_em and actual Y
        c = np.cov(np.stack((Y_e[:, j], Y_idx), axis=0))

        # Total sensitivity of jth term
        S[j] = c[0, 1] / tot_v  # Eq. 19

        # Covariance matrix of jth term with emulator Y without jth term
        c = np.cov(np.stack((Y_e[:, j], Y_sum - Y_e[:, j]), axis=0))

        # Structural contribution of jth term
        Sa[j] = c[0, 0] / tot_v  # Eq. 20

        # Correlative contribution of jth term
        Sb[j] = c[0, 1] / tot_v  # Eq. 21

    return S, Sa, Sb


def _f_test(Y_idx, Y_e, alpha, hdmr):
    """Finds component functions that make significant contribution to the
    model output. This statistical analysis is done by F-test which uses
    F-distribution.

    Parameters
    ----------
    Y_idx : numpy.array
        Model output for a single bootstrap iteration
    Y_e : numpy.array
        Emulated output
    alpha : float
        Significance level
    hdmr : namedtuple
        Core parameters of hdmr expansion

    Returns
    -------
    result : numpy.array
        Binary array that shows significant components
    """
    # Initialize result array
    result = np.zeros(hdmr.nc_t)
    # Sum of squared residuals of smaller model
    SSR0 = (Y_idx**2).sum()
    # Now adding each term to the model
    for i in range(hdmr.nc_t):
        # Model with ith term excluded
        Y_res = Y_idx - Y_e[:, i]
        # Number of parameters of proposed model (order dependent)
        if i < hdmr.nc1:
            p1 = hdmr.nt1  # 1st order
        elif hdmr.nc1 <= i < (hdmr.nc1 + hdmr.nc2):
            p1 = hdmr.nt2  # 2nd order
        else:
            p1 = hdmr.nt3  # 3rd order
        # Sum of squared residuals of bigger model
        SSR1 = (Y_res**2).sum()
        # Now calculate the F_stat (F_stat > 0 -> SSR1 < SSR0 )
        F_stat = ((SSR0 - SSR1) / p1) / (SSR1 / (hdmr.subset - p1))
        # Now calculate critical F value
        F_crit = stats.f.ppf(q=alpha, dfn=p1, dfd=hdmr.subset - p1)
        # Now determine whether to accept ith component into model
        if F_stat > F_crit:
            # ith term is significant and should be included in model
            result[i] = 1

    return result


def _finalize(hdmr, Si, alpha, return_emulator):
    """Final processing of sensivity analysis. Calculates confidence interval
    using statistical analysis.

    Parameters
    ----------
    hdmr : namedtuple
        Core parameters of hdmr expansion
    Si : ResultDict
        Sensitivity Indices
    alpha : float
        Significance level
    return_emulator : bool
        Whether to attach emulator to the Si ResultDict

    Returns
    -------
    Si : ResultDict
        Sensitivity Indices
    """

    # Z score
    def z(p):
        return (-1) * np.sqrt(2) * special.erfcinv(p * 2)

    # Multiplier for confidence interval
    mult = z(alpha + (1 - alpha) / 2)

    # Compute the total sensitivity of each parameter/coefficient
    for r in range(hdmr.d):
        if hdmr.max_order == 1:
            TS = hdmr.S[r, :]
        elif hdmr.max_order == 2:
            ij = hdmr.d + np.where(np.sum(hdmr.beta == r, axis=1) == 1)[0]
            TS = np.sum(Si["S"][np.append(r, ij), :], axis=0)
        elif hdmr.max_order == 3:
            ij = hdmr.d + np.where(np.sum(hdmr.beta == r, axis=1) == 1)[0]
            ijk = hdmr.d + hdmr.nc2 + np.where(np.sum(hdmr.nc3 == r, axis=1) == 1)[0]
            TS = np.sum(Si["S"][np.append(r, np.append(ij, ijk)), :], axis=0)
        Si["ST"][r] = np.mean(TS)
        Si["ST_conf"][r] = mult * np.std(TS)

    # Compute Confidence Interval
    Si["Sa_conf"] = mult * np.std(Si["Sa"], axis=1)
    Si["Sb_conf"] = mult * np.std(Si["Sb"], axis=1)
    Si["S_conf"] = mult * np.std(Si["S"], axis=1)
    Si["Sa_sum_conf"] = mult * np.std(np.sum(Si["Sa"]))
    Si["Sb_sum_conf"] = mult * np.std(np.sum(Si["Sb"]))
    Si["S_sum_conf"] = mult * np.std(np.sum(Si["S"]))

    # Assign Bootstrap Results to Si Dict
    Si["Sa"] = np.mean(Si["Sa"], axis=1)
    Si["Sb"] = np.mean(Si["Sb"], axis=1)
    Si["S"] = np.mean(Si["S"], axis=1)
    Si["Sa_sum"] = np.mean(np.sum(Si["Sa"], axis=0))
    Si["Sb_sum"] = np.mean(np.sum(Si["Sb"], axis=0))
    Si["S_sum"] = np.mean(np.sum(Si["S"], axis=0))

    # F-test number of selection to print out
    Si["Signf"] = 100 * Si["Signf"].mean(axis=1)

    # Bind emulator method to the ResultDict
    if return_emulator:
        Si["hdmr"] = hdmr
        Si.emulate = MethodType(emulate, Si)

    # Bind Pandas Dataframe conversion to the ResultDict
    Si.to_df = MethodType(to_df, Si)

    return Si


def to_df(self):
    """Conversion method to Pandas DataFrame. To be attached to ResultDict.

    Returns
    -------
    Pandas DataFrame
    """
    names = self["Term"]

    # Only convert these elements in dict to DF
    include_list = ["Sa", "Sb", "S", "ST"]
    include_list += [f"{name}_conf" for name in include_list]
    new_spec = {k: v for k, v in self.items() if k in include_list}

    return df(new_spec, index=names)


def emulate(self, X):
    """Emulates model output with new input data.

    Constructs orthonormal polynomials with new input matrix, X,
    and multiplies it with solution array, hdmr.x

    Compares emulated results with observed vector, Y.

    Returns
    ========
    Y_em : numpy.array
        Emulated output
    """
    # Calculate HDMR Basis Matrix
    b_m = _basis_matrix(X, self["hdmr"])

    # Get component functions
    Y_em = _comp_func(b_m, self["hdmr"], emulator=True)

    return np.sum(Y_em, axis=1)


def _print(Si):
    nc_t = len(Si["Sa"])
    d = np.isnan(Si["ST"]).sum()
    print("\n")
    cols = "Term    \t      Sa            Sb             S             ST         Significancy "  # noqa: E501
    print(cols)
    print("-" * 88)  # Header break

    format1 = "%-11s   \t %5.2f (\261%.2f) %5.2f (\261%.2f) %5.2f (\261%.2f) %5.2f (\261%.2f)    %-3.2f%%"  # noqa: E501
    format2 = "%-11s   \t %5.2f (\261%.2f) %5.2f (\261%.2f) %5.2f (\261%.2f)                  %-3.2f%%"  # noqa: E501

    for i in range(nc_t):
        if i < d:
            print(
                format1
                % (
                    Si["Term"][i],
                    Si["Sa"][i],
                    Si["Sa_conf"][i],
                    Si["Sb"][i],
                    Si["Sb_conf"][i],
                    Si["S"][i],
                    Si["S_conf"][i],
                    Si["ST"][i],
                    Si["ST_conf"][i],
                    Si["Signf"][i],
                )
            )
        else:
            print(
                format2
                % (
                    Si["Term"][i],
                    Si["Sa"][i],
                    Si["Sa_conf"][i],
                    Si["Sb"][i],
                    Si["Sb_conf"][i],
                    Si["S"][i],
                    Si["S_conf"][i],
                    Si["Signf"][i],
                )
            )

    print("-" * 88)  # Header break

    format3 = "%-11s   \t %5.2f (\261%.2f) %5.2f (\261%.2f) %5.2f (\261%.2f)"
    print(
        format3
        % (
            "Sum",
            Si["Sa_sum"],
            Si["Sa_sum_conf"],
            Si["Sb_sum"],
            Si["Sb_sum_conf"],
            Si["S_sum"],
            Si["S_sum_conf"],
        )
    )

    keys = ("Sa_sum", "Sb_sum", "S_sum", "Sa_sum_conf", "Sb_sum_conf", "S_sum_conf")
    for k in keys:
        Si.pop(k, None)



[docs]
def cli_parse(parser):
    parser.add_argument(
        "-X",
        "--model-input-file",
        type=str,
        required=True,
        default=None,
        help="Model input file",
    )
    parser.add_argument(
        "-mor",
        "--max-order",
        type=int,
        required=True,
        default=2,
        help="Order of HDMR expansion 1-3",
    )
    parser.add_argument(
        "-por",
        "--poly-order",
        type=int,
        required=True,
        default=2,
        help="Maximum polynomial order 1-10",
    )
    parser.add_argument(
        "-K",
        "--bootstrap",
        type=int,
        required=False,
        default=20,
        help="Number of bootstrap iteration 1-100",
    )
    parser.add_argument(
        "-R",
        "--subset",
        type=int,
        required=False,
        default=None,
        help="Number of bootstrap samples 300-N",
    )
    parser.add_argument(
        "-mit",
        "--max-iter",
        type=int,
        required=False,
        default=100,
        help="Maximum iteration for backfitting method 1-1000",
    )
    parser.add_argument(
        "-l2",
        "--l2-penalty",
        type=float,
        required=False,
        default=0.01,
        help="Regularization term",
    )
    parser.add_argument(
        "-a",
        "--alpha",
        type=float,
        required=False,
        default=0.95,
        help="Confidence interval for F-Test",
    )
    parser.add_argument(
        "-ext",
        "--extended-base",
        type=lambda x: (str(x).lower() == "true"),
        required=True,
        default=True,
        help="Whether to use extended base matrix",
    )
    parser.add_argument(
        "-print",
        "--print-to-console",
        type=lambda x: (str(x).lower() == "true"),
        required=False,
        default=False,
        help="Whether to print out result to the console",
    )
    parser.add_argument(
        "-emul",
        "--return-emulator",
        type=lambda x: (str(x).lower() == "true"),
        required=False,
        default=False,
        help="Whether to attach emulate() method to the ResultDict",
    )
    return parser




[docs]
def cli_action(args):
    problem = read_param_file(args.paramfile)
    Y = np.loadtxt(
        args.model_output_file, delimiter=args.delimiter, usecols=(args.column,)
    )
    X = np.loadtxt(args.model_input_file, delimiter=args.delimiter, ndmin=2)

    options = {
        "max_order": args.max_order,
        "poly_order": args.poly_order,
        "bootstrap": args.bootstrap,
        "subset": args.subset,
        "max_iter": args.max_iter,
        "l2_penalty": args.l2_penalty,
        "alpha": args.alpha,
        "extended_base": args.extended_base,
        "print_to_console": args.print_to_console,
        "return_emulator": args.return_emulator,
    }

    if len(X.shape) == 1:
        X = X.reshape((len(X), 1))

    analyze(problem, X, Y, **options)



if __name__ == "__main__":
    common_args.run_cli(cli_parse, cli_action)