from typing import Dict
import numpy as np
from scipy.stats import qmc
from SALib.analyze import common_args
from SALib.util import read_param_file, ResultDict, _check_groups
def analyze(
    problem: Dict,
    X: np.ndarray,
    Y: np.ndarray,
    method: str = "WD",
    print_to_console: bool = False,
    seed: int = None,
):
    """Discrepancy indices.

    Parameters
    ----------
    problem : dict
        The problem definition
    X, Y : numpy.ndarray
        An array of model inputs and outputs.
    method : {"WD", "CD", "MD", "L2-star"}
        Type of discrepancy. Refer to `scipy.stats.qmc.discrepancy` for more
        details. Default is "WD".
    print_to_console : bool, optional
        Print results directly to console (default False)
    seed : int, optional
        Seed value to ensure deterministic results
        Unused, but defined to maintain compatibility with other functions.

    Returns
    -------
    Si : ResultDict
        Has keys "names" and "s_discrepancy" (normalised discrepancy of each
        input, or of each group when groups are defined; sums to 1).

    Notes
    -----
    Compatible with:
        all samplers

    Based on 2D sub projections of ``[Xi,Y]``, the discrepancy of each sample
    is calculated which gives a value for all ``Xi``. This information is used
    as a measure of sensitivity.

    Discrepancy analysis is very fast and is visually explainable. Considering
    two variables ``X1`` and ``X2``, ``X1`` is more influential than ``X2``
    when the scatterplot of ``X1`` against ``Y`` displays a more discernible
    shape than the scatterplot of ``X2`` against ``Y``.

    For the method to work properly, the input parameter space need to be
    uniformly covered as the quality of the measure depends on the value of
    the discrepancy. Taking a 2D sub projection, if the distribution of sample
    along ``Xi`` is not uniform, it will have an impact on the discrepancy,
    the value will increase, i.e. the importance of this parameter would be
    inflated.

    References
    ----------
    1. A. Puy, P.T. Roy and A. Saltelli. 2023. Discrepancy measures for
       sensitivity analysis. https://arxiv.org/abs/2206.13470
    2. A. Saltelli, M. Ratto, T. Andres, F. Campolongo, J. Cariboni, D. Gatelli,
       M. Saisana, and S. Tarantola. 2008.
       Global Sensitivity Analysis: The Primer.
       Wiley, West Sussex, U.K.
       https://dx.doi.org/10.1002/9780470725184
       Accessible at:
       http://www.andreasaltelli.eu/file/repository/Primer_Corrected_2022.pdf

    Examples
    --------
    >>> import numpy as np
    >>> from SALib.sample import latin
    >>> from SALib.analyze import discrepancy
    >>> from SALib.test_functions import Ishigami
    >>> problem = {
    ...   'num_vars': 3,
    ...   'names': ['x1', 'x2', 'x3'],
    ...   'bounds': [[-np.pi, np.pi]]*3
    ... }
    >>> X = latin.sample(problem, 1000)
    >>> Y = Ishigami.evaluate(X)
    >>> Si = discrepancy.analyze(problem, X, Y, print_to_console=True)
    """
    D = problem["num_vars"]
    groups = _check_groups(problem)

    # qmc.discrepancy requires samples in the unit hypercube: reverse-scale
    # X from the problem bounds and Y from its own observed range.
    Y = Y.reshape(-1, 1)
    bounds = np.asarray(problem["bounds"]).T
    X = qmc.scale(sample=X, l_bounds=bounds[0], u_bounds=bounds[1], reverse=True)
    Y = qmc.scale(sample=Y, l_bounds=np.min(Y), u_bounds=np.max(Y), reverse=True)

    # Discrepancy of each 2D sub-projection [Xi, Y]; higher discrepancy of
    # the joint cloud indicates a more influential parameter.
    s_discrepancy = np.array(
        [
            qmc.discrepancy(np.concatenate([X[:, i, None], Y], axis=1), method=method)
            for i in range(D)
        ]
    )
    # Normalise so the indices sum to 1.
    s_discrepancy = s_discrepancy / np.sum(s_discrepancy)

    names = problem["names"]
    if groups:
        groups = np.array(groups)
        # Preserve first-seen order of group labels.
        unique_grps = [*dict.fromkeys(groups)]
        tmp = np.full(len(unique_grps), np.nan)

        # Take the mean of effects from parameters that are grouped together
        for grp_id, grp in enumerate(unique_grps):
            tmp[grp_id] = np.mean(s_discrepancy[groups == grp])

        s_discrepancy = tmp
        # Bug fix: report one name per group so that "names" and
        # "s_discrepancy" have matching lengths (required by to_df()).
        names = unique_grps

    Si = ResultDict([("s_discrepancy", s_discrepancy)])
    Si["names"] = names

    if print_to_console:
        print(Si.to_df())

    return Si
def cli_parse(parser):
    """Add method-specific options to the shared CLI argument parser.

    Parameters
    ----------
    parser : argparse.ArgumentParser
        Parser supplied by ``common_args``.

    Returns
    -------
    argparse.ArgumentParser
        The same parser with the model-input-file option added.
    """
    parser.add_argument(
        "-X", "--model-input-file", type=str, required=True, help="Model input file"
    )
    return parser
def cli_action(args):
    """Run discrepancy analysis from parsed command-line arguments.

    Loads the problem definition, model inputs and outputs from the files
    named in ``args``, then prints the discrepancy indices to the console.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed CLI arguments; expects ``paramfile``, ``model_input_file``,
        ``model_output_file``, ``delimiter``, ``column`` and ``seed``.
    """
    problem = read_param_file(args.paramfile)
    X = np.loadtxt(args.model_input_file, delimiter=args.delimiter)
    Y = np.loadtxt(
        args.model_output_file, delimiter=args.delimiter, usecols=(args.column,)
    )
    analyze(
        problem,
        X,
        Y,
        print_to_console=True,
        seed=args.seed,
    )
if __name__ == "__main__":
    # Script entry point: delegate argument parsing and dispatch to the
    # shared SALib CLI helper, wiring in this module's parser and action.
    common_args.run_cli(cli_parse, cli_action)