Source code for SALib.analyze.discrepancy

from typing import Dict

import numpy as np
from scipy.stats import qmc

from SALib.analyze import common_args
from SALib.util import read_param_file, ResultDict, _check_groups


[docs] def analyze( problem: Dict, X: np.ndarray, Y: np.ndarray, method: str = "WD", print_to_console: bool = False, seed: int = None, ): """Discrepancy indices. Parameters ---------- problem : dict The problem definition X, Y : numpy.ndarray An array of model inputs and outputs. method : {"WD", "CD", "MD", "L2-star"} Type of discrepancy. Refer to `scipy.stats.qmc.discrepancy` for more details. Default is "WD". print_to_console : bool, optional Print results directly to console (default False) seed : int, optional Seed value to ensure deterministic results Unused, but defined to maintain compatibility with other functions. Notes ----- Compatible with: all samplers Based on 2D sub projections of ``[Xi,Y]``, the discrepancy of each sample is calculated which gives a value for all ``Xi``. This information is used as a measure of sensitivity. Discrepancy analysis is very fast and is visually explainable. Considering two variables ``X1`` and ``X2``, ``X1`` is more influential than ``X2`` when the scatterplot of ``X1`` against ``Y`` displays a more discernible shape than the scatterplot of ``X2`` against ``Y``. For the method to work properly, the input parameter space need to be uniformly covered as the quality of the measure depends on the value of the discrepancy. Taking a 2D sub projection, if the distribution of sample along ``Xi`` is not uniform, it will have an impact on the discrepancy, the value will increase, i.e. the importance of this parameter would be inflated. References ---------- 1. A. Puy, P.T. Roy and A. Saltelli. 2023. Discrepancy measures for sensitivity analysis. https://arxiv.org/abs/2206.13470 2. A. Saltelli, M. Ratto, T. Andres, F. Campolongo, J. Cariboni, D. Gatelli, M. Saisana, and S. Tarantola. 2008. Global Sensitivity Analysis: The Primer. Wiley, West Sussex, U.K. https://dx.doi.org/10.1002/9780470725184 Accessible at: http://www.andreasaltelli.eu/file/repository/Primer_Corrected_2022.pdf Examples -------- >>> import numpy as np >>> from SALib.sample import latin >>> from SALib.analyze import discrepancy >>> from SALib.test_functions import Ishigami >>> problem = { ... 'num_vars': 3, ... 'names': ['x1', 'x2', 'x3'], ... 'bounds': [[-np.pi, np.pi]]*3 ... } >>> X = latin.sample(problem, 1000) >>> Y = Ishigami.evaluate(X) >>> Si = discrepancy.analyze(problem, X, Y, print_to_console=True) """ D = problem["num_vars"] groups = _check_groups(problem) Y = Y.reshape(-1, 1) bounds = np.asarray(problem["bounds"]).T X = qmc.scale(sample=X, l_bounds=bounds[0], u_bounds=bounds[1], reverse=True) Y = qmc.scale(sample=Y, l_bounds=np.min(Y), u_bounds=np.max(Y), reverse=True) s_discrepancy = [ qmc.discrepancy(np.concatenate([X[:, i, None], Y], axis=1), method=method) for i in range(D) ] s_discrepancy = s_discrepancy / np.sum(s_discrepancy) if groups: groups = np.array(groups) unique_grps = [*dict.fromkeys(groups)] tmp = np.full((len(unique_grps), 1), np.nan) # Take the mean of effects from parameters that are grouped together for grp_id, grp in enumerate(unique_grps): tmp[grp_id, :] = np.mean(s_discrepancy[groups == grp], axis=0) s_discrepancy = tmp.flatten() keys = ("s_discrepancy",) Si = ResultDict((k, np.zeros(D)) for k in keys) Si["names"] = problem["names"] Si["s_discrepancy"] = s_discrepancy if print_to_console: print(Si.to_df()) return Si
[docs] def cli_parse(parser): parser.add_argument( "-X", "--model-input-file", type=str, required=True, help="Model input file" ) return parser
[docs] def cli_action(args): problem = read_param_file(args.paramfile) X = np.loadtxt(args.model_input_file, delimiter=args.delimiter) Y = np.loadtxt( args.model_output_file, delimiter=args.delimiter, usecols=(args.column,) ) analyze( problem, X, Y, print_to_console=True, seed=args.seed, )
if __name__ == "__main__": common_args.run_cli(cli_parse, cli_action)