Source code for SALib.sample.saltelli

from typing import Dict
import math
import warnings

import numpy as np

from . import common_args
from . import sobol_sequence
from ..util import scale_samples, read_param_file, compute_groups_matrix, _check_groups


[docs] def sample( problem: Dict, N: int, calc_second_order: bool = True, skip_values: int = None ): """Generates model inputs using Saltelli's extension of the Sobol' sequence The Sobol' sequence is a popular quasi-random low-discrepancy sequence used to generate uniform samples of parameter space. Returns a NumPy matrix containing the model inputs using Saltelli's sampling scheme. Saltelli's scheme extends the Sobol' sequence in a way to reduce the error rates in the resulting sensitivity index calculations. If `calc_second_order` is False, the resulting matrix has ``N * (D + 2)`` rows, where ``D`` is the number of parameters. If `calc_second_order` is `True`, the resulting matrix has ``N * (2D + 2)`` rows. These model inputs are intended to be used with :func:`SALib.analyze.sobol.analyze`. .. deprecated:: 1.4.6 Notes ----- The initial points of the Sobol' sequence has some repetition (see Table 2 in Campolongo [1]), which can be avoided by setting the `skip_values` parameter. Skipping values reportedly improves the uniformity of samples. It has been shown that naively skipping values may reduce accuracy, increasing the number of samples needed to achieve convergence (see Owen [2]). A recommendation adopted here is that both `skip_values` and `N` be a power of 2, where `N` is the desired number of samples (see [2] and discussion in [5] for further context). It is also suggested therein that ``skip_values >= N``. The method now defaults to setting `skip_values` to a power of two that is ``>= N``. If `skip_values` is provided, the method now raises a UserWarning in cases where sample sizes may be sub-optimal according to the recommendation above. Parameters ---------- problem : dict The problem definition N : int The number of samples to generate. Ideally a power of 2 and <= `skip_values`. calc_second_order : bool Calculate second-order sensitivities (default True) skip_values : int or None Number of points in Sobol' sequence to skip, ideally a value of base 2 (default: a power of 2 >= N, or 16; whichever is greater) References ---------- 1. Campolongo, F., Saltelli, A., Cariboni, J., 2011. From screening to quantitative sensitivity analysis. A unified approach. Computer Physics Communications 182, 978-988. https://doi.org/10.1016/j.cpc.2010.12.039 2. Owen, A. B., 2020. On dropping the first Sobol' point. arXiv:2008.08051 [cs, math, stat]. Available at: http://arxiv.org/abs/2008.08051 (Accessed: 20 April 2021). 3. Saltelli, A., 2002. Making best use of model evaluations to compute sensitivity indices. Computer Physics Communications 145, 280-297. https://doi.org/10.1016/S0010-4655(02)00280-1 4. Sobol', I.M., 2001. Global sensitivity indices for nonlinear mathematical models and their Monte Carlo estimates. Mathematics and Computers in Simulation, The Second IMACS Seminar on Monte Carlo Methods 55, 271-280. https://doi.org/10.1016/S0378-4754(00)00270-6 5. Discussion: https://github.com/scipy/scipy/pull/10844 https://github.com/scipy/scipy/pull/10844#issuecomment-672186615 https://github.com/scipy/scipy/pull/10844#issuecomment-673029539 """ warnings.warn( "`salib.sample.saltelli` will be removed in SALib 1.5.1 Please use" " `salib.sample.sobol`", category=DeprecationWarning, stacklevel=2, ) # bit-shift test to check if `N` == 2**n if not ((N & (N - 1) == 0) and (N != 0 and N - 1 != 0)): msg = f""" Convergence properties of the Sobol' sequence is only valid if `N` ({N}) is equal to `2^n`. """ warnings.warn(msg) if skip_values is None: # If not specified, set skip_values to next largest power of 2 skip_values = int(2 ** math.ceil(math.log(N) / math.log(2))) # 16 is arbitrarily selected here to avoid initial points # for very low sample sizes skip_values = max(skip_values, 16) elif skip_values > 0: M = skip_values if not ((M & (M - 1) == 0) and (M != 0 and M - 1 != 0)): msg = f""" Convergence properties of the Sobol' sequence is only valid if `skip_values` ({M}) is a power of 2. """ warnings.warn(msg) # warning when N > skip_values # see https://github.com/scipy/scipy/pull/10844#issuecomment-673029539 n_exp = int(math.log(N, 2)) m_exp = int(math.log(M, 2)) if n_exp > m_exp: msg = ( "Convergence may not be valid as the number of " "requested samples is" f" > `skip_values` ({N} > {M})." ) warnings.warn(msg) elif skip_values == 0: warnings.warn("Duplicate samples will be taken as no points are skipped.") else: assert ( isinstance(skip_values, int) and skip_values >= 0 ), "`skip_values` must be a positive integer." D = problem["num_vars"] groups = _check_groups(problem) if not groups: Dg = problem["num_vars"] else: G, group_names = compute_groups_matrix(groups) Dg = len(set(group_names)) # Create base sequence - could be any type of sampling base_sequence = sobol_sequence.sample(N + skip_values, 2 * D) if calc_second_order: saltelli_sequence = np.zeros([(2 * Dg + 2) * N, D]) else: saltelli_sequence = np.zeros([(Dg + 2) * N, D]) index = 0 for i in range(skip_values, N + skip_values): # Copy matrix "A" for j in range(D): saltelli_sequence[index, j] = base_sequence[i, j] index += 1 # Cross-sample elements of "B" into "A" for k in range(Dg): for j in range(D): if (not groups and j == k) or (groups and group_names[k] == groups[j]): saltelli_sequence[index, j] = base_sequence[i, j + D] else: saltelli_sequence[index, j] = base_sequence[i, j] index += 1 # Cross-sample elements of "A" into "B" # Only needed if you're doing second-order indices (true by default) if calc_second_order: for k in range(Dg): for j in range(D): if (not groups and j == k) or ( groups and group_names[k] == groups[j] ): saltelli_sequence[index, j] = base_sequence[i, j] else: saltelli_sequence[index, j] = base_sequence[i, j + D] index += 1 # Copy matrix "B" for j in range(D): saltelli_sequence[index, j] = base_sequence[i, j + D] index += 1 saltelli_sequence = scale_samples(saltelli_sequence, problem) return saltelli_sequence
[docs] def cli_parse(parser): """Add method specific options to CLI parser. Parameters ---------- parser : argparse object Returns ------- Updated argparse object """ parser.add_argument( "--max-order", type=int, required=False, default=2, choices=[1, 2], help="Maximum order of sensitivity indices \ to calculate", ) parser.add_argument( "--skip-values", type=int, required=False, default=None, help="Number of sample points to skip \ (default: next largest power of 2 from `samples`)", ) # hacky way to remove an argument (seed option not relevant for Saltelli) remove_opts = [x for x in parser._actions if x.dest == "seed"] [ parser._handle_conflict_resolve(None, [("--seed", x), ("-s", x)]) for x in remove_opts ] return parser
[docs] def cli_action(args): """Run sampling method Parameters ---------- args : argparse namespace """ problem = read_param_file(args.paramfile) param_values = sample( problem, args.samples, calc_second_order=(args.max_order == 2), skip_values=args.skip_values, ) np.savetxt( args.output, param_values, delimiter=args.delimiter, fmt="%." + str(args.precision) + "e", )
if __name__ == "__main__": common_args.run_cli(cli_parse, cli_action)