Source code for SALib.sample.morris.morris

import numpy as np
from typing import Dict

import numpy.random as rd
import warnings

from .local import LocalOptimisation
from .brute import BruteForce

from .strategy import SampleMorris

from SALib.sample import common_args
from SALib.util import (scale_samples, read_param_file, compute_groups_matrix,
                        _define_problem_with_groups, _compute_delta,
                        _check_groups)


__all__ = ['sample']


[docs]def sample(problem: Dict, N: int, num_levels: int = 4, optimal_trajectories: int = None, local_optimization: bool = True, seed: int = None) -> np.ndarray: """Generate model inputs using the Method of Morris. Three variants of Morris' sampling for elementary effects is supported: - Vanilla Morris (see [1]) when ``optimal_trajectories`` is ``None``/``False`` and ``local_optimization`` is ``False`` - Optimised trajectories when ``optimal_trajectories=True`` using Campolongo's enhancements (see [2]) and optionally Ruano's enhancement (see [3]) when ``local_optimization=True`` - Morris with groups when the problem definition specifies groups of parameters Results from these model inputs are intended to be used with :func:`SALib.analyze.morris.analyze`. Notes ----- Campolongo et al., [2] introduces an optimal trajectories approach which attempts to maximize the parameter space scanned for a given number of trajectories (where `optimal_trajectories` :math:`\\in {2, ..., N}`). The approach accomplishes this aim by randomly generating a high number of possible trajectories (500 to 1000 in [2]) and selecting a subset of ``r`` trajectories which have the highest spread in parameter space. The ``r`` variable in [2] corresponds to the ``optimal_trajectories`` parameter here. Calculating all possible combinations of trajectories can be computationally expensive. The number of factors makes little difference, but the ratio between number of optimal trajectories and the sample size results in an exponentially increasing number of scores that must be computed to find the optimal combination of trajectories. We suggest going no higher than 4 from a pool of 100 samples with this "brute force" approach. Ruano et al., [3] proposed an alternative approach with an iterative process that maximizes the distance between subgroups of generated trajectories, from which the final set of trajectories are selected, again maximizing the distance between each. The approach is not guaranteed to produce the most optimal spread of trajectories, but are at least locally maximized and significantly reduce the time taken to select trajectories. With ``local_optimization = True`` (which is default), it is possible to go higher than the previously suggested 4 from 100. Parameters ---------- problem : dict The problem definition N : int The number of trajectories to generate num_levels : int, default=4 The number of grid levels (should be even) optimal_trajectories : int The number of optimal trajectories to sample (between 2 and N) local_optimization : bool, default=True Flag whether to use local optimization according to Ruano et al. (2012) Speeds up the process tremendously for bigger N and num_levels. If set to ``False`` brute force method is used, unless ``gurobipy`` is available seed : int Seed to generate a random number Returns ------- sample_morris : np.ndarray Array containing the model inputs required for Method of Morris. The resulting matrix has :math:`(G/D+1)*N/T` rows and :math:`D` columns, where :math:`D` is the number of parameters, :math:`G` is the number of groups (if no groups are selected, the number of parameters). :math:`T` is the number of trajectories :math:`N`, or `optimal_trajectories` if selected. References ---------- .. [1] Morris, M.D., 1991. Factorial Sampling Plans for Preliminary Computational Experiments. Technometrics 33, 161–174. https://doi.org/10.1080/00401706.1991.10484804 .. [2] Campolongo, F., Cariboni, J., & Saltelli, A. 2007. An effective screening design for sensitivity analysis of large models. Environmental Modelling & Software, 22(10), 1509–1518. https://doi.org/10.1016/j.envsoft.2006.10.004 .. [3] Ruano, M.V., Ribes, J., Seco, A., Ferrer, J., 2012. An improved sampling strategy based on trajectory design for application of the Morris method to systems with many input factors. Environmental Modelling & Software 37, 103–109. https://doi.org/10.1016/j.envsoft.2012.03.008 """ if seed: np.random.seed(seed) _check_if_num_levels_is_even(num_levels) problem = _define_problem_with_groups(problem) sample_morris = _sample_morris(problem, N, num_levels) if optimal_trajectories: if local_optimization and (not isinstance(optimal_trajectories, int) or optimal_trajectories > N): msg = ("optimal_trajectories should be an " f"integer between 2 and {N}") raise ValueError(msg) sample_morris = _compute_optimised_trajectories(problem, sample_morris, N, optimal_trajectories, local_optimization) sample_morris = scale_samples(sample_morris, problem) return sample_morris
def _sample_morris(problem: Dict, number_trajectories: int, num_levels: int = 4) -> np.ndarray: """Generate trajectories for groups Returns an :math:`N(g+1)`-by-:math:`k` array of `N` trajectories, where :math:`g` is the number of groups and :math:`k` is the number of factors Parameters --------- problem : dict The problem definition number_trajectories : int The number of trajectories to generate num_levels : int, default=4 The number of grid levels Returns ------- numpy.ndarray """ groups = _check_groups(problem) group_membership, _ = compute_groups_matrix(groups) _check_group_membership(group_membership) num_params = group_membership.shape[0] num_groups = group_membership.shape[1] sample_morris = [_generate_trajectory(group_membership, num_levels) for _ in range(number_trajectories)] sample_morris = np.array(sample_morris) return sample_morris.reshape((number_trajectories * (num_groups + 1), num_params)) def _generate_trajectory(group_membership: np.ndarray, num_levels: int = 4) -> np.ndarray: """Return a single trajectory Return a single trajectory of size :math:`(g+1)`-by-:math:`k` where :math:`g` is the number of groups, and :math:`k` is the number of factors, both implied by the dimensions of `group_membership` Parameters --------- group_membership : np.ndarray a k-by-g matrix which notes factor membership of groups num_levels : int, default=4 The number of levels in the grid Returns ------- np.ndarray """ delta = _compute_delta(num_levels) # Infer number of groups `g` and number of params `k` from # `group_membership` matrix num_params = group_membership.shape[0] num_groups = group_membership.shape[1] # Matrix B - size (g + 1) * g - lower triangular matrix B = np.tril(np.ones([num_groups + 1, num_groups], dtype=int), -1) P_star = _generate_p_star(num_groups) # Matrix J - a (g+1)-by-num_params matrix of ones J = np.ones((num_groups + 1, num_params)) # Matrix D* - num_params-by-num_params matrix which decribes whether # factors move up or down D_star = np.diag(rd.choice([-1, 1], num_params)) x_star = _generate_x_star(num_params, num_levels) # Matrix B* - size (num_groups + 1) * num_params B_star = _compute_b_star(J, x_star, delta, B, group_membership, P_star, D_star) return B_star def _compute_b_star(J: np.ndarray, x_star: np.ndarray, delta: float, B: np.ndarray, G: np.ndarray, P_star: np.ndarray, D_star: np.ndarray) -> np.ndarray: """ Compute the random sampling matrix B*. Parameters ---------- J: matrix of 1's x_star: randomly chosen "base value" of x delta: parameters variation B: sampling matrix (not random) G: groups matrix P_star: random permutation matrix D_star: diagonal matrix with each element being +1 or -1, with equal probability Returns ------- Random sampling matrix B* """ element_a = J[0, :] * x_star element_b = np.matmul(G, P_star).T element_c = np.matmul(2.0 * B, element_b) element_d = np.matmul((element_c - J), D_star) b_star = element_a + (delta / 2.0) * (element_d + J) return b_star def _generate_p_star(num_groups: int) -> np.ndarray: """Describe the order in which groups move Parameters --------- num_groups : int Returns ------- np.ndarray Matrix P* - size (g-by-g) """ p_star = np.eye(num_groups, num_groups) rd.shuffle(p_star) return p_star def _generate_x_star(num_params: int, num_levels: int) -> np.ndarray: """Generate an 1-by-num_params array to represent initial position for EE This should be a randomly generated array in the p level grid :math:`\\omega` Parameters --------- num_params : int The number of parameters (factors) num_levels : int The number of levels Returns ------- numpy.ndarray The initial starting positions of the trajectory """ x_star = np.zeros((1, num_params)) delta = _compute_delta(num_levels) bound = 1 - delta grid = np.linspace(0, bound, int(num_levels / 2)) x_star[0, :] = rd.choice(grid, num_params) return x_star def _compute_optimised_trajectories(problem: Dict, input_sample: int, N: int, k_choices: int, local_optimization: bool = False) \ -> np.ndarray: """ Calls the procedure to compute the optimum k_choices of trajectories from the input_sample. If there are groups, then this procedure allocates the groups to the correct call here. Parameters --------- problem : dict The problem definition input_sample : N : int The number of samples to generate k_choices : int The number of optimal trajectories local_optimization : bool, default=False If true, uses local optimisation heuristic """ if local_optimization is False and k_choices > 10: msg = "Running optimal trajectories greater than values of 10 \ will take a long time." raise ValueError(msg) if np.any((input_sample < 0) | (input_sample > 1)): raise ValueError("Input sample must be scaled between 0 and 1") num_groups = len(set(problem['groups'])) num_params = problem['num_vars'] strategy = _choose_optimization_strategy(local_optimization) context = SampleMorris(strategy) output = context.sample(input_sample, N, num_params, k_choices, num_groups) return output def _check_if_num_levels_is_even(num_levels: int): """ Checks if the number of levels is even. If not, raises a warn. Parameters ---------- num_levels: int Number of levels """ if not num_levels % 2 == 0: warnings.warn("num_levels should be an even number, " "sample may be biased") def _check_group_membership(group_membership: np.ndarray): """ Checks if the group_membership matrix was correctly defined Parameters ---------- group_membership: np.array Group membership matrix """ if group_membership is None: raise ValueError("Please define the 'group_membership' matrix") if not isinstance(group_membership, np.ndarray): raise TypeError("Argument 'group_membership' should be formatted \ as a numpy np.ndarray") def _choose_optimization_strategy(local_optimization: bool): """ Choose the strategy to optimize the trajectories. Parameters ---------- local_optimization: boolean to indicate if a local optimization should be used. Returns ------- """ if local_optimization: # Use local method strategy = LocalOptimisation() else: # Use brute force approach strategy = BruteForce() return strategy def cli_parse(parser): parser.add_argument('-l', '--levels', type=int, required=False, default=4, help='Number of grid levels \ (Morris only)') parser.add_argument('-k', '--k-optimal', type=int, required=False, default=None, help='Number of optimal trajectories \ (Morris only)') parser.add_argument('-lo', '--local', type=bool, required=True, default=False, help='Use the local optimisation method \ (Morris with optimization only)') return parser def cli_action(args): rd.seed(args.seed) problem = read_param_file(args.paramfile) param_values = sample(problem, args.samples, args.levels, args.k_optimal, args.local) np.savetxt(args.output, param_values, delimiter=args.delimiter, fmt='%.' + str(args.precision) + 'e') if __name__ == "__main__": common_args.run_cli(cli_parse, cli_action)