Source code for SALib.analyze.morris

from typing import Dict, List
import numpy as np
from scipy.stats import norm

from . import common_args
from ..util import (
    read_param_file,
    compute_groups_matrix,
    ResultDict,
    _define_problem_with_groups,
    _compute_delta,
    handle_seed,
)



[docs]
def analyze(
    problem: Dict,
    X: np.ndarray,
    Y: np.ndarray,
    num_resamples: int = 100,
    conf_level: float = 0.95,
    scaled: bool = False,
    print_to_console: bool = False,
    num_levels: int = 4,
    seed=None,
) -> Dict:
    """Perform Morris Analysis on model outputs.

    Returns a result set with keys ``mu``, ``mu_star``, ``sigma``, and
    ``mu_star_conf``, where each entry corresponds to the parameters
    defined in the problem spec or parameter file.

    - ``mu`` metric indicates the mean of the distribution
    - ``mu_star`` metric indicates the mean of the distribution of absolute
      values
    - ``sigma`` is the standard deviation of the distribution

    When ``scaled`` is True, the elementary effects are scaled by the ratio of
    standard deviation of ``X`` and ``Y`` according to [3]. When using this
    option it is important to ensure that ``X`` contains the actual values
    passed into the model, as the elementary effects are divided by the
    step calculated from ``X`` rather than using `delta` which is calculated from
    the number of levels used in the sample. This could be the case if you
    perform post-processing on the values before passing them to the model.

    Scaled elementary effects are useful when comparing different model outputs
    with each other when the input and output parameters have different scales.
    The ranking between the ordinary elementary effects and the scaled should be
    the same.

    Notes
    -----
    When applied with groups, the ``mu`` metric is less reliable as the effect
    from parameters within a group become averaged out.

    The ``mu_star`` metric avoids this issue as it indicates the mean of the
    absolute values. If the direction of effects is important, Campolongo et
    al., [2] suggest comparing ``mu_star`` with ``mu``. If ``mu`` is low
    and ``mu_star`` is high, then the effects are of different signs.

    ``sigma`` is used as an indicator of interactions between parameters, or
    groups of parameters.

    Compatible with:
        `morris` : :func:`SALib.sample.morris.sample`

    Examples
    --------
    >>> X = morris.sample(problem, 1000, num_levels=4)
    >>> Y = Ishigami.evaluate(X)
    >>> Si = morris.analyze(problem, X, Y, conf_level=0.95,
    >>>                     print_to_console=True, num_levels=4)


    Parameters
    ----------
    problem : dict
        The problem definition
    X : numpy.array
        The NumPy matrix containing the model inputs of dtype=float
    Y : numpy.array
        The NumPy array containing the model outputs of dtype=float
    scaled : bool, default=False
        If True, the elementary effects are scaled by the ratio of
        standard deviation of X and Y according to [3]
    num_resamples : int
        The number of resamples used to compute the confidence
        intervals (default 100)
    conf_level : float
        The confidence interval level (default 0.95)
    print_to_console : bool
        Print results directly to console (default False)
    num_levels : int
        The number of grid levels, must be identical to the value
        passed to SALib.sample.morris (default 4)
    seed : {int, None, np.random.Generator}
        Seed to generate a random number

    Returns
    -------
    Si : dict
        A dictionary of sensitivity indices containing the following entries.

        - `mu` - the mean elementary effect
        - `mu_star` - the absolute of the mean elementary effect
        - `sigma` - the standard deviation of the elementary effect
        - `mu_star_conf` - the bootstrapped confidence interval
        - `names` - the names of the parameters


    References
    ----------
    1. Morris, M. (1991).
       Factorial Sampling Plans for Preliminary Computational Experiments.
       Technometrics, 33(2):161-174,
       doi:10.1080/00401706.1991.10484804.

    2. Campolongo, F., J. Cariboni, and A. Saltelli (2007).
       An effective screening design for sensitivity analysis
       of large models.
       Environmental Modelling & Software, 22(10):1509-1518,
       doi:10.1016/j.envsoft.2006.10.004.

    3. Sin and Gearney (2009)
       Improving the Morris Method for Sensitivity Analysis
       by Scaling the Elementary Effects.
       19th European Symposium on Computer Aided Process Engineering ESCAPE19:925-930

    4. Moret et al. (2017)
        Characterization of input uncertainties in strategic
        energy planning models.
        Applied Energy, Volume 202, 15 September 2017, Pages 597-617
        https://doi.org/10.1016/j.apenergy.2017.05.106
    """

    rng = handle_seed(seed)

    _define_problem_with_groups(problem)

    _check_if_array_of_floats(X)
    _check_if_array_of_floats(Y)

    delta = _compute_delta(num_levels)

    num_vars = problem["num_vars"]
    groups, unique_group_names = compute_groups_matrix(problem["groups"])
    number_of_groups = len(unique_group_names)

    num_trajectories = int(Y.size / (number_of_groups + 1))
    trajectory_size = int(Y.size / num_trajectories)

    elementary_effects = _compute_elementary_effects(
        X, Y, trajectory_size, delta, scaling=scaled
    )

    Si = _compute_statistical_outputs(
        elementary_effects,
        num_vars,
        num_resamples,
        conf_level,
        groups,
        unique_group_names,
        rng,
    )

    if print_to_console:
        print(Si.to_df())

    return Si



def _compute_statistical_outputs(
    elementary_effects: np.ndarray,
    num_vars: int,
    num_resamples: int,
    conf_level: float,
    groups: np.ndarray,
    unique_group_names: List,
    rng: np.random.Generator,
) -> ResultDict:
    """Computes the statistical parameters related to Morris method.

    Parameters
    ----------
    elementary_effects: np.ndarray
        Morris elementary effects.
    num_vars: int
        Number of problem's variables
    num_resamples: int
        Number of resamples
    conf_level: float
        Confidence level
    groups: np.ndarray
        Array defining the distribution of groups
    unique_group_names: List
        Names of the groups
    rng: np.random.Generator

    Returns
    -------
    Si: ResultDict
        Morris statistical parameters.
    """

    Si = ResultDict(
        (k, [None] * num_vars)
        for k in ["names", "mu", "mu_star", "sigma", "mu_star_conf"]
    )
    Si["names"] = unique_group_names

    mu = np.average(elementary_effects, 1)
    mu_star = np.average(np.abs(elementary_effects), 1)
    sigma = np.std(elementary_effects, axis=1, ddof=1)
    mu_star_conf = _compute_mu_star_confidence(
        elementary_effects, num_vars, num_resamples, conf_level, rng
    )

    Si["mu"] = _compute_grouped_metric(mu, groups)
    Si["mu_star"] = _compute_grouped_metric(mu_star, groups)
    Si["sigma"] = _compute_grouped_sigma(sigma, groups)
    Si["mu_star_conf"] = _compute_grouped_metric(mu_star_conf, groups)

    return Si


def _compute_grouped_sigma(
    ungrouped_sigma: np.ndarray, groups: np.ndarray
) -> np.ndarray:
    """Sigma values for the groups.

    Returns sigma for the groups of parameter values in the argument
    ungrouped_metric where the group consists of no more than
    one parameter

    Parameters
    ----------
    ungrouped_sigma: np.ndarray
        Sigma values calculated without considering the groups
    groups: np.ndarray
        Array defining the distribution of groups

    Returns
    -------
    sigma: np.ndarray
        Sigma values for the groups.
    """
    sigma_agg = _compute_grouped_metric(ungrouped_sigma, groups)

    sigma = np.zeros(groups.shape[1], dtype=float)
    np.copyto(sigma, sigma_agg, where=groups.sum(axis=0) == 1)
    np.copyto(sigma, np.nan, where=groups.sum(axis=0) != 1)

    return sigma


def _compute_grouped_metric(
    ungrouped_metric: np.ndarray, groups: np.ndarray
) -> np.ndarray:
    """Computes the mean value for the groups of parameter values.

    Parameters
    ----------
    ungrouped_metric: np.ndarray
        Metric calculated without considering the groups
    groups: np.ndarray
        Array defining the distribution of groups

    Returns
    -------
    mean_of_mu_star: np.ndarray
         Mean value for the groups of parameter values
    """

    groups = np.array(groups, dtype=bool)

    mu_star_masked = np.ma.masked_array(
        ungrouped_metric * groups.T, mask=(groups ^ 1).T
    )
    mean_of_mu_star = np.ma.mean(mu_star_masked, axis=1)

    return mean_of_mu_star


def _reorganize_output_matrix(
    output_array: np.ndarray,
    value_increased: np.ndarray,
    value_decreased: np.ndarray,
    increase: bool = True,
) -> np.ndarray:
    """Reorganize the output matrix.

    This method reorganizes the output matrix in a way that allows the
    elementary effects to be computed as a simple subtraction between two
    arrays. It repositions the outputs in the output matrix according to the
    order they changed during the formation of the trajectories.

    Parameters
    ----------
    output_array: np.ndarray
        Matrix of model output values
    value_increased: np.ndarray
        Input variables that had their values increased when forming the
        trajectories matrix
    value_decreased: np.ndarray
        Input variables that had their values decreased when forming the
        trajectories matrix
    increase: bool
        Direction to consider (values that increased or decreased). "Increase"
        is the default value.

    Returns
    -------
    np.ndarray
    """
    if increase:
        pad_up = (1, 0)
        pad_lo = (0, 1)
    else:
        pad_up = (0, 1)
        pad_lo = (1, 0)

    value_increased = np.pad(value_increased, ((0, 0), pad_up, (0, 0)), "constant")
    value_decreased = np.pad(value_decreased, ((0, 0), pad_lo, (0, 0)), "constant")

    res = np.einsum("ik,ikj->ij", output_array, value_increased + value_decreased)

    return res.T


def _compute_elementary_effects(
    model_inputs: np.ndarray,
    model_outputs: np.ndarray,
    trajectory_size: int,
    delta: float,
    scaling: bool = False,
) -> np.ndarray:
    """Computes the Morris elementary effects.

    Parameters
    ----------
    model_inputs: np.ndarray
        matrix of inputs to the model under analysis.
        x-by-r where x is the number of variables and
        r is the number of rows (a function of x and num_trajectories)
    model_outputs: np.ndarray
        r-length vector of model outputs
    trajectory_size: int
        Number of points in a trajectory
    delta: float
        Scaling factor computed from `num_levels`
    scaling: bool
        True if the elementary effects should be in scaled as in [1]

    Returns
    ---------
    elementary_effects : np.array
        Elementary effects for each parameter

    References
    ----------
    1. Sin and Gearney (2009)
       Improving the Morris Method for Sensitivity Analysis
       by Scaling the Elementary Effects.
       19th European Symposium on Computer Aided Process Engineering ESCAPE19:925-930

    2. Moret et al. (2017)
        Characterization of input uncertainties in strategic
        energy planning models.
        Applied Energy, Volume 202, 15 September 2017, Pages 597-617
        https://doi.org/10.1016/j.apenergy.2017.05.106
    """
    num_trajectories = _calculate_number_trajectories(model_inputs, trajectory_size)

    output_matrix = _reshape_model_outputs(
        model_outputs, num_trajectories, trajectory_size
    )
    input_matrix = _reshape_model_inputs(
        model_inputs, num_trajectories, trajectory_size
    )

    delta_variables = _calculate_delta_input_variables(input_matrix)
    value_increased = delta_variables > 0
    value_decreased = delta_variables < 0

    result_increased = _reorganize_output_matrix(
        output_matrix, value_increased, value_decreased
    )
    result_decreased = _reorganize_output_matrix(
        output_matrix, value_increased, value_decreased, increase=False
    )

    difference = _calc_results_difference(result_increased, result_decreased)

    if scaling:
        # calculate the scaled value of delta (step size)
        dx_trajectory = _calculate_step_size_x(input_matrix)

        see_dy_dx = np.divide(difference, dx_trajectory.T)

        sigma_x_per_trajectory = np.std(input_matrix, axis=1)
        sigma_y_per_trajectory = np.std(output_matrix, axis=1)

        # Create a mask to avoid division by zero
        mask = sigma_y_per_trajectory[:, np.newaxis] != 0

        adjustment_trajectory = np.divide(
            sigma_x_per_trajectory,
            sigma_y_per_trajectory[:, np.newaxis],
            out=np.zeros_like(sigma_x_per_trajectory),
            where=mask,
        )
        elementary_effects = see_dy_dx * adjustment_trajectory.T

    else:
        elementary_effects = difference / delta

    return elementary_effects


def _calc_results_difference(
    result_up: np.ndarray, result_lo: np.ndarray
) -> np.ndarray:
    """Computes the difference between the output points.

    Parameters
    ----------
    result_up: np.ndarray
    result_lo: np.ndarray

    Returns
    -------
    results_difference: np.ndarray
        Difference between the output points
    """
    results_difference = np.subtract(result_up, result_lo)

    return results_difference


def _calculate_delta_input_variables(input_matrix: np.ndarray) -> np.ndarray:
    """Computes the delta values of the problem variables.

    For each point of the trajectory, computes how much each variable increased
    or decreased in respect to the previous point.

    Parameters
    ----------
    input_matrix: np.ndarray
        Matrix with the values of the problem's variables for all input points.

    Returns
    -------
    delta_variables: np.ndarray
        Variation of each variable, for each point in the trajectory.
    """
    delta_variables = np.subtract(input_matrix[:, 1:, :], input_matrix[:, 0:-1, :])

    return delta_variables


def _calculate_number_trajectories(
    model_inputs: np.ndarray, trajectory_size: int
) -> int:
    """Calculate the number of trajectories.

    Parameters
    ----------
    model_inputs: np.ndarray
        Matrix of model inputs
    trajectory_size: int
        Number of input points in each trajectory

    Returns
    -------
    num_trajectories: int
        Number of trajectories
    """
    num_input_points = model_inputs.shape[0]
    num_trajectories = int(num_input_points / trajectory_size)

    return num_trajectories


def _reshape_model_inputs(
    model_inputs: np.ndarray, num_trajectories: int, trajectory_size: int
) -> np.ndarray:
    """Reshapes the model inputs' matrix.

    Parameters
    ----------
    model_inputs: np.ndarray
        Matrix of model inputs
    num_trajectories: int
        Number of trajectories
    trajectory_size: int
        Number of points in a trajectory

    Returns
    -------
    input_matrix: np.ndarray
        Reshaped input matrix.
    """
    num_vars = model_inputs.shape[1]
    input_matrix = model_inputs.reshape(num_trajectories, trajectory_size, num_vars)
    return input_matrix


def _reshape_model_outputs(
    model_outputs: np.ndarray, num_trajectories: int, trajectory_size: int
):
    """Reshapes the model outputs' matrix.

    Parameters
    ----------
    model_outputs: np.ndarray
        Matrix of model outputs
    num_trajectories: int
        Number of trajectories
    trajectory_size: int
        Number of points in a trajectory

    Returns
    -------
    output_matrix: np.ndarray
        Reshaped output matrix.
    """
    output_matrix = model_outputs.reshape(num_trajectories, trajectory_size)
    return output_matrix


def _compute_mu_star_confidence(
    elementary_effects: np.ndarray,
    num_vars: int,
    num_resamples: int,
    conf_level: float,
    rng: np.random.Generator,
) -> np.ndarray:
    """Computes the confidence intervals for the mu_star variable.

    Uses bootstrapping where the elementary effects are resampled with
    replacement to produce a histogram of resampled mu_star metrics.
    This resample is used to produce a confidence interval.

    Parameters
    ----------
    elementary_effects : np.array
        Elementary effects for each parameter
    num_vars: int
        Number of problem's variables
    num_resamples: int
        Number of resamples
    conf_level: float
        Confidence level
    rng: np.random.Generator

    Returns
    -------
    mu_star_conf: np.ndarray
        Confidence intervals for the mu_star variable
    """
    if not 0 < conf_level < 1:
        raise ValueError("Confidence level must be between 0-1.")

    mu_star_conf = []
    for j in range(num_vars):
        ee = elementary_effects[j, :]
        resample_index = rng.integers(len(ee), size=(num_resamples, len(ee)))
        ee_resampled = ee[resample_index]

        # Compute average of the absolute values over each of the resamples
        mu_star_resampled = np.average(np.abs(ee_resampled), axis=1)

        mu_star_conf.append(
            norm.ppf(0.5 + conf_level / 2) * mu_star_resampled.std(ddof=1)
        )

    mu_star_conf = np.asarray(mu_star_conf)

    return mu_star_conf


def _calculate_step_size_x(input_matrix):
    """This function calculates the step of dx for scaled elementary effects"""
    max_y = np.max(input_matrix, axis=1)
    min_y = np.min(input_matrix, axis=1)

    delta = max_y - min_y
    return delta


def _check_if_array_of_floats(array_x: np.ndarray):
    """Checks if an arrays is made of floats. If not, raises an error.

    Parameters
    ----------
    array_x:
        Array to be checked
    """
    msg = "dtype of {} array must be 'float', float32 or float64"
    if array_x.dtype not in ["float", "float32", "float64"]:
        raise ValueError(msg.format(array_x))



[docs]
def cli_parse(parser):
    parser.add_argument(
        "-X",
        "--model-input-file",
        type=str,
        required=True,
        default=None,
        help="Model input file",
    )
    parser.add_argument(
        "-r",
        "--resamples",
        type=int,
        required=False,
        default=1000,
        help="Number of bootstrap resamples for Morris \
                           confidence intervals",
    )
    parser.add_argument(
        "-l",
        "--levels",
        type=int,
        required=False,
        default=4,
        help="Number of grid levels \
                           (Morris only)",
    )
    parser.add_argument(
        "--scaled",
        type=bool,
        required=False,
        default=False,
        help="Scale the results by ratio of stddev of X and Y (Morris)",
    )
    return parser




[docs]
def cli_action(args):
    problem = read_param_file(args.paramfile)
    Y = np.loadtxt(
        args.model_output_file, delimiter=args.delimiter, usecols=(args.column,)
    )
    X = np.loadtxt(args.model_input_file, delimiter=args.delimiter, ndmin=2)
    if len(X.shape) == 1:
        X = X.reshape((len(X), 1))

    analyze(
        problem,
        X,
        Y,
        num_resamples=args.resamples,
        scaled=args.scaled,
        print_to_console=True,
        num_levels=args.levels,
        seed=args.seed,
    )



if __name__ == "__main__":
    common_args.run_cli(cli_parse, cli_action)