from typing import Dict, List
import numpy as np
from scipy.stats import norm
from . import common_args
from ..util import (
read_param_file,
compute_groups_matrix,
ResultDict,
_define_problem_with_groups,
_compute_delta,
)
[docs]
def analyze(
problem: Dict,
X: np.ndarray,
Y: np.ndarray,
num_resamples: int = 100,
conf_level: float = 0.95,
scaled: bool = False,
print_to_console: bool = False,
num_levels: int = 4,
seed=None,
) -> Dict:
"""Perform Morris Analysis on model outputs.
Returns a result set with keys ``mu``, ``mu_star``, ``sigma``, and
``mu_star_conf``, where each entry corresponds to the parameters
defined in the problem spec or parameter file.
- ``mu`` metric indicates the mean of the distribution
- ``mu_star`` metric indicates the mean of the distribution of absolute
values
- ``sigma`` is the standard deviation of the distribution
When ``scaled`` is True, the elementary effects are scaled by the ratio of
standard deviation of ``X`` and ``Y`` according to [3]. When using this
option it is important to ensure that ``X`` contains the actual values
passed into the model, as the elementary effects are divided by the
step calculated from ``X`` rather than using `delta` which is calculated from
the number of levels used in the sample. This could be the case if you
perform post-processing on the values before passing them to the model.
Scaled elementary effects are useful when comparing different model outputs
with each other when the input and output parameters have different scales.
The ranking between the ordinary elementary effects and the scaled should be
the same.
Notes
-----
When applied with groups, the ``mu`` metric is less reliable as the effect
from parameters within a group become averaged out.
The ``mu_star`` metric avoids this issue as it indicates the mean of the
absolute values. If the direction of effects is important, Campolongo et
al., [2] suggest comparing ``mu_star`` with ``mu``. If ``mu`` is low
and ``mu_star`` is high, then the effects are of different signs.
``sigma`` is used as an indicator of interactions between parameters, or
groups of parameters.
Compatible with:
`morris` : :func:`SALib.sample.morris.sample`
Examples
--------
>>> X = morris.sample(problem, 1000, num_levels=4)
>>> Y = Ishigami.evaluate(X)
>>> Si = morris.analyze(problem, X, Y, conf_level=0.95,
>>> print_to_console=True, num_levels=4)
Parameters
----------
problem : dict
The problem definition
X : numpy.array
The NumPy matrix containing the model inputs of dtype=float
Y : numpy.array
The NumPy array containing the model outputs of dtype=float
scaled : bool, default=False
If True, the elementary effects are scaled by the ratio of
standard deviation of X and Y according to [3]
num_resamples : int
The number of resamples used to compute the confidence
intervals (default 1000)
conf_level : float
The confidence interval level (default 0.95)
print_to_console : bool
Print results directly to console (default False)
num_levels : int
The number of grid levels, must be identical to the value
passed to SALib.sample.morris (default 4)
seed : int
Seed to generate a random number
Returns
-------
Si : dict
A dictionary of sensitivity indices containing the following entries.
- `mu` - the mean elementary effect
- `mu_star` - the absolute of the mean elementary effect
- `sigma` - the standard deviation of the elementary effect
- `mu_star_conf` - the bootstrapped confidence interval
- `names` - the names of the parameters
References
----------
1. Morris, M. (1991).
Factorial Sampling Plans for Preliminary Computational Experiments.
Technometrics, 33(2):161-174,
doi:10.1080/00401706.1991.10484804.
2. Campolongo, F., J. Cariboni, and A. Saltelli (2007).
An effective screening design for sensitivity analysis
of large models.
Environmental Modelling & Software, 22(10):1509-1518,
doi:10.1016/j.envsoft.2006.10.004.
3. Sin and Gearney (2009)
Improving the Morris Method for Sensitivity Analysis
by Scaling the Elementary Effects.
19th European Symposium on Computer Aided Process Engineering ESCAPE19:925-930
4. Moret et al. (2017)
Characterization of input uncertainties in strategic
energy planning models.
Applied Energy, Volume 202, 15 September 2017, Pages 597-617
https://doi.org/10.1016/j.apenergy.2017.05.106
"""
if seed:
np.random.seed(seed)
_define_problem_with_groups(problem)
_check_if_array_of_floats(X)
_check_if_array_of_floats(Y)
delta = _compute_delta(num_levels)
num_vars = problem["num_vars"]
groups, unique_group_names = compute_groups_matrix(problem["groups"])
number_of_groups = len(unique_group_names)
num_trajectories = int(Y.size / (number_of_groups + 1))
trajectory_size = int(Y.size / num_trajectories)
elementary_effects = _compute_elementary_effects(
X, Y, trajectory_size, delta, scaling=scaled
)
Si = _compute_statistical_outputs(
elementary_effects,
num_vars,
num_resamples,
conf_level,
groups,
unique_group_names,
)
if print_to_console:
print(Si.to_df())
return Si
def _compute_statistical_outputs(
elementary_effects: np.ndarray,
num_vars: int,
num_resamples: int,
conf_level: float,
groups: np.ndarray,
unique_group_names: List,
) -> ResultDict:
"""Computes the statistical parameters related to Morris method.
Parameters
----------
elementary_effects: np.ndarray
Morris elementary effects.
num_vars: int
Number of problem's variables
num_resamples: int
Number of resamples
conf_level: float
Confidence level
groups: np.ndarray
Array defining the distribution of groups
unique_group_names: List
Names of the groups
Returns
-------
Si: ResultDict
Morris statistical parameters.
"""
Si = ResultDict(
(k, [None] * num_vars)
for k in ["names", "mu", "mu_star", "sigma", "mu_star_conf"]
)
Si["names"] = unique_group_names
mu = np.average(elementary_effects, 1)
mu_star = np.average(np.abs(elementary_effects), 1)
sigma = np.std(elementary_effects, axis=1, ddof=1)
mu_star_conf = _compute_mu_star_confidence(
elementary_effects, num_vars, num_resamples, conf_level
)
Si["mu"] = _compute_grouped_metric(mu, groups)
Si["mu_star"] = _compute_grouped_metric(mu_star, groups)
Si["sigma"] = _compute_grouped_sigma(sigma, groups)
Si["mu_star_conf"] = _compute_grouped_metric(mu_star_conf, groups)
return Si
def _compute_grouped_sigma(
ungrouped_sigma: np.ndarray, groups: np.ndarray
) -> np.ndarray:
"""Sigma values for the groups.
Returns sigma for the groups of parameter values in the argument
ungrouped_metric where the group consists of no more than
one parameter
Parameters
----------
ungrouped_sigma: np.ndarray
Sigma values calculated without considering the groups
groups: np.ndarray
Array defining the distribution of groups
Returns
-------
sigma: np.ndarray
Sigma values for the groups.
"""
sigma_agg = _compute_grouped_metric(ungrouped_sigma, groups)
sigma = np.zeros(groups.shape[1], dtype=float)
np.copyto(sigma, sigma_agg, where=groups.sum(axis=0) == 1)
np.copyto(sigma, np.NAN, where=groups.sum(axis=0) != 1)
return sigma
def _compute_grouped_metric(
ungrouped_metric: np.ndarray, groups: np.ndarray
) -> np.ndarray:
"""Computes the mean value for the groups of parameter values.
Parameters
----------
ungrouped_metric: np.ndarray
Metric calculated without considering the groups
groups: np.ndarray
Array defining the distribution of groups
Returns
-------
mean_of_mu_star: np.ndarray
Mean value for the groups of parameter values
"""
groups = np.array(groups, dtype=bool)
mu_star_masked = np.ma.masked_array(
ungrouped_metric * groups.T, mask=(groups ^ 1).T
)
mean_of_mu_star = np.ma.mean(mu_star_masked, axis=1)
return mean_of_mu_star
def _reorganize_output_matrix(
output_array: np.ndarray,
value_increased: np.ndarray,
value_decreased: np.ndarray,
increase: bool = True,
) -> np.ndarray:
"""Reorganize the output matrix.
This method reorganizes the output matrix in a way that allows the
elementary effects to be computed as a simple subtraction between two
arrays. It repositions the outputs in the output matrix according to the
order they changed during the formation of the trajectories.
Parameters
----------
output_array: np.ndarray
Matrix of model output values
value_increased: np.ndarray
Input variables that had their values increased when forming the
trajectories matrix
value_decreased: np.ndarray
Input variables that had their values decreased when forming the
trajectories matrix
increase: bool
Direction to consider (values that increased or decreased). "Increase"
is the default value.
Returns
-------
np.ndarray
"""
if increase:
pad_up = (1, 0)
pad_lo = (0, 1)
else:
pad_up = (0, 1)
pad_lo = (1, 0)
value_increased = np.pad(value_increased, ((0, 0), pad_up, (0, 0)), "constant")
value_decreased = np.pad(value_decreased, ((0, 0), pad_lo, (0, 0)), "constant")
res = np.einsum("ik,ikj->ij", output_array, value_increased + value_decreased)
return res.T
def _compute_elementary_effects(
model_inputs: np.ndarray,
model_outputs: np.ndarray,
trajectory_size: int,
delta: float,
scaling: bool = False,
) -> np.ndarray:
"""Computes the Morris elementary effects.
Parameters
----------
model_inputs: np.ndarray
matrix of inputs to the model under analysis.
x-by-r where x is the number of variables and
r is the number of rows (a function of x and num_trajectories)
model_outputs: np.ndarray
r-length vector of model outputs
trajectory_size: int
Number of points in a trajectory
delta: float
Scaling factor computed from `num_levels`
scaling: bool
True if the elementary effects should be in scaled as in [1]
Returns
---------
elementary_effects : np.array
Elementary effects for each parameter
References
----------
1. Sin and Gearney (2009)
Improving the Morris Method for Sensitivity Analysis
by Scaling the Elementary Effects.
19th European Symposium on Computer Aided Process Engineering ESCAPE19:925-930
2. Moret et al. (2017)
Characterization of input uncertainties in strategic
energy planning models.
Applied Energy, Volume 202, 15 September 2017, Pages 597-617
https://doi.org/10.1016/j.apenergy.2017.05.106
"""
num_trajectories = _calculate_number_trajectories(model_inputs, trajectory_size)
output_matrix = _reshape_model_outputs(
model_outputs, num_trajectories, trajectory_size
)
input_matrix = _reshape_model_inputs(
model_inputs, num_trajectories, trajectory_size
)
delta_variables = _calculate_delta_input_variables(input_matrix)
value_increased = delta_variables > 0
value_decreased = delta_variables < 0
result_increased = _reorganize_output_matrix(
output_matrix, value_increased, value_decreased
)
result_decreased = _reorganize_output_matrix(
output_matrix, value_increased, value_decreased, increase=False
)
difference = _calc_results_difference(result_increased, result_decreased)
if scaling:
# calculate the scaled value of delta (step size)
dx_trajectory = _calculate_step_size_x(input_matrix)
see_dy_dx = np.divide(difference, dx_trajectory.T)
sigma_x_per_trajectory = np.std(input_matrix, axis=1)
sigma_y_per_trajectory = np.std(output_matrix, axis=1)
# Create a mask to avoid division by zero
mask = sigma_y_per_trajectory[:, np.newaxis] != 0
adjustment_trajectory = np.divide(
sigma_x_per_trajectory,
sigma_y_per_trajectory[:, np.newaxis],
out=np.zeros_like(sigma_x_per_trajectory),
where=mask,
)
elementary_effects = see_dy_dx * adjustment_trajectory.T
else:
elementary_effects = difference / delta
return elementary_effects
def _calc_results_difference(
result_up: np.ndarray, result_lo: np.ndarray
) -> np.ndarray:
"""Computes the difference between the output points.
Parameters
----------
result_up: np.ndarray
result_lo: np.ndarray
Returns
-------
results_difference: np.ndarray
Difference between the output points
"""
results_difference = np.subtract(result_up, result_lo)
return results_difference
def _calculate_delta_input_variables(input_matrix: np.ndarray) -> np.ndarray:
"""Computes the delta values of the problem variables.
For each point of the trajectory, computes how much each variable increased
or decreased in respect to the previous point.
Parameters
----------
input_matrix: np.ndarray
Matrix with the values of the problem's variables for all input points.
Returns
-------
delta_variables: np.ndarray
Variation of each variable, for each point in the trajectory.
"""
delta_variables = np.subtract(input_matrix[:, 1:, :], input_matrix[:, 0:-1, :])
return delta_variables
def _calculate_number_trajectories(
model_inputs: np.ndarray, trajectory_size: int
) -> int:
"""Calculate the number of trajectories.
Parameters
----------
model_inputs: np.ndarray
Matrix of model inputs
trajectory_size: int
Number of input points in each trajectory
Returns
-------
num_trajectories: int
Number of trajectories
"""
num_input_points = model_inputs.shape[0]
num_trajectories = int(num_input_points / trajectory_size)
return num_trajectories
def _reshape_model_inputs(
model_inputs: np.ndarray, num_trajectories: int, trajectory_size: int
) -> np.ndarray:
"""Reshapes the model inputs' matrix.
Parameters
----------
model_inputs: np.ndarray
Matrix of model inputs
num_trajectories: int
Number of trajectories
trajectory_size: int
Number of points in a trajectory
Returns
-------
input_matrix: np.ndarray
Reshaped input matrix.
"""
num_vars = model_inputs.shape[1]
input_matrix = model_inputs.reshape(num_trajectories, trajectory_size, num_vars)
return input_matrix
def _reshape_model_outputs(
model_outputs: np.ndarray, num_trajectories: int, trajectory_size: int
):
"""Reshapes the model outputs' matrix.
Parameters
----------
model_outputs: np.ndarray
Matrix of model outputs
num_trajectories: int
Number of trajectories
trajectory_size: int
Number of points in a trajectory
Returns
-------
output_matrix: np.ndarray
Reshaped output matrix.
"""
output_matrix = model_outputs.reshape(num_trajectories, trajectory_size)
return output_matrix
def _compute_mu_star_confidence(
elementary_effects: np.ndarray, num_vars: int, num_resamples: int, conf_level: float
) -> np.ndarray:
"""Computes the confidence intervals for the mu_star variable.
Uses bootstrapping where the elementary effects are resampled with
replacement to produce a histogram of resampled mu_star metrics.
This resample is used to produce a confidence interval.
Parameters
----------
elementary_effects : np.array
Elementary effects for each parameter
num_vars: int
Number of problem's variables
num_resamples: int
Number of resamples
conf_level: float
Confidence level
Returns
-------
mu_star_conf: np.ndarray
Confidence intervals for the mu_star variable
"""
if not 0 < conf_level < 1:
raise ValueError("Confidence level must be between 0-1.")
mu_star_conf = []
for j in range(num_vars):
ee = elementary_effects[j, :]
resample_index = np.random.randint(len(ee), size=(num_resamples, len(ee)))
ee_resampled = ee[resample_index]
# Compute average of the absolute values over each of the resamples
mu_star_resampled = np.average(np.abs(ee_resampled), axis=1)
mu_star_conf.append(
norm.ppf(0.5 + conf_level / 2) * mu_star_resampled.std(ddof=1)
)
mu_star_conf = np.asarray(mu_star_conf)
return mu_star_conf
def _calculate_step_size_x(input_matrix):
"""This function calculates the step of dx for scaled elementary effects"""
max_y = np.max(input_matrix, axis=1)
min_y = np.min(input_matrix, axis=1)
delta = max_y - min_y
return delta
def _check_if_array_of_floats(array_x: np.ndarray):
"""Checks if an arrays is made of floats. If not, raises an error.
Parameters
----------
array_x:
Array to be checked
"""
msg = "dtype of {} array must be 'float', float32 or float64"
if array_x.dtype not in ["float", "float32", "float64"]:
raise ValueError(msg.format(array_x))
[docs]
def cli_parse(parser):
parser.add_argument(
"-X",
"--model-input-file",
type=str,
required=True,
default=None,
help="Model input file",
)
parser.add_argument(
"-r",
"--resamples",
type=int,
required=False,
default=1000,
help="Number of bootstrap resamples for Morris \
confidence intervals",
)
parser.add_argument(
"-l",
"--levels",
type=int,
required=False,
default=4,
help="Number of grid levels \
(Morris only)",
)
parser.add_argument(
"--scaled",
type=bool,
required=False,
default=False,
help="Scale the results by ratio of stddev of X and Y (Morris)",
)
return parser
[docs]
def cli_action(args):
problem = read_param_file(args.paramfile)
Y = np.loadtxt(
args.model_output_file, delimiter=args.delimiter, usecols=(args.column,)
)
X = np.loadtxt(args.model_input_file, delimiter=args.delimiter, ndmin=2)
if len(X.shape) == 1:
X = X.reshape((len(X), 1))
analyze(
problem,
X,
Y,
num_resamples=args.resamples,
scaled=args.scaled,
print_to_console=True,
num_levels=args.levels,
seed=args.seed,
)
if __name__ == "__main__":
common_args.run_cli(cli_parse, cli_action)