Source code for SALib.util

"""A set of utility functions"""

from collections import OrderedDict
from typing import Dict, List, Tuple
import warnings

import numpy as np  # type: ignore
import scipy as sp  # type: ignore

from .util_funcs import (  # noqa: F401, E402
    avail_approaches,
    read_param_file,
    _check_bounds,
    _check_groups,
)  # noqa: F401, E402
from .problem import ProblemSpec  # noqa: F401, E402
from .results import ResultDict  # noqa: F401, E402


__all__ = ["scale_samples", "read_param_file", "avail_approaches"]


def _scale_samples(params: np.ndarray, bounds: List):
    """Rescale samples in 0-to-1 range to arbitrary bounds

    Parameters
    ----------
    params : numpy.ndarray
        numpy array of dimensions :math:`N`-by-`num_params`,
        where :math:`N` is the number of samples

    bounds : list
        list of lists of dimensions `num_params`-by-2
    """
    # Check bounds are legal (upper bound is greater than lower bound)
    lower_bounds, upper_bounds = _check_bounds(bounds)

    if np.any(lower_bounds >= upper_bounds):
        raise ValueError(
            "Bounds are not legal (upper bound must be greater than lower bound)"
        )

    # This scales the samples in-place, by using the optional output
    # argument for the numpy ufunctions
    # The calculation is equivalent to:
    #   sample * (upper_bound - lower_bound) + lower_bound
    np.add(
        np.multiply(params, (upper_bounds - lower_bounds), out=params),
        lower_bounds,
        out=params,
    )
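
# A minimal sketch of what `_scale_samples` does (the array below is
# illustrative, not part of this module). Each column is rescaled in place
# as `sample * (upper - lower) + lower`:
#
#     >>> u = np.array([[0.0, 0.5], [1.0, 0.25]])  # samples on [0, 1]
#     >>> _scale_samples(u, [[10.0, 20.0], [-1.0, 1.0]])
#     >>> u
#     array([[10. ,  0. ],
#            [20. , -0.5]])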


def scale_samples(params: np.ndarray, problem: Dict):
    """Scale samples based on specified distribution (defaulting to uniform).

    Adds an entry to the problem specification to indicate samples have been
    scaled to maintain backwards compatibility (`sample_scaled`).

    Parameters
    ----------
    params : np.ndarray
        numpy array of dimensions :math:`N`-by-`num_params`,
        where :math:`N` is the number of samples
    problem : dictionary
        SALib problem specification

    Returns
    -------
    np.ndarray
        Scaled samples
    """
    bounds = problem["bounds"]
    dists = problem.get("dists")

    if dists is None:
        _scale_samples(params, bounds)
    else:
        if params.shape[1] != len(dists):
            msg = "Mismatch in number of parameters and distributions.\n"
            msg += "Num parameters: {}\n".format(params.shape[1])
            msg += "Num distributions: {}".format(len(dists))
            raise ValueError(msg)

        params = _nonuniform_scale_samples(params, bounds, dists)

    problem["sample_scaled"] = True

    return params
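
# A usage sketch for `scale_samples` (the problem specification below is
# hypothetical, following the standard SALib problem dict layout):
#
#     >>> problem = {
#     ...     "num_vars": 2,
#     ...     "names": ["x1", "x2"],
#     ...     "bounds": [[0.0, 10.0], [100.0, 200.0]],
#     ... }
#     >>> u = np.random.rand(1000, 2)    # samples on the unit hypercube
#     >>> x = scale_samples(u, problem)  # rescaled to the bounds above
#     >>> problem["sample_scaled"]
#     True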
def _unscale_samples(params, bounds):
    """Rescale samples from arbitrary bounds back to [0,1] range

    Parameters
    ----------
    params : numpy.ndarray
        numpy array of dimensions N-by-num_params,
        where N is the number of samples
    bounds : list
        list of lists of dimensions num_params-by-2
    """
    # Check bounds are legal (upper bound is greater than lower bound)
    b = np.array(bounds)
    lower_bounds = b[:, 0]
    upper_bounds = b[:, 1]

    if np.any(lower_bounds >= upper_bounds):
        raise ValueError(
            "Bounds are not legal (upper bound must be greater than lower bound)"
        )

    # This scales the samples in-place, by using the optional output
    # argument for the numpy ufunctions
    # The calculation is equivalent to:
    #   (sample - lower_bound) / (upper_bound - lower_bound)
    np.divide(
        np.subtract(params, lower_bounds, out=params),
        np.subtract(upper_bounds, lower_bounds),
        out=params,
    )


def _nonuniform_scale_samples(params, bounds, dists):
    """Rescale samples in 0-to-1 range to other distributions

    Parameters
    ----------
    params : numpy.ndarray
        numpy array of dimensions N-by-num_params,
        where N is the number of samples
    dists : list
        list of distributions, one for each parameter
            unif: uniform with lower and upper bounds
            logunif: logarithmic uniform with lower and upper bounds
            triang: triangular with lower and upper bounds, as well as
                    location of peak
                    The location of peak is in percentage of width
                    e.g. :code:`[1.0, 3.0, 0.5]` indicates 1.0 to 3.0 with a
                    peak at 2.0

                    A soon-to-be deprecated two-value format assumes the
                    lower bound to be 0
                    e.g. :code:`[3, 0.5]` assumes 0 to 3, with a peak at 1.5
            norm: normal distribution with mean and standard deviation
            truncnorm: truncated normal distribution with upper and lower
                       bounds, mean and standard deviation
            lognorm: lognormal with ln-space mean and standard deviation
    """
    b = np.array(bounds, dtype=object)

    # initializing matrix for converted values
    conv_params = np.empty_like(params)

    # loop over the parameters
    for i in range(conv_params.shape[1]):
        # setting first and second arguments for distributions
        # (interpretation depends on the distribution, e.g. lower/upper
        # bound for "unif", mean/stdev for "norm")
        b1 = b[i][0]
        b2 = b[i][1]

        if dists[i] == "triang":
            if len(b[i]) == 3:
                loc_start = b[i][0]  # loc start
                b1 = b[i][1]  # triangular distribution end
                b2 = b[i][2]  # 0-1 aka c (the peak)
            elif len(b[i]) == 2:
                msg = (
                    "Two-value format for triangular distributions detected.\n"
                    "To remove this message, specify the distribution start, "
                    "end, and peak (three values) "
                    "instead of the current two-value format "
                    "(distribution end and peak, with start assumed to be 0)\n"
                    "The two-value format will be deprecated in SALib v1.5"
                )
                warnings.warn(msg, DeprecationWarning, stacklevel=2)

                loc_start = 0
                b1 = b[i][0]
                b2 = b[i][1]
            else:
                raise ValueError(
                    "Unknown triangular distribution specification. Check"
                    " problem specification."
                )

            # checking for correct parameters
            if b1 < 0 or b2 < 0 or b2 >= 1 or loc_start > b1:
                raise ValueError(
                    """Triangular distribution bound error: Scale must be
                    greater than zero; peak on interval [0,1]; triangular
                    start value must be smaller than end value"""
                )
            else:
                conv_params[:, i] = sp.stats.triang.ppf(
                    params[:, i], c=b2, scale=b1 - loc_start, loc=loc_start
                )
        elif dists[i] == "unif":
            if b1 >= b2:
                raise ValueError(
                    """Uniform distribution: lower bound
                    must be less than upper bound"""
                )
            else:
                conv_params[:, i] = params[:, i] * (b2 - b1) + b1
        elif dists[i] == "logunif":
            conv_params[:, i] = sp.stats.loguniform.ppf(params[:, i], a=b1, b=b2)
        elif dists[i] == "norm":
            if b2 <= 0:
                raise ValueError("""Normal distribution: stdev must be > 0""")
            else:
                conv_params[:, i] = sp.stats.norm.ppf(params[:, i], loc=b1, scale=b2)
        # Truncated normal distribution
        # parameters are lower bound and upper bound, mean and stdev
        elif dists[i] == "truncnorm":
            b3 = b[i][2]
            b4 = b[i][3]
            if b4 <= 0:
                raise ValueError(
                    """Truncated normal distribution: stdev must be > 0"""
                )
            if b1 >= b2:
                raise ValueError(
                    """Truncated normal distribution: lower bound
                    must be less than upper bound"""
                )
            else:
                conv_params[:, i] = sp.stats.truncnorm.ppf(
                    params[:, i], (b1 - b3) / b4, (b2 - b3) / b4, loc=b3, scale=b4
                )
        # lognormal distribution (ln-space, not base-10)
        # parameters are ln-space mean and standard deviation
        elif dists[i] == "lognorm":
            # checking for valid parameters
            if b2 <= 0:
                raise ValueError("""Lognormal distribution: stdev must be > 0""")
            else:
                conv_params[:, i] = np.exp(
                    sp.stats.norm.ppf(params[:, i], loc=b1, scale=b2)
                )
        else:
            valid_dists = ["unif", "logunif", "triang", "norm", "truncnorm", "lognorm"]
            raise ValueError("Distributions: choose one of %s" % ", ".join(valid_dists))

    return conv_params
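
# A sketch of non-uniform scaling (the values are illustrative). The first
# parameter is mapped through a normal distribution (mean 0, stdev 1); the
# second through a triangular distribution on [1.0, 3.0] peaking at 2.0:
#
#     >>> u = np.random.rand(500, 2)
#     >>> x = _nonuniform_scale_samples(
#     ...     u,
#     ...     bounds=[[0.0, 1.0], [1.0, 3.0, 0.5]],
#     ...     dists=["norm", "triang"],
#     ... )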
def extract_group_names(groups: List) -> Tuple:
    """Get a unique set of the group names.

    Reverts to parameter names (and number of parameters) if groups are
    not defined.

    Parameters
    ----------
    groups : List

    Returns
    -------
    tuple : names, number of groups
    """
    names = list(OrderedDict.fromkeys(groups))
    number = len(names)

    return names, number


def compute_groups_matrix(groups: List):
    """Generate matrix which notes factor membership of groups

    Computes a k-by-g matrix which notes factor membership of groups
    where:

        k is the number of variables (factors)
        g is the number of groups

    Also returns a g-length list of unique group names whose positions
    correspond to the order of groups in the k-by-g matrix

    Parameters
    ----------
    groups : List
        Group names corresponding to each variable

    Returns
    -------
    tuple
        containing group matrix assigning parameters to groups
        and a list of unique group names
    """
    num_vars = len(groups)
    unique_group_names, number_of_groups = extract_group_names(groups)

    indices = dict([(x, i) for (i, x) in enumerate(unique_group_names)])

    output = np.zeros((num_vars, number_of_groups), dtype=int)
    for parameter_row, group_membership in enumerate(groups):
        group_index = indices[group_membership]
        output[parameter_row, group_index] = 1

    return output, unique_group_names


def _define_problem_with_groups(problem: Dict) -> Dict:
    """Checks if the user defined the 'groups' key in the problem dictionary.

    If not, makes the 'groups' key equal to the variable names. In other
    words, the number of groups will be equal to the number of variables,
    which is equivalent to having no groups.

    Parameters
    ----------
    problem : dict
        The problem definition

    Returns
    -------
    problem : dict
        The problem definition with the 'groups' key, even if the user
        doesn't define it
    """
    # Checks if there isn't a key 'groups' or if it exists and is set to 'None'
    if "groups" not in problem or not problem["groups"]:
        problem["groups"] = problem["names"]
    elif len(problem["groups"]) != problem["num_vars"]:
        raise ValueError(
            "Number of entries in 'groups' should be the same as in 'names'"
        )

    return problem
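
# A worked sketch for the group helpers (the group names are illustrative).
# Three variables spread over two groups produce a 3-by-2 membership matrix:
#
#     >>> matrix, names = compute_groups_matrix(["G1", "G2", "G1"])
#     >>> names
#     ['G1', 'G2']
#     >>> matrix
#     array([[1, 0],
#            [0, 1],
#            [1, 0]])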

def _compute_delta(num_levels: int) -> float:
    """Computes the delta value from number of levels

    Parameters
    ----------
    num_levels : int
        The number of levels

    Returns
    -------
    float
    """
    return num_levels / (2.0 * (num_levels - 1))
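
# A worked example of the delta formula above: with four levels,
# delta = 4 / (2 * (4 - 1)) = 2/3.
#
#     >>> _compute_delta(4)
#     0.6666666666666666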