from typing import Dict, List, Optional
from types import MethodType
import itertools
import time
import numpy as np
import pandas as pd
from SALib.plotting.hdmr import plot as hdmr_plot
from scipy import (stats, special, interpolate)
from . import common_args
from ..util import read_param_file, ResultDict
from matplotlib import pyplot as plt
__all__ = ['analyze', 'cli_parse', 'cli_action']
def analyze(problem: Dict, X: np.ndarray, Y: np.ndarray,
            maxorder: int = 2, maxiter: int = 100,
            m: int = 2, K: int = 20, R: Optional[int] = None,
            alpha: float = 0.95, lambdax: float = 0.01,
            print_to_console: bool = True,
            seed: Optional[int] = None) -> Dict:
"""High-Dimensional Model Representation (HDMR) using B-spline functions.
    HDMR is used for variance-based global sensitivity analysis (GSA) with
    correlated and uncorrelated inputs. This function takes as input:

    - an N x d matrix of N different d-vectors of model inputs
      (factors/parameters)
    - an N x 1 vector of corresponding model outputs

    It returns:

    - each factor's first, second, and third order sensitivity coefficients
      (separated into total, structural and correlative contributions),
    - an estimate of their 95% confidence intervals (from the bootstrap
      method),
    - the coefficients of the significant B-spline basis functions that
      govern the output, Y (determined by an F-test of the error residuals
      of the HDMR model (emulator) with/without a given first, second
      and/or third order B-spline).

    These coefficients define an emulator that can be used to predict the
    output, Y, of the original (CPU-intensive) model for any d-vector of
    model inputs. For uncorrelated model inputs (columns of X are
    independent), the HDMR sensitivity indices reduce to a single index
    (= structural contribution), consistent with their values derived from
    commonly used variance-based GSA methods.
Notes
-----
Compatible with:
all samplers
Contributed by @sahin-abdullah (sahina@uci.edu)
Parameters
----------
problem : dict
The problem definition
    X : numpy.ndarray
        NumPy array containing the model inputs, N rows by d columns
    Y : numpy.ndarray
        NumPy array containing the model outputs for each row of X
maxorder : int (1-3, default: 2)
Maximum HDMR expansion order
maxiter : int (1-1000, default: 100)
Max iterations backfitting
m : int (2-10, default: 2)
Number of B-spline intervals
K : int (1-100, default: 20)
Number of bootstrap iterations
    R : int (300-N, default: N/2)
        Number of bootstrap samples. Will be set to the length of `Y` if
        `K` is set to 1.
    alpha : float (0.5-1, default: 0.95)
        Confidence level used in the F-test
lambdax : float (0-10, default: 0.01)
Regularization term
    print_to_console : bool
        Print results directly to console (default: True)
    seed : int
        Seed value to initialize the random number generator
Returns
-------
    Si : ResultDict
        Sa : Uncorrelated contribution
        Sa_conf : Confidence interval of Sa
        Sb : Correlated contribution
        Sb_conf : Confidence interval of Sb
        S : Total contribution of a particular term
        S_conf : Confidence interval of S
        ST : Total contribution of a particular dimension/parameter
        ST_conf : Confidence interval of ST
        select : Number of selections (F-Test)
        Em : Result set
            C1 : First order coefficients
            C2 : Second order coefficients
            C3 : Third order coefficients
References
----------
.. [1] Genyuan Li, H. Rabitz, P.E. Yelvington, O.O. Oluwole, F. Bacon,
C.E. Kolb, and J. Schoendorf, "Global Sensitivity Analysis for
Systems with Independent and/or Correlated Inputs", Journal of
Physical Chemistry A, Vol. 114 (19), pp. 6022 - 6032, 2010,
https://doi.org/10.1021/jp9096919
Examples
--------
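    A minimal end-to-end sketch (assuming the Saltelli sampler and the
    Ishigami test function shipped with SALib):

    >>> from SALib.sample import saltelli
    >>> from SALib.test_functions import Ishigami
    >>> from SALib.analyze import hdmr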
>>> X = saltelli.sample(problem, 512)
>>> Y = Ishigami.evaluate(X)
>>> Si = hdmr.analyze(problem, X, Y, **options)
"""
# Random Seed
if seed:
np.random.seed(seed)
# Initial part: Check input arguments and define HDMR variables
settings = _check_settings(X, Y, maxorder, maxiter, m, K, R, alpha, lambdax)
init_vars = _init(X, Y, settings)
    # Sensitivity Analysis Computation with/without bootstrapping
SA, Em, RT, Y_em, idx = _compute(X, Y, settings, init_vars)
# Finalize results
Si = _finalize(problem, SA, Em, problem['num_vars'], alpha, maxorder, RT, Y_em, idx, X, Y)
# Print results to console
if print_to_console:
_print(Si, problem['num_vars'])
return Si
def _check_settings(X, Y, maxorder, maxiter, m, K, R, alpha, lambdax):
"""Perform checks to ensure all parameters are within usable/expected ranges."""
# Get dimensionality of numpy arrays
N, d = X.shape
y_row = Y.shape[0]
# Now check input-output mismatch
if d == 1:
raise RuntimeError("Matrix X contains only a single column: No point to do sensitivity analysis when d = 1.")
if N < 300:
raise RuntimeError(f"Number of samples in matrix X, {N}, is insufficient. Need at least 300.")
if N != y_row:
raise RuntimeError(f"Dimension mismatch. The number of outputs ({y_row}) should match number of samples ({N})")
if Y.size != N:
raise RuntimeError("Y should be a N x 1 vector with one simulated output for each N parameter vectors.")
if maxorder not in (1,2,3):
raise RuntimeError(f"Field \"maxorder\" of options should be an integer with values of 1, 2 or 3, got {maxorder}")
# Important next check for maxorder - as maxorder relates to d
if (d == 2) and (maxorder > 2):
raise RuntimeError("SALib-HDMR ERRROR: Field \"maxorder\" of options has to be 2 as d = 2 (X has two columns)")
    if maxiter not in np.arange(1, 1001):
        raise RuntimeError("Field \"maxiter\" of options should be an integer between 1 and 1000.")
    if m not in np.arange(1, 11):
        raise RuntimeError("Field \"m\" of options should be an integer between 1 and 10.")
    if K not in np.arange(1, 101):
        raise RuntimeError("Field \"K\" of options should be an integer between 1 and 100.")
if R is None:
R = y_row // 2
elif R not in np.arange(300, N+1):
raise RuntimeError(f"Field \"R\" of options should be an integer between 300 and {N}, the number of rows matrix X.")
if (K == 1) and (R != y_row):
R = y_row
    if alpha < 0.5 or alpha > 1.0:
        raise RuntimeError("Field \"alpha\" of options should be a float between 0.5 and 1.0")
    if lambdax > 10.0:
        raise RuntimeError("Field \"lambdax\" of options should not exceed 10.0. Default: lambdax = 0.01")
    elif lambdax < 0.0:
        raise RuntimeError("Field \"lambdax\" (regularization term) of options cannot be smaller than zero. Default: 0.01")
return [N, d, maxorder, maxiter,
m, K, R, alpha, lambdax]
def _compute(X, Y, settings, init_vars):
"""Compute HDMR"""
N, d, maxorder, maxiter, m, K, R, alpha, lambdax = settings
Em, idx, SA, RT, Y_em, Y_id, m1, m2, m3, j1, j2, j3 = init_vars
# DYNAMIC PART: Bootstrap for confidence intervals
for k in range(K):
tic = time.time() # Start timer
# Extract the "right" Y values
Y_id[:, 0] = Y[idx[:, k]]
# Compute the variance of the model output
SA['V_Y'][k, 0] = np.var(Y_id)
# Mean of the output
Em['f0'][k] = np.sum(Y_id[:, 0]) / R
# Compute residuals
Y_res = Y_id - Em['f0'][k]
# 1st order component functions: ind/backfitting
Y_em[:, j1], Y_res, Em['C1'][:, :, k] = _first_order(Em['B1'][idx[:, k], :, :], Y_res,
Em['C1'][:, :, k], R, Em['n1'], m1,
maxiter, lambdax)
# 2nd order component functions: individual
if (maxorder > 1):
Y_em[:, j2], Y_res, Em['C2'][:, :, k] = _second_order(Em['B2'][idx[:, k], :, :], Y_res,
Em['C2'][:, :, k], R, Em['n2'],
m2, lambdax)
# 3rd order component functions: individual
if (maxorder == 3):
Y_em[:, j3], Em['C3'][:, :, k] = _third_order(Em['B3'][idx[:, k], :, :], Y_res,
Em['C3'][:, :, k], R, Em['n3'], m3, lambdax)
# Identify significant and insignificant terms
Em['select'][:, k] = f_test(Y_id, Em['f0'][k], Y_em, R, alpha, m1, m2, m3, Em['n1'], Em['n2'],
Em['n3'], Em['n'])
# Store the emulated output
Em['Y_e'][:, k] = Em['f0'][k] + np.sum(Y_em, axis=1)
# RMSE of emulator
Em['RMSE'][k] = np.sqrt(np.sum(np.square(Y_id - Em['Y_e'][:, k])) / R)
# Compute sensitivity indices
SA['S'][:, k], SA['Sa'][:, k], SA['Sb'][:, k] = ancova(Y_id, Y_em, SA['V_Y'][k],
R, Em['n'])
RT[0, k] = time.time() - tic # Compute CPU time kth emulator
return (SA, Em, RT, Y_em, idx)
def _init(X, Y, settings):
"""Initialize HDMR variables."""
N, d, maxorder, maxiter, m, K, R, alpha, lambdax = settings
# Setup Bootstrap (if K > 1)
if (K == 1):
# No Bootstrap
idx = np.arange(0, N).reshape(N, 1)
else:
        # Now setup the bootstrap matrix with selection matrix, idx, for samples
idx = np.argsort(np.random.rand(N, K), axis=0)[:R]
# Compute normalized X-values
X_n = (X - np.tile(X.min(0), (N, 1))) / \
np.tile((X.max(0)) - X.min(0), (N, 1))
# DICT Em: Important variables
    n2 = 0
    n3 = 0
    # Placeholders for the interaction index arrays; overwritten below when
    # maxorder > 1 (they are unused when maxorder == 1)
    c2 = None
    c3 = None
# determine all combinations of parameters (coefficients) for each order
c1 = np.arange(0, d)
n1 = d
# now return based on maxorder used
if maxorder > 1:
c2 = np.asarray(list(itertools.combinations(np.arange(0, d), 2)))
n2 = c2.shape[0]
if maxorder == 3:
c3 = np.asarray(list(itertools.combinations(np.arange(0, d), 3)))
n3 = c3.shape[0]
    # calculate total number of coefficients
n = n1 + n2 + n3
# Initialize m1, m2 and m3 - number of coefficients first, second, third
# order
m1 = m + 3
m2 = m1**2
m3 = m1**3
# STRUCTURE Em: Initialization
Em = {'nterms': np.zeros(K), 'RMSE': np.zeros(K), 'm': m, 'Y_e': np.zeros((R, K)), 'f0': np.zeros(K),
'c1': c1, 'C1': np.zeros((m1, n1, K)), 'C2': np.zeros((1, 1, K)), 'C3': np.zeros((1, 1, K)),
'n': n, 'n1': n1, 'n2': n2, 'n3': n3, 'maxorder': maxorder, 'select': np.zeros((n, K)),
'B1': np.zeros((N, m1, n1))}
if (maxorder >= 2):
Em.update({'c2': c2, 'B2': np.zeros((N, m2, n2)), 'C2': np.zeros((m2, n2, K))})
if (maxorder >= 3):
Em.update({'c3': c3, 'B3': np.zeros((N, m3, n3)), 'C3': np.zeros((m3, n3, K))})
# Compute cubic B-spline
Em['B1'] = B_spline(X_n, m, d)
# Now compute B values for second order
if (maxorder > 1):
beta = np.array(list(itertools.product(range(m1), repeat=2)))
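        # Each second-order basis function is the tensor product of two
        # first-order splines of the interacting factors; `beta` enumerates
        # all m1 * m1 index pairs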
for k, j in itertools.product(range(n2), range(m2)):
Em['B2'][:, j, k] = np.multiply(
Em['B1'][:, beta[j, 0], Em['c2'][k, 0]],
Em['B1'][:, beta[j, 1], Em['c2'][k, 1]])
# Compute B values for third order
if (maxorder == 3):
# Now compute B values for third order
beta = np.array(list(itertools.product(range(m1), repeat=3)))
EmB1 = Em['B1']
for k, j in itertools.product(range(n3), range(m3)):
Emc3_k = Em['c3'][k]
beta_j = beta[j]
Em['B3'][:, j, k] = np.multiply(np.multiply(EmB1[:, beta_j[0], Emc3_k[0]],
EmB1[:, beta_j[1], Emc3_k[1]]),
EmB1[:, beta_j[2], Emc3_k[2]])
# DICT SA: Sensitivity analysis and analysis of variance decomposition
em_k = np.zeros((Em['n'], K))
SA = {
'S': em_k,
'Sa': em_k.copy(),
'Sb': em_k.copy(),
'ST': np.zeros((d, K)),
'V_Y': np.zeros((K, 1))
}
# Return runtime
RT = np.zeros((1, K))
# Initialize emulator matrix
Y_em = np.zeros((R, Em['n']))
# Initialize index of first, second and third order terms (columns of Y_em)
j1 = range(n1)
j2 = range(n1, n1 + n2)
j3 = range(n1 + n2, n1 + n2 + n3)
# Initialize temporary Y_id for bootstrap
Y_id = np.zeros((R, 1))
return [Em, idx, SA, RT, Y_em, Y_id, m1, m2, m3, j1, j2, j3]
def B_spline(X, m, d):
"""Generate cubic B-splines using scipy basis_element method.
Knot points (`t`) are automatically determined.
References
----------
.. [1] https://docs.scipy.org/doc/scipy/reference/generated/scipy.interpolate.BSpline.basis_element.html
"""
# Initialize B matrix
B = np.zeros((X.shape[0], m+3, d))
# Cubic basis-spline settings
k = np.arange(-1, m+2)
# Compute B-Splines
for j, i in itertools.product(range(d), range(m+3)):
k_i = k[i]
t = np.arange(k_i-2, k_i+3) / m
temp = interpolate.BSpline.basis_element(t)(X[:,j]) * np.power(m,3)
B[:, i, j] = np.where(temp < 0, 0, temp)
return B
def _first_order(B1, Y_res, C1, R, n1, m1, maxiter, lambdax):
"""Compute first order sensitivities."""
Y_i = np.zeros((R, n1)) # Initialize 1st order contributions
T1 = np.zeros((m1, R, n1)) # Initialize T(emporary) matrix - 1st
it = 0 # Initialize iteration counter
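    # Two-stage fit: each C1[:, j] is first estimated independently by ridge
    # regression, then refined by backfitting until the largest change in
    # coefficient variance drops below 1e-3 (or maxiter is reached)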
# First order individual estimation
for j in range(n1):
# Regularized least squares inversion ( minimize || C1 ||_2 )
B11 = np.matmul(np.transpose(B1[:, :, j]), B1[:, :, j])
        # If B11 is singular (any zero singular value), coefficients stay zero
        if np.all(np.linalg.svd(B11)[1]):  # singular values from SVD
T1[:, :, j] = np.linalg.solve(np.add(B11, np.multiply(
lambdax, np.identity(m1))), np.transpose(B1[:, :, j]))
C1[:, j] = np.matmul(T1[:, :, j], Y_res).reshape(m1,)
Y_i[:, j] = np.matmul(B1[:, :, j], C1[:, j])
# Backfitting Method
var1b_old = np.sum(np.square(C1), axis=0)
varmax = 1
while (varmax > 1e-3) and (it < maxiter):
for j in range(n1):
Y_r = Y_res
for z in range(n1):
if j != z:
Y_r = np.subtract(Y_r, np.matmul(
B1[:, :, z], C1[:, z]).reshape(R, 1))
C1[:, j] = np.matmul(T1[:, :, j], Y_r).reshape(m1,)
var1b_new = np.sum(np.square(C1), axis=0)
varmax = np.max(np.absolute(np.subtract(var1b_new, var1b_old)))
var1b_old = var1b_new
it += 1
# Now compute first-order terms
for j in range(n1):
Y_i[:, j] = np.matmul(B1[:, :, j], C1[:, j])
# Subtract each first order term from residuals
Y_res = np.subtract(Y_res, Y_i[:, j].reshape(R, 1))
return (Y_i, Y_res, C1)
def _second_order(B2, Y_res, C2, R, n2, m2, lambdax):
"""Compute second order sensitivities."""
    Y_ij = np.zeros((R, n2))   # Initialize 2nd order contributions
    T2 = np.zeros((m2, R, n2)) # Initialize T(emporary) matrix - 2nd
    # Second order individual estimation
    for j in range(n2):
        # Regularized least squares inversion ( minimize || C2 ||_2 )
B22 = np.matmul(np.transpose(B2[:, :, j]), B2[:, :, j])
        # If B22 is singular (any zero singular value), coefficients stay zero
        if np.all(np.linalg.svd(B22)[1]):  # singular values from SVD
T2[:, :, j] = np.linalg.solve(np.add(B22, np.multiply(
lambdax, np.identity(m2))), np.transpose(B2[:, :, j]))
C2[:, j] = np.matmul(T2[:, :, j], Y_res).reshape(m2,)
Y_ij[:, j] = np.matmul(B2[:, :, j], C2[:, j])
# Now compute second-order terms
for j in range(n2):
Y_ij[:, j] = np.matmul(B2[:, :, j], C2[:, j])
        # Subtract each second order term from residuals
Y_res = np.subtract(Y_res, Y_ij[:, j].reshape(R, 1))
return (Y_ij, Y_res, C2)
def _third_order(B3, Y_res, C3, R, n3, m3, lambdax):
"""Compute third order sensitivities."""
    Y_ijk = np.zeros((R, n3))  # Initialize 3rd order contributions
    T3 = np.zeros((m3, R, n3)) # Initialize T(emporary) matrix - 3rd
    # Third order individual estimation
    for j in range(n3):
        # Regularized least squares inversion ( minimize || C3 ||_2 )
B33 = np.matmul(np.transpose(B3[:, :, j]), B3[:, :, j])
        # If B33 is singular (any zero singular value), coefficients stay zero
        if np.all(np.linalg.svd(B33)[1]):  # singular values from SVD
T3[:, :, j] = np.linalg.solve(np.add(B33, np.multiply(
lambdax, np.identity(m3))), np.transpose(B3[:, :, j]))
C3[:, j] = np.matmul(T3[:, :, j], Y_res).reshape(m3,)
Y_ijk[:, j] = np.matmul(B3[:, :, j], C3[:, j])
return (Y_ijk, C3)
def f_test(Y, f0, Y_em, R, alpha, m1, m2, m3, n1, n2, n3, n):
"""Use F-test for model selection."""
# Initialize ind with zeros (all terms insignificant)
select = np.zeros((n, 1))
# Determine the significant components of the HDMR model via the F-test
Y_res0 = Y - f0
SSR0 = np.sum(np.square(Y_res0))
p0 = 0
for i in range(n):
# model with ith term included
Y_res1 = Y_res0 - Y_em[:, i].reshape(R, 1)
# Number of parameters of proposed model (order dependent)
        if i < n1:
            p1 = m1  # 1st order
        elif n1 <= i < (n1 + n2):
            p1 = m2  # 2nd order
        else:
            p1 = m3  # 3rd order
# Calculate SSR of Y1
SSR1 = np.sum(np.square(Y_res1))
# Now calculate the F_stat (F_stat > 0 -> SSR1 < SSR0 )
F_stat = ((SSR0 - SSR1) / (p1 - p0)) / (SSR1 / (R - p1))
# Now calculate critical F value
F_crit = stats.f.ppf(q=alpha, dfn=p1 - p0, dfd=R - p1)
# Now determine whether to accept ith component into model
if F_stat > F_crit:
# ith term is significant and should be included in model
select[i] = 1
return select.reshape(n,)
def ancova(Y, Y_em, V_Y, R, n):
"""Analysis of Covariance."""
# Compute the sum of all Y_em terms
Y0 = np.sum(Y_em[:, :], axis=1)
# Initialize each variable
S, S_a, S_b = [np.zeros((n,)) for _ in range(3)]
# Analysis of covariance
for j in range(n):
# Covariance matrix of jth term of Y_em and actual Y
C = np.cov(np.stack((Y_em[:, j], Y.reshape(R,)), axis=0))
# Total sensitivity of jth term ( = Eq. 19 of Li et al )
S[j] = C[0, 1] / V_Y
# Covariance matrix of jth term with emulator Y without jth term
C = np.cov(np.stack((Y_em[:, j], Y0 - Y_em[:, j]), axis=0))
# Structural contribution of jth term ( = Eq. 20 of Li et al )
S_a[j] = C[0, 0] / V_Y
# Correlative contribution of jth term ( = Eq. 21 of Li et al )
S_b[j] = C[0, 1] / V_Y
return (S, S_a, S_b)
def _finalize(problem, SA, Em, d, alpha, maxorder, RT, Y_em, bootstrap_idx, X, Y):
"""Creates ResultDict to be returned."""
# Create Sensitivity Indices Result Dictionary
keys = ('Sa', 'Sa_conf', 'Sb', 'Sb_conf', 'S', 'S_conf', 'select',
'Sa_sum', 'Sa_sum_conf', 'Sb_sum', 'Sb_sum_conf', 'S_sum', 'S_sum_conf')
Si = ResultDict((k, np.zeros(Em['n'])) for k in keys)
Si['Term'] = [None] * Em['n']
Si['ST'] = [np.nan, ] * Em['n']
Si['ST_conf'] = [np.nan, ] * Em['n']
# Z score
    def z(p):
        return -np.sqrt(2) * special.erfcinv(p * 2)
# Multiplier for confidence interval
mult = z(alpha + (1 - alpha) / 2)
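    # e.g. alpha = 0.95 gives mult = z(0.975) ≈ 1.96, the familiar two-sided
    # normal-theory multiplier applied to the bootstrap standard deviations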
# Compute the total sensitivity of each parameter/coefficient
for r in range(Em['n1']):
if maxorder == 1:
TS = SA['S'][r, :]
elif maxorder == 2:
ij = Em['n1'] + np.where(np.sum(Em['c2'] == r, axis=1) == 1)[0]
TS = np.sum(SA['S'][np.append(r, ij), :], axis=0)
elif maxorder == 3:
ij = Em['n1'] + np.where(np.sum(Em['c2'] == r, axis=1) == 1)[0]
ijk = Em['n1'] + Em['n2'] + \
np.where(np.sum(Em['c3'] == r, axis=1) == 1)[0]
TS = np.sum(SA['S'][np.append(r, np.append(ij, ijk)), :], axis=0)
Si['ST'][r] = np.mean(TS)
Si['ST_conf'][r] = mult * np.std(TS)
# Fill Expansion Terms
ct = 0
for i in range(Em['n1']):
Si['Term'][ct] = problem['names'][i]
ct += 1
for i in range(Em['n2']):
Si['Term'][ct] = '/'.join([problem['names'][Em['c2'][i, 0]],
problem['names'][Em['c2'][i, 1]]])
ct += 1
for i in range(Em['n3']):
Si['Term'][ct] = '/'.join([problem['names'][Em['c3'][i, 0]],
problem['names'][Em['c3'][i, 1]], problem['names'][Em['c3'][i, 2]]])
ct += 1
# Assign Bootstrap Results to Si Dict
Si['Sa'] = np.mean(SA['Sa'], axis=1)
Si['Sb'] = np.mean(SA['Sb'], axis=1)
Si['S'] = np.mean(SA['S'], axis=1)
Si['Sa_sum'] = np.mean(np.sum(SA['Sa'], axis=0))
Si['Sb_sum'] = np.mean(np.sum(SA['Sb'], axis=0))
Si['S_sum'] = np.mean(np.sum(SA['S'], axis=0))
# Compute Confidence Interval
Si['Sa_conf'] = mult * np.std(SA['Sa'], axis=1)
Si['Sb_conf'] = mult * np.std(SA['Sb'], axis=1)
Si['S_conf'] = mult * np.std(SA['S'], axis=1)
    # Std. dev. taken across the K bootstrap sums (axis=0 sums over terms);
    # without axis=0 the sum collapses to a scalar and the std is always 0
    Si['Sa_sum_conf'] = mult * np.std(np.sum(SA['Sa'], axis=0))
    Si['Sb_sum_conf'] = mult * np.std(np.sum(SA['Sb'], axis=0))
    Si['S_sum_conf'] = mult * np.std(np.sum(SA['S'], axis=0))
# F-test # of selection to print out
Si['select'] = Em['select'].flatten()
Si['names'] = Si['Term']
# Additional data for plotting.
Si['Em'] = Em
Si['RT'] = RT
Si['Y_em'] = Y_em
Si['idx'] = bootstrap_idx
Si['X'] = X
Si['Y'] = Y
Si.problem = problem
Si.to_df = MethodType(to_df, Si)
Si._plot = Si.plot
Si.plot = MethodType(plot, Si)
Si.emulate = MethodType(emulate, Si)
return Si
def emulate(self, X, Y=None):
'''Emulates model output with new input data.
Generates B-Splines with new input matrix, X, and multiplies it with
coefficient matrix, C.
    Compares emulated results with the observed vector, Y, when provided.

    Returns
    ========
    None. Updates the ResultDict in place, adding `emulated` (and
    `Y_test` when `Y` is given).
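
    A minimal usage sketch (`X_new` and `Y_new` are hypothetical held-out
    input/output samples, not defined in this module):

    >>> Si = hdmr.analyze(problem, X, Y)
    >>> Si.emulate(X_new, Y_new)
    >>> Si['emulated']  # emulator predictions for X_new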
'''
# Dimensionality
N, d = X.shape
# Expand C
Em = self['Em']
C1, C2, C3 = Em['C1'], Em['C2'], Em['C3']
# Take average coefficient
C1 = np.mean(C1, axis=2)
C2 = np.mean(C2, axis=2)
C3 = np.mean(C3, axis=2)
# Get maxorder and other information from C matrix
m = C1.shape[0] - 3
m1 = m+3
    n1, n2, n3 = d, 0, 0
    maxorder = 1  # raised below if higher-order coefficients are present
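    # C2/C3 retain their placeholder shape (1, 1, K) when that order was not
    # fitted, so the expansion order can be recovered from coefficient shapes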
if C2.shape[0] != 1:
maxorder = 2
m2 = C2.shape[0]
c2 = np.asarray(list(itertools.combinations(np.arange(0, d), 2)))
n2 = c2.shape[0]
if C3.shape[0] != 1:
maxorder = 3
m3 = C3.shape[0]
c3 = np.asarray(list(itertools.combinations(np.arange(0, d), 3)))
n3 = c3.shape[0]
# Initialize emulated Y's
Y_em = np.zeros((N, n1+n2+n3))
# Compute normalized X-values
X_n = (X - np.tile(X.min(0), (N, 1))) / \
np.tile((X.max(0)) - X.min(0), (N, 1))
# Now compute B values
B1 = B_spline(X_n, m, d)
if (maxorder > 1):
B2 = np.zeros((N, m2, n2))
beta = np.array(list(itertools.product(range(m1), repeat=2)))
for k, j in itertools.product(range(n2), range(m2)):
B2[:, j, k] = np.multiply(B1[:, beta[j, 0], c2[k, 0]],
B1[:, beta[j, 1], c2[k, 1]])
if (maxorder == 3):
B3 = np.zeros((N, m3, n3))
beta = np.array(list(itertools.product(range(m1), repeat=3)))
for k, j in itertools.product(range(n3), range(m3)):
Emc3_k = c3[k]
beta_j = beta[j]
B3[:, j, k] = np.multiply(np.multiply(B1[:, beta_j[0], Emc3_k[0]],
B1[:, beta_j[1], Emc3_k[1]]),
B1[:, beta_j[2], Emc3_k[2]])
# Calculate emulated output: First Order
for j in range(0, n1):
Y_em[:, j] = np.matmul(B1[:, :, j], C1[:, j])
# Second Order
if maxorder > 1:
for j in range(n1, n1+n2):
Y_em[:, j] = np.matmul(B2[:, :, j-n1], C2[:, j-n1])
# Third Order
if maxorder == 3:
for j in range(n1+n2, n1+n2+n3):
Y_em[:, j] = np.matmul(B3[:, :, j-n1-n2], C3[:, j-n1-n2])
Y_mean = 0
if Y is not None:
Y_mean = np.mean(Y)
self['Y_test'] = Y
emulated = np.sum(Y_em, axis=1)+Y_mean
self['emulated'] = emulated
def to_df(self):
'''Conversion method to Pandas DataFrame. To be attached to ResultDict.
Returns
========
Pandas DataFrame
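
    A short usage sketch (assuming `Si` was returned by `analyze`):

    >>> Si.to_df()  # rows indexed by term names; columns Sa/Sb/S/ST (+ _conf)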
'''
names = self['names']
# Only convert these elements in dict to DF
include_list = ['Sa', 'Sb', 'S', 'ST']
include_list += [f'{name}_conf' for name in include_list]
new_spec = {k: v for k, v in self.items() if k in include_list}
return pd.DataFrame(new_spec, index=names)
def plot(self):
"""HDMR specific plotting.
Gets attached to the SALib problem spec.
"""
return hdmr_plot(self)
def _print(Si, d):
print("\n")
print(
'Term \t Sa Sb S ST #select ')
print('------------------------------------------------------------------------------------')
    format1 = (
        "%-11s \t %5.2f (±%.2f) %5.2f (±%.2f) %5.2f (±%.2f) %5.2f (±%.2f) %-3.0f")
    format2 = (
        "%-11s \t %5.2f (±%.2f) %5.2f (±%.2f) %5.2f (±%.2f) %-3.0f")
for i in range(len(Si['Sa'])):
if i < d:
print(format1 % (Si['Term'][i], Si['Sa'][i], Si['Sa_conf'][i], Si['Sb'][i], Si['Sb_conf'][i], Si['S'][i],
Si['S_conf'][i], Si['ST'][i], Si['ST_conf'][i], np.sum(Si['select'][i])))
else:
print(format2 % (Si['Term'][i], Si['Sa'][i], Si['Sa_conf'][i], Si['Sb'][i], Si['Sb_conf'][i], Si['S'][i],
Si['S_conf'][i], np.sum(Si['select'][i])))
print('------------------------------------------------------------------------------------')
    format3 = (
        "%-11s \t %5.2f (±%.2f) %5.2f (±%.2f) %5.2f (±%.2f)")
print(format3 % ('Sum', Si['Sa_sum'], Si['Sa_sum_conf'],
Si['Sb_sum'], Si['Sb_sum_conf'], Si['S_sum'], Si['S_sum_conf']))
keys = ('Sa_sum', 'Sb_sum', 'S_sum', 'Sa_sum_conf', 'Sb_sum_conf', 'S_sum_conf')
for k in keys:
Si.pop(k, None)
return Si
def cli_parse(parser):
parser.add_argument('-X', '--model-input-file', type=str, required=True,
default=None,
help='Model input file')
parser.add_argument('-mor', '--maxorder', type=int, required=False,
default=2,
help='Maximum order of expansion 1-3')
parser.add_argument('-mit', '--maxiter', type=int, required=False,
default=100,
help='Maximum iteration number')
    parser.add_argument('-m', '--m-int', type=int, required=False,
                        default=2,
                        help='Number of B-spline intervals')
    parser.add_argument('-K', '--K-bootstrap', type=int, required=False,
                        default=20,
                        help='Number of bootstrap iterations')
parser.add_argument('-R', '--R-subsample', type=int, required=False,
default=None,
help='Subsample size')
    parser.add_argument('-a', '--alpha', type=float, required=False,
                        default=0.95,
                        help='Confidence level for the F-test')
parser.add_argument('-lambda', '--lambdax', type=float, required=False,
default=0.01,
help='Regularization constant')
return parser
def cli_action(args):
problem = read_param_file(args.paramfile)
Y = np.loadtxt(args.model_output_file,
delimiter=args.delimiter, usecols=(args.column,))
X = np.loadtxt(args.model_input_file, delimiter=args.delimiter, ndmin=2)
mor = args.maxorder
mit = args.maxiter
m = args.m_int
K = args.K_bootstrap
R = args.R_subsample
alpha = args.alpha
lambdax = args.lambdax
    options = {'maxorder': mor, 'maxiter': mit, 'm': m,
               'K': K, 'R': R, 'alpha': alpha, 'lambdax': lambdax,
               'print_to_console': True}
if len(X.shape) == 1:
X = X.reshape((len(X), 1))
analyze(problem, X, Y, **options)
if __name__ == "__main__":
common_args.run_cli(cli_parse, cli_action)