diff --git a/src/bayesvalidrox/surrogate_models/polynomial_chaos.py b/src/bayesvalidrox/surrogate_models/polynomial_chaos.py index 592652e2abcd2fcabb09392e800c02b5fe27f655..37d34f31de0bc58e2ee56b6b60d7b95895da9d63 100644 --- a/src/bayesvalidrox/surrogate_models/polynomial_chaos.py +++ b/src/bayesvalidrox/surrogate_models/polynomial_chaos.py @@ -10,11 +10,11 @@ import warnings import functools import math +from itertools import combinations import matplotlib.pyplot as plt import numpy as np import sklearn.linear_model as lm from joblib import Parallel, delayed -from itertools import combinations from tqdm import tqdm from .apoly_construction import apoly_construction @@ -25,7 +25,11 @@ from .orthogonal_matching_pursuit import OrthogonalMatchingPursuit from .reg_fast_ard import RegressionFastARD from .reg_fast_laplace import RegressionFastLaplace -from .surrogate_models import MetaModel, _preprocessing_fit, _bootstrap_fit, _preprocessing_eval, _bootstrap_eval +from .surrogate_models import (MetaModel, + _preprocessing_fit, + _bootstrap_fit, + _preprocessing_eval, + _bootstrap_eval) from .supplementary import corr_loocv_error, create_psi @@ -662,7 +666,7 @@ class PCE(MetaModel): print("Best Indices:\n", basis_indices) if self._pce_reg_method in ['BRR', 'ARD']: - fig, ax = plt.subplots(figsize=(12, 10)) + _, ax = plt.subplots(figsize=(12, 10)) plt.title("Marginal log-likelihood") plt.plot(_clf_poly.scores_, color='navy', linewidth=2) plt.ylabel("Score") @@ -832,13 +836,13 @@ class PCE(MetaModel): # Create orthogonal polynomial coefficients if necessary if max_deg is not None: # and self.input_obj.poly_coeffs_flag: self._polycoeffs = {} - for parIdx in tqdm(range(ndim), ascii=True, + for par_idx in tqdm(range(ndim), ascii=True, desc="Computing orth. polynomial coeffs"): poly_coeffs = apoly_construction( - self.InputSpace.raw_data[parIdx], + self.InputSpace.raw_data[par_idx], max_deg ) - self._polycoeffs[f'p_{parIdx + 1}'] = poly_coeffs + self._polycoeffs[f'p_{par_idx + 1}'] = poly_coeffs else: raise AttributeError( 'MetaModel cannot generate polynomials in the given scenario!') @@ -860,14 +864,10 @@ class PCE(MetaModel): pce_means_b = {} pce_stds_b = {} - # Loop over bootstrap iterations for b_i in range(self.n_bootstrap_itrs): - # Loop over the metamodels _coeffs_dicts = self._coeffs_dict[f'b_{b_i + 1}'].items() - means = {} - stds = {} - for output, coef_dict in _coeffs_dicts: + for output, coef_dict in _coeffs_dicts: pce_mean = np.zeros((len(coef_dict))) pce_var = np.zeros((len(coef_dict))) @@ -875,27 +875,21 @@ class PCE(MetaModel): idx = int(index.split('_')[1]) - 1 coeffs = self._coeffs_dict[f'b_{b_i + 1}'][output][index] - # Mean = c_0 if coeffs[0] != 0: pce_mean[idx] = coeffs[0] else: _clf_poly = self._clf_poly[f'b_{b_i + 1}'][output] pce_mean[idx] = _clf_poly[index].intercept_ - # Var = sum(coeffs[1:]**2) pce_var[idx] = np.sum(np.square(coeffs[1:])) # Save predictions for each output if self.dim_red_method.lower() == 'pca': pca = self.pca[f'b_{b_i + 1}'][output] - means[output] = pca.inverse_transform(pce_mean) - stds[output] = pca.inverse_transform(np.sqrt(pce_var)) + pce_means_b[b_i][output] = pca.inverse_transform(pce_mean) + pce_stds_b[b_i][output] = pca.inverse_transform(np.sqrt(pce_var)) else: - means[output] = pce_mean - stds[output] = np.sqrt(pce_var) - - # Save predictions for each bootstrap iteration - pce_means_b[b_i] = means - pce_stds_b[b_i] = stds + pce_means_b[b_i][output] = pce_mean + pce_stds_b[b_i][output] = np.sqrt(pce_var) # Change the order of nesting mean_all = {} @@ -917,10 +911,10 @@ class PCE(MetaModel): pce_means[output] = np.mean(mean_all[output], axis=0) pce_stds[output] = np.mean(std_all[output], axis=0) + # Print a report table if self.verbose: for output in outputs: - # Print a report table - print("\n>>>>> Moments of {} <<<<<".format(output)) + print(f"\n>>>>> Moments of {output} <<<<<") print("\nIndex | Mean | Std. deviation") print('-'*35) print('\n'.join(f'{i+1} | {k:.3e} | {j:.3e}' for i, (k, j) @@ -945,13 +939,11 @@ class PCE(MetaModel): totalsobol """ - # Extract the necessary variables - max_order = np.max(self._pce_deg) # TODO : simplify - sobol_cell_b = {} - total_sobol_b = {} + max_order = np.max(self._pce_deg) n_params = len(self.input_obj.Marginals) - cov_Z_p_q = np.zeros((n_params)) + cov_zpq = np.zeros((n_params)) outputs = self.out_names + sobol_cell_b, total_sobol_b = {}, {} for b_i in range(self.n_bootstrap_itrs): sobol_cell_, total_sobol_ = {}, {} @@ -965,130 +957,117 @@ class PCE(MetaModel): for i_order in range(1, max_order + 1): n_comb = math.comb(n_params, i_order) - sobol_cell_array[i_order] = np.zeros((n_comb, n_meas_points)) - total_sobol_array = np.zeros((n_params, n_meas_points)) # Initialize the cell to store the names of the variables - TotalVariance = np.zeros((n_meas_points)) - # Loop over all measurement points and calculate sobol indices - for pIdx in range(n_meas_points): - # Extract the basis indices (alpha) and coefficients - Basis = self._basis_dict[f"b_{b_i+1}"][output][f"y_{pIdx+1}"] - - try: - clf_poly = self.clf_poly[f"b_{b_i+1}"][output][ - f"y_{pIdx+1}" - ] - PCECoeffs = clf_poly.coef_ - except: - PCECoeffs = self._coeffs_dict[f"b_{b_i+1}"][output][f"y_{pIdx+1}"] + total_variance = np.zeros((n_meas_points)) + for p_idx in range(n_meas_points): + basis = self._basis_dict[f"b_{b_i+1}"][output][f"y_{p_idx+1}"] + coeffs = self._coeffs_dict[f"b_{b_i+1}"][output][f"y_{p_idx+1}"] # Compute total variance - TotalVariance[pIdx] = np.sum(np.square(PCECoeffs[1:])) + total_variance[p_idx] = np.sum(np.square(coeffs[1:])) - nzidx = np.where(PCECoeffs != 0)[0] + nzidx = np.where(coeffs != 0)[0] # Set all the Sobol indices equal to zero in the presence of a # null output. if len(nzidx) == 0: - # This is buggy. + # TODO: This is buggy. for i_order in range(1, max_order + 1): - sobol_cell_array[i_order][:, pIdx] = 0 + sobol_cell_array[i_order][:, p_idx] = 0 - # Otherwise compute them by summing well-chosen coefficients + # Otherwise compute them from sums of the coefficients else: - nz_basis = Basis[nzidx] + nz_basis = basis[nzidx] for i_order in range(1, max_order + 1): idx = np.where(np.sum(nz_basis > 0, axis=1) == i_order) subbasis = nz_basis[idx] - Z = np.array(list(combinations(range(n_params), i_order))) - - for q in range(Z.shape[0]): - Zq = Z[q] - subsubbasis = subbasis[:, Zq] - subidx = np.prod(subsubbasis, axis=1) > 0 - sum_ind = nzidx[idx[0][subidx]] - if TotalVariance[pIdx] == 0.0: - sobol_cell_array[i_order][q, pIdx] = 0.0 + z_ = np.array(list(combinations(range(n_params), i_order))) + + for q in range(z_.shape[0]): + if total_variance[p_idx] == 0.0: + sobol_cell_array[i_order][q, p_idx] = 0.0 else: - sobol = np.sum(np.square(PCECoeffs[sum_ind])) - sobol /= TotalVariance[pIdx] - sobol_cell_array[i_order][q, pIdx] = sobol + subidx = np.prod(subbasis[:, z_[q]], axis=1) > 0 + sum_ind = nzidx[idx[0][subidx]] + sobol = np.sum(np.square(coeffs[sum_ind])) + sobol /= total_variance[p_idx] + sobol_cell_array[i_order][q, p_idx] = sobol # Compute the TOTAL Sobol indices. - for ParIdx in range(n_params): - idx = nz_basis[:, ParIdx] > 0 + for par_idx in range(n_params): + idx = nz_basis[:, par_idx] > 0 sum_ind = nzidx[idx] - if TotalVariance[pIdx] == 0.0: - total_sobol_array[ParIdx, pIdx] = 0.0 + if total_variance[p_idx] == 0.0: + total_sobol_array[par_idx, p_idx] = 0.0 else: - sobol = np.sum(np.square(PCECoeffs[sum_ind])) - sobol /= TotalVariance[pIdx] - total_sobol_array[ParIdx, pIdx] = sobol + sobol = np.sum(np.square(coeffs[sum_ind])) + sobol /= total_variance[p_idx] + total_sobol_array[par_idx, p_idx] = sobol # ----- if PCA selected: Compute covariance ----- + # TODO: does this not double itself with the next if-statement? if self.dim_red_method.lower() == "pca": # Extract the basis indices (alpha) and coefficients for # next component - if pIdx < n_meas_points - 1: - nextBasis = self._basis_dict[f"b_{b_i+1}"][output][f"y_{pIdx+2}"] + if p_idx < n_meas_points - 1: + nextbasis = self._basis_dict[f"b_{b_i+1}"][output][f"y_{p_idx+2}"] if self.bootstrap_method != "fast" or b_i == 0: - clf_poly = self.clf_poly[f"b_{b_i+1}"][output][ - f"y_{pIdx+2}" + clf_poly = self._clf_poly[f"b_{b_i+1}"][output][ + f"y_{p_idx+2}" ] - nextPCECoeffs = clf_poly.coef_ + next_coeffs = clf_poly.coef_ else: - nextPCECoeffs = self._coeffs_dict[f"b_{b_i+1}"][output][ - f"y_{pIdx+2}" + next_coeffs = self._coeffs_dict[f"b_{b_i+1}"][output][ + f"y_{p_idx+2}" ] # Choose the common non-zero basis - mask = (Basis[:, None] == nextBasis).all(-1).any(-1) - n_mask = (nextBasis[:, None] == Basis).all(-1).any(-1) + mask = (basis[:, None] == nextbasis).all(-1).any(-1) + n_mask = (nextbasis[:, None] == basis).all(-1).any(-1) # Compute the covariance in Eq 17. - for ParIdx in range(n_params): - idx = (mask) & (Basis[:, ParIdx] > 0) - n_idx = (n_mask) & (nextBasis[:, ParIdx] > 0) + for par_idx in range(n_params): + idx = (mask) & (basis[:, par_idx] > 0) + n_idx = (n_mask) & (nextbasis[:, par_idx] > 0) try: - cov_Z_p_q[ParIdx] += np.sum( - np.dot(PCECoeffs[idx], nextPCECoeffs[n_idx]) + cov_zpq[par_idx] += np.sum( + np.dot(coeffs[idx], next_coeffs[n_idx]) ) except: pass # Compute the sobol indices according to Ref. 2 if self.dim_red_method.lower() == "pca": + # TODO: Update so that it does not use the ExpDesign anymore! n_c_points = self.engine.ExpDesign.Y[output].shape[1] - PCA = self.pca[f"b_{b_i+1}"][output] - compPCA = PCA.components_ - nComp = compPCA.shape[0] - var_Z_p = PCA.explained_variance_ + pca = self.pca[f"b_{b_i+1}"][output] + comp_pca = pca.components_ + n_comp = comp_pca.shape[0] + var_zp = pca.explained_variance_ # Extract the sobol index of the components for i_order in range(1, max_order + 1): n_comb = math.comb(n_params, i_order) sobol_array[i_order] = np.zeros((n_comb, n_c_points)) - Z = np.array(list(combinations(range(n_params), i_order))) + z_ = np.array(list(combinations(range(n_params), i_order))) # Loop over parameters - for q in range(Z.shape[0]): - S_Z_i = sobol_cell_array[i_order][q] + for q in range(z_.shape[0]): + s_zi = sobol_cell_array[i_order][q] - for tIdx in range(n_c_points): - var_Y_t = np.var( - self.engine.ExpDesign.Y[output][:, tIdx] + for t_idx in range(n_c_points): + var_yt = np.var( + self.engine.ExpDesign.Y[output][:, t_idx] ) - if var_Y_t == 0.0: - term1, term2 = 0.0, 0.0 - else: + term1, term2 = 0.0, 0.0 + if var_yt != 0.0: # Eq. 17 - term1 = 0.0 - for i in range(nComp): - a = S_Z_i[i] * var_Z_p[i] - a *= compPCA[i, tIdx] ** 2 + for i in range(n_comp): + a = s_zi[i] * var_zp[i] + a *= comp_pca[i, t_idx] ** 2 term1 += a # TODO: Term 2 @@ -1098,41 +1077,35 @@ class PCE(MetaModel): # term2 *= compPCA[i+1, tIdx] # term2 *= 2 - sobol_array[i_order][q, tIdx] = term1 # + term2 - - # Devide over total output variance Eq. 18 - sobol_array[i_order][q, tIdx] /= var_Y_t + # Divide over total output variance Eq. 18 + sobol_array[i_order][q, t_idx] = term1 # + term2 + sobol_array[i_order][q, t_idx] /= var_yt # Compute the TOTAL Sobol indices. total_sobol = np.zeros((n_params, n_c_points)) - for ParIdx in range(n_params): - S_Z_i = total_sobol_array[ParIdx] - - for tIdx in range(n_c_points): - var_Y_t = np.var(self.engine.ExpDesign.Y[output][:, tIdx]) - if var_Y_t == 0.0: - term1, term2 = 0.0, 0.0 - else: - term1 = 0 - for i in range(nComp): + for par_idx in range(n_params): + s_zi = total_sobol_array[par_idx] + + for t_idx in range(n_c_points): + var_yt = np.var(self.engine.ExpDesign.Y[output][:, t_idx]) + term1, term2 = 0.0, 0.0 + if var_yt != 0.0: + for i in range(n_comp): term1 += ( - S_Z_i[i] * var_Z_p[i] * (compPCA[i, tIdx] ** 2) + s_zi[i] * var_zp[i] * (comp_pca[i, t_idx] ** 2) ) - - # Term 2 - term2 = 0 - for i in range(nComp - 1): + for i in range(n_comp - 1): term2 += ( - cov_Z_p_q[ParIdx] - * compPCA[i, tIdx] - * compPCA[i + 1, tIdx] + cov_zpq[par_idx] + * comp_pca[i, t_idx] + * comp_pca[i + 1, t_idx] ) term2 *= 2 - total_sobol[ParIdx, tIdx] = term1 # + term2 + total_sobol[par_idx, t_idx] = term1 # + term2 # Devide over total output variance Eq. 18 - total_sobol[ParIdx, tIdx] /= var_Y_t + total_sobol[par_idx, t_idx] /= var_yt sobol_cell_[output] = sobol_array total_sobol_[output] = total_sobol @@ -1140,37 +1113,36 @@ class PCE(MetaModel): sobol_cell_[output] = sobol_cell_array total_sobol_[output] = total_sobol_array - # Save for each bootsrtap iteration + # Save for each bootstrap iteration sobol_cell_b[b_i] = sobol_cell_ total_sobol_b[b_i] = total_sobol_ - # Average total sobol indices - total_sobol_all = {} - for i in sorted(total_sobol_b): - for k, v in total_sobol_b[i].items(): - if k not in total_sobol_all: - total_sobol_all[k] = [None] * len(total_sobol_b) - total_sobol_all[k][i] = v + # Combine for Sobol' indices, one set of indices for each possible degree of interaction + self.sobol = {} sobol_all = {} for i in sorted(sobol_cell_b): for k, v in sobol_cell_b[i].items(): - for l, m in v.items(): + for l, _ in v.items(): if l not in sobol_all: sobol_all[l] = {} if k not in sobol_all[l]: sobol_all[l][k] = [None] * len(sobol_cell_b) sobol_all[l][k][i] = v[l] - - self.sobol = {} - # Will receive a set of indices for each possible degree of polynomial/interaction for i_order in range(1, max_order + 1): self.sobol[i_order] = {} for output in outputs: self.sobol[i_order][output] = np.mean( [sobol_all[i_order][output]], axis=0 ) - + + # Combine for Total Sobol' indices + total_sobol_all = {} self.total_sobol = {} + for i in sorted(total_sobol_b): + for k, v in total_sobol_b[i].items(): + if k not in total_sobol_all: + total_sobol_all[k] = [None] * len(total_sobol_b) + total_sobol_all[k][i] = v for output in outputs: self.total_sobol[output] = np.mean(total_sobol_all[output], axis=0)