From af6680bc1c958051af0b6ad607b5a4da4541ae2c Mon Sep 17 00:00:00 2001 From: kohlhaasrebecca <rebecca.kohlhaas@outlook.com> Date: Thu, 25 Jul 2024 17:06:00 +0200 Subject: [PATCH] Rudimentary checks if PCE for SeqDesign and Sobol' indices --- .../post_processing/post_processing.py | 114 +++--------------- .../surrogate_models/polynomial_chaos.py | 33 +++-- .../surrogate_models/sequential_design.py | 30 ++++- .../surrogate_models/surrogate_models.py | 17 +-- 4 files changed, 72 insertions(+), 122 deletions(-) diff --git a/src/bayesvalidrox/post_processing/post_processing.py b/src/bayesvalidrox/post_processing/post_processing.py index 76452f80b..eaffdff27 100644 --- a/src/bayesvalidrox/post_processing/post_processing.py +++ b/src/bayesvalidrox/post_processing/post_processing.py @@ -18,6 +18,7 @@ plt.style.use(os.path.join(os.path.split(__file__)[0], '../', 'bayesvalidrox.mplstyle')) + class PostProcessing: """ This class provides many helper functions to post-process the trained @@ -72,7 +73,7 @@ class PostProcessing: x_values_orig = self.engine.ExpDesign.x_values # Compute the moments with the PCEModel object - self.pce_means, self.pce_stds = self.compute_pce_moments() + self.means, self.stds = self.MetaModel.compute_moments() # Get the variables out_names = Model.Output.names @@ -88,8 +89,8 @@ class PostProcessing: fig, ax = plt.subplots(nrows=1, ncols=2) # Extract mean and std - mean_data = self.pce_means[key] - std_data = self.pce_stds[key] + mean_data = self.means[key] + std_data = self.stds[key] # Extract a list of x values if type(x_values_orig) is dict: @@ -147,7 +148,7 @@ class PostProcessing: bbox_inches='tight' ) - return self.pce_means, self.pce_stds + return self.means, self.stds # ------------------------------------------------------------------------- def valid_metamodel(self, n_samples=1, samples=None, model_out_dict=None, @@ -562,12 +563,18 @@ class PostProcessing: Total Sobol indices. """ + # This function currently only supports PCE/aPCE + if not hasattr(self.MetaModel, 'meta_model_type'): + raise AttributeError('Sobol indices currently only support PCE-type models!') + if self.MetaModel.meta_model_type.lower() not in ['pce', 'apce']: + raise AttributeError('Sobol indices currently only support PCE-type models!') + # Extract the necessary variables PCEModel = self.MetaModel - basis_dict = PCEModel.basis_dict - coeffs_dict = PCEModel.coeffs_dict + basis_dict = PCEModel._basis_dict + coeffs_dict = PCEModel._coeffs_dict n_params = PCEModel.n_params - max_order = np.max(PCEModel.pce_deg) + max_order = np.max(PCEModel._pce_deg) sobol_cell_b = {} total_sobol_b = {} cov_Z_p_q = np.zeros((n_params)) @@ -600,7 +607,7 @@ class PostProcessing: Basis = basis_dict[f'b_{b_i+1}'][output][f'y_{pIdx+1}'] try: - clf_poly = PCEModel.clf_poly[f'b_{b_i+1}'][output][f'y_{pIdx+1}'] + clf_poly = PCEModel._clf_poly[f'b_{b_i+1}'][output][f'y_{pIdx+1}'] PCECoeffs = clf_poly.coef_ except: PCECoeffs = coeffs_dict[f'b_{b_i+1}'][output][f'y_{pIdx+1}'] @@ -655,7 +662,7 @@ class PostProcessing: if pIdx < n_meas_points-1: nextBasis = basis_dict[f'b_{b_i+1}'][output][f'y_{pIdx+2}'] if PCEModel.bootstrap_method != 'fast' or b_i == 0: - clf_poly = PCEModel.clf_poly[f'b_{b_i+1}'][output][f'y_{pIdx+2}'] + clf_poly = PCEModel._clf_poly[f'b_{b_i+1}'][output][f'y_{pIdx+2}'] nextPCECoeffs = clf_poly.coef_ else: nextPCECoeffs = coeffs_dict[f'b_{b_i+1}'][output][f'y_{pIdx+2}'] @@ -1046,95 +1053,6 @@ class PostProcessing: return - # ------------------------------------------------------------------------- - def compute_pce_moments(self): - """ - Computes the first two moments using the PCE-based meta-model. - # TODO: rebuild this to use the general MetaModel.compute_moments() function. - - Returns - ------- - pce_means: dict - The first moments (mean) of outpust. - pce_means: dict - The first moments (mean) of outpust. - - """ - - MetaModel = self.MetaModel - outputs = self.ModelObj.Output.names - pce_means_b = {} - pce_stds_b = {} - - # Loop over bootstrap iterations - for b_i in range(MetaModel.n_bootstrap_itrs): - # Loop over the metamodels - coeffs_dicts = MetaModel.coeffs_dict[f'b_{b_i+1}'].items() - means = {} - stds = {} - for output, coef_dict in coeffs_dicts: - - pce_mean = np.zeros((len(coef_dict))) - pce_var = np.zeros((len(coef_dict))) - - for index, values in coef_dict.items(): - idx = int(index.split('_')[1]) - 1 - coeffs = MetaModel.coeffs_dict[f'b_{b_i+1}'][output][index] - - # Mean = c_0 - if coeffs[0] != 0: - pce_mean[idx] = coeffs[0] - else: - clf_poly = MetaModel.clf_poly[f'b_{b_i+1}'][output] - pce_mean[idx] = clf_poly[index].intercept_ - # Var = sum(coeffs[1:]**2) - pce_var[idx] = np.sum(np.square(coeffs[1:])) - - # Save predictions for each output - if MetaModel.dim_red_method.lower() == 'pca': - PCA = MetaModel.pca[f'b_{b_i+1}'][output] - means[output] = PCA.inverse_transform(pce_mean) - stds[output] = np.sqrt(np.dot(pce_var, - PCA.components_**2)) - else: - means[output] = pce_mean - stds[output] = np.sqrt(pce_var) - - # Save predictions for each bootstrap iteration - pce_means_b[b_i] = means - pce_stds_b[b_i] = stds - - # Change the order of nesting - mean_all = {} - for i in sorted(pce_means_b): - for k, v in pce_means_b[i].items(): - if k not in mean_all: - mean_all[k] = [None] * len(pce_means_b) - mean_all[k][i] = v - std_all = {} - for i in sorted(pce_stds_b): - for k, v in pce_stds_b[i].items(): - if k not in std_all: - std_all[k] = [None] * len(pce_stds_b) - std_all[k][i] = v - - # Back transformation if PCA is selected. - pce_means, pce_stds = {}, {} - for output in outputs: - pce_means[output] = np.mean(mean_all[output], axis=0) - pce_stds[output] = np.mean(std_all[output], axis=0) - - # Print a report table - print("\n>>>>> Moments of {} <<<<<".format(output)) - print("\nIndex | Mean | Std. deviation") - print('-'*35) - print('\n'.join(f'{i+1} | {k:.3e} | {j:.3e}' for i, (k, j) - in enumerate(zip(pce_means[output], - pce_stds[output])))) - print('-'*40) - - return pce_means, pce_stds - # ------------------------------------------------------------------------- def _get_sample(self, n_samples=None): """ diff --git a/src/bayesvalidrox/surrogate_models/polynomial_chaos.py b/src/bayesvalidrox/surrogate_models/polynomial_chaos.py index 103edfa8d..b30ec22d6 100644 --- a/src/bayesvalidrox/surrogate_models/polynomial_chaos.py +++ b/src/bayesvalidrox/surrogate_models/polynomial_chaos.py @@ -115,11 +115,14 @@ class PCE(MetaModel): self._pce_deg = pce_deg self._pce_q_norm = pce_q_norm + # TODO: These three parameters belong to the 'error_model'. + # Can this be removed to outside of this class? + #self._errorScale = None + #self._error_clf_poly = None + #self._errorRegMethod = None + # Other params self._polycoeffs = None - self._errorScale = None - self._error_clf_poly = None - self._errorRegMethod = None self._univ_p_val = None self._allBasisIndices = None self._deg_array = None @@ -140,13 +143,15 @@ class PCE(MetaModel): DESCRIPTION. """ - # TODO: add to these! - if n_bootstrap_itrs>=0: + # TODO: add to these! E.g. ridge and lasso + if n_bootstrap_itrs>1: return True - if _pce_reg_method != 'ols': - return True - else: + if _pce_reg_method.lower() == 'ols': + return False + if _pce_reg_method.lower() == 'lars': return False + else: + return True def build_metamodel(self, n_init_samples=None) -> None: @@ -370,6 +375,7 @@ class PCE(MetaModel): self._basis_dict[f'b_{b_i + 1}'][key][f"y_{i + 1}"] = out[i]['multi_indices'] self.LOOCV_score_dict[f'b_{b_i + 1}'][key][f"y_{i + 1}"] = out[i]['LOOCVScore'] self._clf_poly[f'b_{b_i + 1}'][key][f"y_{i + 1}"] = out[i]['_clf_poly'] + # TODO: this is commented out here, but should be used in the SeqDesign?? # self._LCerror[f'b_{b_i+1}'][key][f"y_{i+1}"] = out[i]['_LCerror'] # Restore the univariate polynomials @@ -1066,6 +1072,17 @@ class PCE(MetaModel): pce_means[output] = np.mean(mean_all[output], axis=0) pce_stds[output] = np.mean(std_all[output], axis=0) + if self.verbose: + for output in outputs: + # Print a report table + print("\n>>>>> Moments of {} <<<<<".format(output)) + print("\nIndex | Mean | Std. deviation") + print('-'*35) + print('\n'.join(f'{i+1} | {k:.3e} | {j:.3e}' for i, (k, j) + in enumerate(zip(pce_means[output], + pce_stds[output])))) + print('-'*40) + return pce_means, pce_stds diff --git a/src/bayesvalidrox/surrogate_models/sequential_design.py b/src/bayesvalidrox/surrogate_models/sequential_design.py index 38c5c3ac9..2a64f00c3 100644 --- a/src/bayesvalidrox/surrogate_models/sequential_design.py +++ b/src/bayesvalidrox/surrogate_models/sequential_design.py @@ -777,14 +777,26 @@ class SequentialDesign: # Bayesian information criterion elif var.lower() == 'bic': - coeffs = self.MetaModel.coeffs_dict.values() + # This function currently only supports PCE/aPCE + if not hasattr(self.MetaModel, 'meta_model_type'): + raise AttributeError('Sobol indices currently only support PCE-type models!') + if self.MetaModel.meta_model_type.lower() not in ['pce', 'apce']: + raise AttributeError('Sobol indices currently only support PCE-type models!') + + coeffs = self.MetaModel._coeffs_dict.values() nModelParams = max(len(v) for val in coeffs for v in val.values()) maxL = np.nanmax(likelihoods) U_J_d = -2 * np.log(maxL) + np.log(n_obs) * nModelParams # Akaike information criterion elif var.lower() == 'aic': - coeffs = self.MetaModel.coeffs_dict.values() + # This function currently only supports PCE/aPCE + if not hasattr(self.MetaModel, 'meta_model_type'): + raise AttributeError('Sobol indices currently only support PCE-type models!') + if self.MetaModel.meta_model_type.lower() not in ['pce', 'apce']: + raise AttributeError('Sobol indices currently only support PCE-type models!') + + coeffs = self.MetaModel._coeffs_dict.values() nModelParams = max(len(v) for val in coeffs for v in val.values()) maxlogL = np.log(np.nanmax(likelihoods)) AIC = -2 * maxlogL + 2 * nModelParams @@ -1109,6 +1121,14 @@ class SequentialDesign: X_new : array of shape (1, n_params) The new sampling location in the input space. """ + + # This function currently only supports PCE/aPCE + if not hasattr(self.MetaModel, 'meta_model_type'): + raise AttributeError('Sobol indices currently only support PCE-type models!') + if self.MetaModel.meta_model_type.lower() not in ['pce', 'apce']: + raise AttributeError('Sobol indices currently only support PCE-type models!') + + MetaModelOrig = self # TODO: this doesn't fully seem correct? n_new_samples = MetaModelOrig.ExpDesign.n_new_samples NCandidate = candidates.shape[0] @@ -1133,16 +1153,16 @@ class SequentialDesign: Phi = np.zeros(NCandidate) # TODO: also patched here - BasisIndices = self.MetaModel.basis_dict['b_1'][OutputName]["y_" + str(outIdx + 1)] + BasisIndices = self.MetaModel._basis_dict['b_1'][OutputName]["y_" + str(outIdx + 1)] P = len(BasisIndices) # ------ Old Psi ------------ - univ_p_val = self.MetaModel.univ_basis_vals(oldExpDesignX) + univ_p_val = self.MetaModel._univ_basis_vals(oldExpDesignX) Psi = create_psi(BasisIndices, univ_p_val) # ------ New candidates (Psi_c) ------------ # Assemble Psi_c - univ_p_val_c = self.MetaModel.univ_basis_vals(candidates) + univ_p_val_c = self.MetaModel._univ_basis_vals(candidates) Psi_c = create_psi(BasisIndices, univ_p_val_c) for idx in range(NCandidate): diff --git a/src/bayesvalidrox/surrogate_models/surrogate_models.py b/src/bayesvalidrox/surrogate_models/surrogate_models.py index 49733129f..d0229de2d 100644 --- a/src/bayesvalidrox/surrogate_models/surrogate_models.py +++ b/src/bayesvalidrox/surrogate_models/surrogate_models.py @@ -40,6 +40,8 @@ class MetaModel: This class describes the necessary functions and propoerties of a surrogate model in bayesvalidrox. It accepts an input object (input_obj) containing the specification of the distributions for uncertain parameters. + + # TODO: include obligatry lines of code in the empty functions, e.g. transformations, checks,... Attributes ---------- @@ -78,29 +80,21 @@ class MetaModel: self.bootstrap_method = bootstrap_method self.n_bootstrap_itrs = n_bootstrap_itrs self.dim_red_method = dim_red_method - self.verbose = verbose - self.is_gaussian = is_gaussian + self.verbose = verbose # Other params self.InputSpace = None self.var_pca_threshold = None - #self.errorScale = None - #self.errorclf_poly = None - #self.errorRegMethod = None self.n_pca_components = None self.out_names = None self.n_samples = None self.CollocationPoints = None # TODO: rename this self.pca = None - self.LCerror = None + self.LCerror = None # TODO: only used in loocv-exploration?! But not even assigned correctly in the PCE class??? self.LOOCV_score_dict = None # TODO: generic function that calculates this score!? - #self.x_scaler = None - #self.gp_poly = None self.n_params = None self.ndim = None # TODO: this should be the same as n_params!? - #self.init_type = None # TODO: this parameter seems to not be used - #self.rmse = None # TODO: check if this is needed def check_is_gaussian(self) -> bool: """ @@ -109,7 +103,8 @@ class MetaModel: def build_metamodel(self, n_init_samples=None) -> None: """ - Builds the parts for the metamodel (polynomes,...) that are neede before fitting. + Builds the parts for the metamodel (polynomes,...) that are needed + before fitting. Returns ------- -- GitLab