From f8531d7ee43ca74e9cc8690f276874bc66027a66 Mon Sep 17 00:00:00 2001 From: Maria Fernanda Morales <65073126+mfmo45@users.noreply.github.com> Date: Thu, 8 Aug 2024 16:45:59 +0200 Subject: [PATCH] [PCA] Fixed PCA bootstrap loops --- src/bayesvalidrox/surrogate_models/engine.py | 4 +++- .../surrogate_models/polynomial_chaos.py | 18 ++++++++-------- .../surrogate_models/surrogate_models.py | 21 +++++++++---------- 3 files changed, 22 insertions(+), 21 deletions(-) diff --git a/src/bayesvalidrox/surrogate_models/engine.py b/src/bayesvalidrox/surrogate_models/engine.py index d5110f94e..a77dff09b 100644 --- a/src/bayesvalidrox/surrogate_models/engine.py +++ b/src/bayesvalidrox/surrogate_models/engine.py @@ -127,7 +127,9 @@ class Engine: # Prepare X samples # For training the surrogate use ExpDesign.X_tr, ExpDesign.X is for the model to run on - # TODO: set to some more general value, chech how much this is needed! + # TODO: set to some more general value, check how much this is needed! + # TODO: for GPs, the maxdeg should be set to 1, because self.emulator will also be True, or give the + # Parent class a _pce_deg=1 attribute automatically if self.emulator: maxdeg = np.max(MetaModel._pce_deg) else: diff --git a/src/bayesvalidrox/surrogate_models/polynomial_chaos.py b/src/bayesvalidrox/surrogate_models/polynomial_chaos.py index 7f8c708ec..e83eb9ff1 100644 --- a/src/bayesvalidrox/surrogate_models/polynomial_chaos.py +++ b/src/bayesvalidrox/surrogate_models/polynomial_chaos.py @@ -295,6 +295,15 @@ class PCE(MetaModel): range(output.shape[1])) out = list(results) + # Store the first out dictionary + if self.fast_bootstrap and b_i == 0: + self.first_out[key] = copy.deepcopy(out) + + # Update the coefficients with OLS during bootstrap-iters + if b_i > 0 and self.fast_bootstrap: + out = self.update_pce_coeffs( + X, output, self.first_out[key]) + # Create a dict to pass the variables for i in range(output.shape[1]): self._deg_dict[f'b_{b_i + 1}'][key][f"y_{i + 1}"] = 
out[i]['degree'] @@ -306,15 +315,6 @@ class PCE(MetaModel): # TODO: this is commented out here, but should be used in the SeqDesign?? # self._LCerror[f'b_{b_i+1}'][key][f"y_{i+1}"] = out[i]['_LCerror'] - # Store the first out dictionary - if self.fast_bootstrap and b_i == 0: - self.first_out[key] = copy.deepcopy(out) - - # Update the coefficients with OLS during bootstrap-iters - if b_i > 0 and self.fast_bootstrap: - out = self.update_pce_coeffs( - X, output, self.first_out[key]) - # ------------------------------------------------------------------------- def update_pce_coeffs(self, X, y, out_dict=None): diff --git a/src/bayesvalidrox/surrogate_models/surrogate_models.py b/src/bayesvalidrox/surrogate_models/surrogate_models.py index 94b3dd50f..d6c968944 100644 --- a/src/bayesvalidrox/surrogate_models/surrogate_models.py +++ b/src/bayesvalidrox/surrogate_models/surrogate_models.py @@ -231,7 +231,7 @@ def transform_y(self, y, b_i=0, trafo_type=''): # Start transformation pca, y_transform[key], n_comp = self.pca_transformation( - y, self.n_pca_components) + y[key], self.n_pca_components) self.pca[f'b_{b_i + 1}'][key] = pca # Store the number of components for fast bootstrapping @@ -303,16 +303,12 @@ def _bootstrap_eval(eval_function): kwargs['b_i'] = b_i mean_pred, std_pred = eval_function(self, *args, **kwargs) - # Apply inverse transformations - for i in range(mean_pred[list(mean_pred.keys())[0]].shape[1]): - # Save predictions for each output - if self.dim_red_method.lower() == 'pca': - pca = self.pca[f'b_{b_i + 1}'][f"y_{i + 1}"] - mean_pred[f"y_{i + 1}"] = pca.inverse_transform(mean_pred) - std_pred[f"y_{i + 1}"] = np.zeros(mean_pred.shape) - else: - mean_pred[f"y_{i + 1}"] = mean_pred - std_pred[f"y_{i + 1}"] = std_pred + # Apply inverse transformation + if self.dim_red_method.lower() == 'pca': + for output, values in mean_pred.items(): + pca = self.pca[f'b_{b_i + 1}'][output] + mean_pred[output] = pca.inverse_transform(values) + std_pred[output] = 
np.zeros(values.shape) # Save predictions for each bootstrap iteration mean_pred_b[b_i] = mean_pred @@ -487,6 +483,9 @@ class MetaModel: The number of features is set by `self.n_pca_components`. If this is not given, `self.var_pca_threshold` is used as a threshold. + ToDo: Check the inputs needed for this class; there is an error when PCA is used. + ToDo: From the transform_y() function, a dictionary is being sent instead of an array for target. + Parameters ---------- target : array of shape (n_samples,) -- GitLab