From f8531d7ee43ca74e9cc8690f276874bc66027a66 Mon Sep 17 00:00:00 2001
From: Maria Fernanda Morales <65073126+mfmo45@users.noreply.github.com>
Date: Thu, 8 Aug 2024 16:45:59 +0200
Subject: [PATCH] [PCA] Fixed PCA bootstrap loops

---
 src/bayesvalidrox/surrogate_models/engine.py  |  4 +++-
 .../surrogate_models/polynomial_chaos.py      | 18 ++++++++--------
 .../surrogate_models/surrogate_models.py      | 21 +++++++++----------
 3 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/src/bayesvalidrox/surrogate_models/engine.py b/src/bayesvalidrox/surrogate_models/engine.py
index d5110f94e..a77dff09b 100644
--- a/src/bayesvalidrox/surrogate_models/engine.py
+++ b/src/bayesvalidrox/surrogate_models/engine.py
@@ -127,7 +127,9 @@ class Engine:
 
         # Prepare X samples
         # For training the surrogate use ExpDesign.X_tr, ExpDesign.X is for the model to run on
-        # TODO: set to some more general value, chech how much this is needed!
+        # TODO: set to some more general value, check how much this is needed!
+        # TODO: for GPs, the maxdeg should be set to 1, because self.emulator will also be True, or give the
+        #  Parent class a _pce_deg=1 attribute automatically
         if self.emulator:
             maxdeg = np.max(MetaModel._pce_deg)
         else:
diff --git a/src/bayesvalidrox/surrogate_models/polynomial_chaos.py b/src/bayesvalidrox/surrogate_models/polynomial_chaos.py
index 7f8c708ec..e83eb9ff1 100644
--- a/src/bayesvalidrox/surrogate_models/polynomial_chaos.py
+++ b/src/bayesvalidrox/surrogate_models/polynomial_chaos.py
@@ -295,6 +295,15 @@ class PCE(MetaModel):
                               range(output.shape[1]))
                 out = list(results)
 
+            # Store the first out dictionary
+            if self.fast_bootstrap and b_i == 0:
+                self.first_out[key] = copy.deepcopy(out)
+
+            # Update the coefficients with OLS during bootstrap-iters
+            if b_i > 0 and self.fast_bootstrap:
+                out = self.update_pce_coeffs(
+                    X, output, self.first_out[key])
+
             # Create a dict to pass the variables
             for i in range(output.shape[1]):
                 self._deg_dict[f'b_{b_i + 1}'][key][f"y_{i + 1}"] = out[i]['degree']
@@ -306,15 +315,6 @@ class PCE(MetaModel):
                 # TODO: this is commented out here, but should be used in the SeqDesign??
                 # self._LCerror[f'b_{b_i+1}'][key][f"y_{i+1}"] = out[i]['_LCerror']
 
-            # Store the first out dictionary
-            if self.fast_bootstrap and b_i == 0:
-                self.first_out[key] = copy.deepcopy(out)
-
-            # Update the coefficients with OLS during bootstrap-iters
-            if b_i > 0 and self.fast_bootstrap:
-                out = self.update_pce_coeffs(
-                    X, output, self.first_out[key])
-
     # -------------------------------------------------------------------------
 
     def update_pce_coeffs(self, X, y, out_dict=None):
diff --git a/src/bayesvalidrox/surrogate_models/surrogate_models.py b/src/bayesvalidrox/surrogate_models/surrogate_models.py
index 94b3dd50f..d6c968944 100644
--- a/src/bayesvalidrox/surrogate_models/surrogate_models.py
+++ b/src/bayesvalidrox/surrogate_models/surrogate_models.py
@@ -231,7 +231,7 @@ def transform_y(self, y, b_i=0, trafo_type=''):
 
             # Start transformation
             pca, y_transform[key], n_comp = self.pca_transformation(
-                y, self.n_pca_components)
+                y[key], self.n_pca_components)
             self.pca[f'b_{b_i + 1}'][key] = pca
 
             # Store the number of components for fast bootstrapping
@@ -303,16 +303,12 @@ def _bootstrap_eval(eval_function):
             kwargs['b_i'] = b_i
             mean_pred, std_pred = eval_function(self, *args, **kwargs)
 
-            # Apply inverse transformations
-            for i in range(mean_pred[list(mean_pred.keys())[0]].shape[1]):
-                # Save predictions for each output
-                if self.dim_red_method.lower() == 'pca':
-                    pca = self.pca[f'b_{b_i + 1}'][f"y_{i + 1}"]
-                    mean_pred[f"y_{i + 1}"] = pca.inverse_transform(mean_pred)
-                    std_pred[f"y_{i + 1}"] = np.zeros(mean_pred.shape)
-                else:
-                    mean_pred[f"y_{i + 1}"] = mean_pred
-                    std_pred[f"y_{i + 1}"] = std_pred
+            # Apply inverse transformation
+            if self.dim_red_method.lower() == 'pca':
+                for output, values in mean_pred.items():
+                    pca = self.pca[f'b_{b_i + 1}'][output]
+                    mean_pred[output] = pca.inverse_transform(values)
+                    std_pred[output] = np.zeros(values.shape)
 
             # Save predictions for each bootstrap iteration
             mean_pred_b[b_i] = mean_pred
@@ -487,6 +483,9 @@ class MetaModel:
         The number of features is set by `self.n_pca_components`.
         If this is not given, `self.var_pca_threshold` is used as a threshold.
 
+        ToDo: Check the inputs needed for this class, there is an error when PCA is used.
+        ToDo: From the transform_y() function, a dictionary is being sent instead of an array for target.
+
         Parameters
         ----------
         target : array of shape (n_samples,)
-- 
GitLab