From af6680bc1c958051af0b6ad607b5a4da4541ae2c Mon Sep 17 00:00:00 2001
From: kohlhaasrebecca <rebecca.kohlhaas@outlook.com>
Date: Thu, 25 Jul 2024 17:06:00 +0200
Subject: [PATCH] Rudimentary checks if PCE for SeqDesign and Sobol' indices

---
 .../post_processing/post_processing.py        | 114 +++---------------
 .../surrogate_models/polynomial_chaos.py      |  33 +++--
 .../surrogate_models/sequential_design.py     |  30 ++++-
 .../surrogate_models/surrogate_models.py      |  17 +--
 4 files changed, 72 insertions(+), 122 deletions(-)

diff --git a/src/bayesvalidrox/post_processing/post_processing.py b/src/bayesvalidrox/post_processing/post_processing.py
index 76452f80b..eaffdff27 100644
--- a/src/bayesvalidrox/post_processing/post_processing.py
+++ b/src/bayesvalidrox/post_processing/post_processing.py
@@ -18,6 +18,7 @@ plt.style.use(os.path.join(os.path.split(__file__)[0],
                            '../', 'bayesvalidrox.mplstyle'))
 
 
+
 class PostProcessing:
     """
     This class provides many helper functions to post-process the trained
@@ -72,7 +73,7 @@ class PostProcessing:
         x_values_orig = self.engine.ExpDesign.x_values
 
         # Compute the moments with the PCEModel object
-        self.pce_means, self.pce_stds = self.compute_pce_moments()
+        self.means, self.stds = self.MetaModel.compute_moments()
 
         # Get the variables
         out_names = Model.Output.names
@@ -88,8 +89,8 @@ class PostProcessing:
             fig, ax = plt.subplots(nrows=1, ncols=2)
 
             # Extract mean and std
-            mean_data = self.pce_means[key]
-            std_data = self.pce_stds[key]
+            mean_data = self.means[key]
+            std_data = self.stds[key]
 
             # Extract a list of x values
             if type(x_values_orig) is dict:
@@ -147,7 +148,7 @@ class PostProcessing:
                 bbox_inches='tight'
                 )
 
-        return self.pce_means, self.pce_stds
+        return self.means, self.stds
 
     # -------------------------------------------------------------------------
     def valid_metamodel(self, n_samples=1, samples=None, model_out_dict=None,
@@ -562,12 +563,18 @@ class PostProcessing:
             Total Sobol indices.
 
         """
+        # This function currently only supports PCE/aPCE
+        if not hasattr(self.MetaModel, 'meta_model_type'):
+            raise AttributeError('Sobol indices currently only support PCE-type models!')
+        if self.MetaModel.meta_model_type.lower() not in ['pce', 'apce']:
+            raise AttributeError('Sobol indices currently only support PCE-type models!')
+            
         # Extract the necessary variables
         PCEModel = self.MetaModel
-        basis_dict = PCEModel.basis_dict
-        coeffs_dict = PCEModel.coeffs_dict
+        basis_dict = PCEModel._basis_dict
+        coeffs_dict = PCEModel._coeffs_dict
         n_params = PCEModel.n_params
-        max_order = np.max(PCEModel.pce_deg)
+        max_order = np.max(PCEModel._pce_deg)
         sobol_cell_b = {}
         total_sobol_b = {}
         cov_Z_p_q = np.zeros((n_params))
@@ -600,7 +607,7 @@ class PostProcessing:
                     Basis = basis_dict[f'b_{b_i+1}'][output][f'y_{pIdx+1}']
 
                     try:
-                        clf_poly = PCEModel.clf_poly[f'b_{b_i+1}'][output][f'y_{pIdx+1}']
+                        clf_poly = PCEModel._clf_poly[f'b_{b_i+1}'][output][f'y_{pIdx+1}']
                         PCECoeffs = clf_poly.coef_
                     except:
                         PCECoeffs = coeffs_dict[f'b_{b_i+1}'][output][f'y_{pIdx+1}']
@@ -655,7 +662,7 @@ class PostProcessing:
                         if pIdx < n_meas_points-1:
                             nextBasis = basis_dict[f'b_{b_i+1}'][output][f'y_{pIdx+2}']
                             if PCEModel.bootstrap_method != 'fast' or b_i == 0:
-                                clf_poly = PCEModel.clf_poly[f'b_{b_i+1}'][output][f'y_{pIdx+2}']
+                                clf_poly = PCEModel._clf_poly[f'b_{b_i+1}'][output][f'y_{pIdx+2}']
                                 nextPCECoeffs = clf_poly.coef_
                             else:
                                 nextPCECoeffs = coeffs_dict[f'b_{b_i+1}'][output][f'y_{pIdx+2}']
@@ -1046,95 +1053,6 @@ class PostProcessing:
 
         return
 
-    # -------------------------------------------------------------------------
-    def compute_pce_moments(self):
-        """
-        Computes the first two moments using the PCE-based meta-model.
-        # TODO: rebuild this to use the general MetaModel.compute_moments() function.
-
-        Returns
-        -------
-        pce_means: dict
-            The first moments (mean) of outpust.
-        pce_means: dict
-            The first moments (mean) of outpust.
-
-        """
-
-        MetaModel = self.MetaModel
-        outputs = self.ModelObj.Output.names
-        pce_means_b = {}
-        pce_stds_b = {}
-
-        # Loop over bootstrap iterations
-        for b_i in range(MetaModel.n_bootstrap_itrs):
-            # Loop over the metamodels
-            coeffs_dicts = MetaModel.coeffs_dict[f'b_{b_i+1}'].items()
-            means = {}
-            stds = {}
-            for output, coef_dict in coeffs_dicts:
-
-                pce_mean = np.zeros((len(coef_dict)))
-                pce_var = np.zeros((len(coef_dict)))
-
-                for index, values in coef_dict.items():
-                    idx = int(index.split('_')[1]) - 1
-                    coeffs = MetaModel.coeffs_dict[f'b_{b_i+1}'][output][index]
-
-                    # Mean = c_0
-                    if coeffs[0] != 0:
-                        pce_mean[idx] = coeffs[0]
-                    else:
-                        clf_poly = MetaModel.clf_poly[f'b_{b_i+1}'][output]
-                        pce_mean[idx] = clf_poly[index].intercept_
-                    # Var = sum(coeffs[1:]**2)
-                    pce_var[idx] = np.sum(np.square(coeffs[1:]))
-
-                # Save predictions for each output
-                if MetaModel.dim_red_method.lower() == 'pca':
-                    PCA = MetaModel.pca[f'b_{b_i+1}'][output]
-                    means[output] = PCA.inverse_transform(pce_mean)
-                    stds[output] = np.sqrt(np.dot(pce_var,
-                                                  PCA.components_**2))
-                else:
-                    means[output] = pce_mean
-                    stds[output] = np.sqrt(pce_var)
-
-            # Save predictions for each bootstrap iteration
-            pce_means_b[b_i] = means
-            pce_stds_b[b_i] = stds
-
-        # Change the order of nesting
-        mean_all = {}
-        for i in sorted(pce_means_b):
-            for k, v in pce_means_b[i].items():
-                if k not in mean_all:
-                    mean_all[k] = [None] * len(pce_means_b)
-                mean_all[k][i] = v
-        std_all = {}
-        for i in sorted(pce_stds_b):
-            for k, v in pce_stds_b[i].items():
-                if k not in std_all:
-                    std_all[k] = [None] * len(pce_stds_b)
-                std_all[k][i] = v
-
-        # Back transformation if PCA is selected.
-        pce_means, pce_stds = {}, {}
-        for output in outputs:
-            pce_means[output] = np.mean(mean_all[output], axis=0)
-            pce_stds[output] = np.mean(std_all[output], axis=0)
-
-            # Print a report table
-            print("\n>>>>> Moments of {} <<<<<".format(output))
-            print("\nIndex  |  Mean   |  Std. deviation")
-            print('-'*35)
-            print('\n'.join(f'{i+1}  |  {k:.3e}  |  {j:.3e}' for i, (k, j)
-                            in enumerate(zip(pce_means[output],
-                                             pce_stds[output]))))
-        print('-'*40)
-
-        return pce_means, pce_stds
-
     # -------------------------------------------------------------------------
     def _get_sample(self, n_samples=None):
         """
diff --git a/src/bayesvalidrox/surrogate_models/polynomial_chaos.py b/src/bayesvalidrox/surrogate_models/polynomial_chaos.py
index 103edfa8d..b30ec22d6 100644
--- a/src/bayesvalidrox/surrogate_models/polynomial_chaos.py
+++ b/src/bayesvalidrox/surrogate_models/polynomial_chaos.py
@@ -115,11 +115,14 @@ class PCE(MetaModel):
         self._pce_deg = pce_deg
         self._pce_q_norm = pce_q_norm
 
+        # TODO: These three parameters belong to the 'error_model'. 
+        #       Can this be removed to outside of this class?
+        #self._errorScale = None 
+        #self._error_clf_poly = None
+        #self._errorRegMethod = None
+        
         # Other params
         self._polycoeffs = None
-        self._errorScale = None
-        self._error_clf_poly = None
-        self._errorRegMethod = None
         self._univ_p_val = None
         self._allBasisIndices = None
         self._deg_array = None
@@ -140,13 +143,15 @@ class PCE(MetaModel):
             DESCRIPTION.
 
         """
-        # TODO: add to these!
-        if n_bootstrap_itrs>=0:
+        # TODO: add to these! E.g. ridge and lasso
+        if n_bootstrap_itrs>1:
             return True 
-        if _pce_reg_method != 'ols':
-            return True
-        else:
+        if _pce_reg_method.lower() == 'ols':
+            return False
+        if _pce_reg_method.lower() == 'lars':
             return False
+        else:
+            return True
         
 
     def build_metamodel(self, n_init_samples=None) -> None:
@@ -370,6 +375,7 @@ class PCE(MetaModel):
                     self._basis_dict[f'b_{b_i + 1}'][key][f"y_{i + 1}"] = out[i]['multi_indices']
                     self.LOOCV_score_dict[f'b_{b_i + 1}'][key][f"y_{i + 1}"] = out[i]['LOOCVScore']
                     self._clf_poly[f'b_{b_i + 1}'][key][f"y_{i + 1}"] = out[i]['_clf_poly']
+                    # TODO: this is commented out here, but should be used in the SeqDesign??
                     # self._LCerror[f'b_{b_i+1}'][key][f"y_{i+1}"] = out[i]['_LCerror']
                     
         # Restore the univariate polynomials
@@ -1066,6 +1072,17 @@ class PCE(MetaModel):
             pce_means[output] = np.mean(mean_all[output], axis=0)
             pce_stds[output] = np.mean(std_all[output], axis=0)
 
+        if self.verbose:
+            for output in outputs:
+                # Print a report table
+                print("\n>>>>> Moments of {} <<<<<".format(output))
+                print("\nIndex  |  Mean   |  Std. deviation")
+                print('-'*35)
+                print('\n'.join(f'{i+1}  |  {k:.3e}  |  {j:.3e}' for i, (k, j)
+                                in enumerate(zip(pce_means[output],
+                                                 pce_stds[output]))))
+            print('-'*40)
+
         return pce_means, pce_stds
     
 
diff --git a/src/bayesvalidrox/surrogate_models/sequential_design.py b/src/bayesvalidrox/surrogate_models/sequential_design.py
index 38c5c3ac9..2a64f00c3 100644
--- a/src/bayesvalidrox/surrogate_models/sequential_design.py
+++ b/src/bayesvalidrox/surrogate_models/sequential_design.py
@@ -777,14 +777,26 @@ class SequentialDesign:
 
         # Bayesian information criterion
         elif var.lower() == 'bic':
-            coeffs = self.MetaModel.coeffs_dict.values()
+            # This function currently only supports PCE/aPCE
+            if not hasattr(self.MetaModel, 'meta_model_type'):
+                raise AttributeError('Sobol indices currently only support PCE-type models!')
+            if self.MetaModel.meta_model_type.lower() not in ['pce', 'apce']:
+                raise AttributeError('Sobol indices currently only support PCE-type models!')
+            
+            coeffs = self.MetaModel._coeffs_dict.values()
             nModelParams = max(len(v) for val in coeffs for v in val.values())
             maxL = np.nanmax(likelihoods)
             U_J_d = -2 * np.log(maxL) + np.log(n_obs) * nModelParams
 
         # Akaike information criterion
         elif var.lower() == 'aic':
-            coeffs = self.MetaModel.coeffs_dict.values()
+            # This function currently only supports PCE/aPCE
+            if not hasattr(self.MetaModel, 'meta_model_type'):
+                raise AttributeError('Sobol indices currently only support PCE-type models!')
+            if self.MetaModel.meta_model_type.lower() not in ['pce', 'apce']:
+                raise AttributeError('Sobol indices currently only support PCE-type models!')
+                
+            coeffs = self.MetaModel._coeffs_dict.values()
             nModelParams = max(len(v) for val in coeffs for v in val.values())
             maxlogL = np.log(np.nanmax(likelihoods))
             AIC = -2 * maxlogL + 2 * nModelParams
@@ -1109,6 +1121,14 @@ class SequentialDesign:
         X_new : array of shape (1, n_params)
             The new sampling location in the input space.
         """
+        
+        # This function currently only supports PCE/aPCE
+        if not hasattr(self.MetaModel, 'meta_model_type'):
+            raise AttributeError('Sobol indices currently only support PCE-type models!')
+        if self.MetaModel.meta_model_type.lower() not in ['pce', 'apce']:
+            raise AttributeError('Sobol indices currently only support PCE-type models!')
+            
+        
         MetaModelOrig = self  # TODO: this doesn't fully seem correct?
         n_new_samples = MetaModelOrig.ExpDesign.n_new_samples
         NCandidate = candidates.shape[0]
@@ -1133,16 +1153,16 @@ class SequentialDesign:
         Phi = np.zeros(NCandidate)
 
         # TODO: also patched here
-        BasisIndices = self.MetaModel.basis_dict['b_1'][OutputName]["y_" + str(outIdx + 1)]
+        BasisIndices = self.MetaModel._basis_dict['b_1'][OutputName]["y_" + str(outIdx + 1)]
         P = len(BasisIndices)
 
         # ------ Old Psi ------------
-        univ_p_val = self.MetaModel.univ_basis_vals(oldExpDesignX)
+        univ_p_val = self.MetaModel._univ_basis_vals(oldExpDesignX)
         Psi = create_psi(BasisIndices, univ_p_val)
 
         # ------ New candidates (Psi_c) ------------
         # Assemble Psi_c
-        univ_p_val_c = self.MetaModel.univ_basis_vals(candidates)
+        univ_p_val_c = self.MetaModel._univ_basis_vals(candidates)
         Psi_c = create_psi(BasisIndices, univ_p_val_c)
 
         for idx in range(NCandidate):
diff --git a/src/bayesvalidrox/surrogate_models/surrogate_models.py b/src/bayesvalidrox/surrogate_models/surrogate_models.py
index 49733129f..d0229de2d 100644
--- a/src/bayesvalidrox/surrogate_models/surrogate_models.py
+++ b/src/bayesvalidrox/surrogate_models/surrogate_models.py
@@ -40,6 +40,8 @@ class MetaModel:
     This class describes the necessary functions and propoerties of a 
     surrogate model in bayesvalidrox. It accepts an input object (input_obj)
     containing the specification of the distributions for uncertain parameters.
+    
+    # TODO: include obligatry lines of code in the empty functions, e.g. transformations, checks,...
 
     Attributes
     ----------
@@ -78,29 +80,21 @@ class MetaModel:
         self.bootstrap_method = bootstrap_method
         self.n_bootstrap_itrs = n_bootstrap_itrs
         self.dim_red_method = dim_red_method
-        self.verbose = verbose
-        
         self.is_gaussian = is_gaussian
+        self.verbose = verbose
 
         # Other params
         self.InputSpace = None
         self.var_pca_threshold = None
-        #self.errorScale = None
-        #self.errorclf_poly = None
-        #self.errorRegMethod = None
         self.n_pca_components = None
         self.out_names = None
         self.n_samples = None
         self.CollocationPoints = None # TODO: rename this
         self.pca = None
-        self.LCerror = None
+        self.LCerror = None # TODO: only used in loocv-exploration?! But not even assigned correctly in the PCE class???
         self.LOOCV_score_dict = None # TODO: generic function that calculates this score!?
-        #self.x_scaler = None
-        #self.gp_poly = None
         self.n_params = None
         self.ndim = None # TODO: this should be the same as n_params!?
-        #self.init_type = None # TODO: this parameter seems to not be used
-        #self.rmse = None # TODO: check if this is needed 
         
     def check_is_gaussian(self) -> bool:
         """
@@ -109,7 +103,8 @@ class MetaModel:
 
     def build_metamodel(self, n_init_samples=None) -> None:
         """
-        Builds the parts for the metamodel (polynomes,...) that are neede before fitting.
+        Builds the parts for the metamodel (polynomes,...) that are needed 
+        before fitting.
 
         Returns
         -------
-- 
GitLab