From 4151a329c2f7e6ad8194f7199d24d8a87ac86fce Mon Sep 17 00:00:00 2001
From: kohlhaasrebecca <rebecca.kohlhaas@outlook.com>
Date: Thu, 25 Jul 2024 10:55:34 +0200
Subject: [PATCH] Basic training with PCE stable, generalized
 MetaModel.compute_moments

Tested static training with PCE on the analytical example; remaining changes TBD for sequential training and post-processing.
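
Sketch of the intended call pattern after this change (a minimal, hypothetical
example: the bare Input() and the random training data are placeholders, and
the marginals must be configured as in example_analytical_function.py):

    import numpy as np
    from bayesvalidrox import Input, PCE

    inputs = Input()          # configure marginals as in the example scripts
    metamodel = PCE(inputs)   # PCE replaces MetaModel as the user-facing class

    # Placeholder training data: 100 samples of 2 parameters, one output group
    X = np.random.uniform(-5, 5, (100, 2))
    y = {'Z': np.random.rand(100, 1)}

    metamodel.fit(X, y)
    # Generalized moments API; replaces the PCE-only _compute_pce_moments()
    means, stds = metamodel.compute_moments()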
---
 .../example_analytical_function.py            |  17 +-
 ...mple_analytical_function_testSequential.py |  15 +-
 .../post_processing/post_processing.py        |   1 +
 src/bayesvalidrox/surrogate_models/engine.py  |   9 +-
 .../surrogate_models/polynomial_chaos.py      | 411 +++++++-----------
 .../surrogate_models/surrogate_models.py      |  36 +-
 6 files changed, 204 insertions(+), 285 deletions(-)

diff --git a/examples/analytical-function/example_analytical_function.py b/examples/analytical-function/example_analytical_function.py
index 09261acba..70124f9cf 100644
--- a/examples/analytical-function/example_analytical_function.py
+++ b/examples/analytical-function/example_analytical_function.py
@@ -25,22 +25,9 @@ matplotlib.use('agg')
 
 # import bayesvalidrox
 # Add BayesValidRox path
-sys.path.append("src/")
 sys.path.append("../../src/")
 
-import bayesvalidrox
-from bayesvalidrox import PyLinkForwardModel
-
-from bayesvalidrox.pylink.pylink import PyLinkForwardModel
-from bayesvalidrox.surrogate_models.inputs import Input
-from bayesvalidrox.surrogate_models.exp_designs import ExpDesigns
-from bayesvalidrox.surrogate_models.surrogate_models import MetaModel
-#from bayesvalidrox.surrogate_models.meta_model_engine import MetaModelEngine
-from bayesvalidrox.post_processing.post_processing import PostProcessing
-from bayesvalidrox.bayes_inference.bayes_inference import BayesInference
-from bayesvalidrox.bayes_inference.discrepancy import Discrepancy
-
-from bayesvalidrox.surrogate_models.engine import Engine
+from bayesvalidrox import (PyLinkForwardModel, Input, ExpDesigns, PCE,
+                           PostProcessing, BayesInference, Discrepancy, Engine)
 
 if __name__ == "__main__":
 
@@ -95,7 +82,7 @@ if __name__ == "__main__":
     # =====================================================
     # ==========  DEFINITION OF THE METAMODEL  ============
     # =====================================================
-    MetaModelOpts = MetaModel(Inputs)#, Model)
+    MetaModelOpts = PCE(Inputs)#, Model)
 
     # Select if you want to preserve the spatial/temporal dependencies
     # MetaModelOpts.dim_red_method = 'PCA'
diff --git a/examples/analytical-function_sequentialdesign/example_analytical_function_testSequential.py b/examples/analytical-function_sequentialdesign/example_analytical_function_testSequential.py
index 7739966ea..b32961f45 100644
--- a/examples/analytical-function_sequentialdesign/example_analytical_function_testSequential.py
+++ b/examples/analytical-function_sequentialdesign/example_analytical_function_testSequential.py
@@ -27,18 +27,7 @@ import matplotlib
 # Add BayesValidRox path
 sys.path.append("../../src/")
 
-from bayesvalidrox.surrogate_models.inputs import Input
-from bayesvalidrox.surrogate_models.input_space import InputSpace
-from bayesvalidrox.pylink.pylink import PyLinkForwardModel
-from bayesvalidrox.surrogate_models.inputs import Input
-from bayesvalidrox.surrogate_models.exp_designs import ExpDesigns
-from bayesvalidrox.surrogate_models.surrogate_models import MetaModel
-#from bayesvalidrox.surrogate_models.meta_model_engine import MetaModelEngine
-from bayesvalidrox.post_processing.post_processing import PostProcessing
-from bayesvalidrox.bayes_inference.bayes_inference import BayesInference
-from bayesvalidrox.bayes_inference.discrepancy import Discrepancy
-
-from bayesvalidrox.surrogate_models.engine import Engine
+from bayesvalidrox import (Input, PyLinkForwardModel, ExpDesigns, PCE,
+                           PostProcessing, BayesInference, Discrepancy, Engine)
 
 if __name__ == "__main__":
 
@@ -93,7 +82,7 @@ if __name__ == "__main__":
     # =====================================================
     # ==========  DEFINITION OF THE METAMODEL  ============
     # =====================================================
-    MetaModelOpts = MetaModel(Inputs)#, Model)
+    MetaModelOpts = PCE(Inputs)#, Model)
 
     # Select if you want to preserve the spatial/temporal dependencies
     # MetaModelOpts.dim_red_method = 'PCA'
diff --git a/src/bayesvalidrox/post_processing/post_processing.py b/src/bayesvalidrox/post_processing/post_processing.py
index 020695c76..76452f80b 100644
--- a/src/bayesvalidrox/post_processing/post_processing.py
+++ b/src/bayesvalidrox/post_processing/post_processing.py
@@ -1050,6 +1050,7 @@ class PostProcessing:
     def compute_pce_moments(self):
         """
         Computes the first two moments using the PCE-based meta-model.
+        # TODO: rebuild this to use the general MetaModel.compute_moments() function.
 
         Returns
         -------
diff --git a/src/bayesvalidrox/surrogate_models/engine.py b/src/bayesvalidrox/surrogate_models/engine.py
index 0c746b5a7..8d27310b7 100644
--- a/src/bayesvalidrox/surrogate_models/engine.py
+++ b/src/bayesvalidrox/surrogate_models/engine.py
@@ -1011,20 +1011,21 @@ class Engine:
             RMSE of the standard deviations
 
         """
-        if self.Model.mc_reference == {}:
+        if not self.Model.mc_reference:
             raise AttributeError('Model.mc_reference needs to be given to calculate the surrogate error!')
+        
         # Compute the mean and std based on the MetaModel
-        pce_means, pce_stds = self.MetaModel._compute_pce_moments()
+        means, stds = self.MetaModel.compute_moments()
 
         # Compute the root mean squared error
         for output in self.out_names:
             # Compute the error between mean and std of MetaModel and OrigModel
             # TODO: write test that checks if mc_reference exists
             RMSE_Mean = mean_squared_error(
-                self.Model.mc_reference['mean'], pce_means[output], squared=False
+                self.Model.mc_reference['mean'], means[output], squared=False
             )
             RMSE_std = mean_squared_error(
-                self.Model.mc_reference['std'], pce_stds[output], squared=False
+                self.Model.mc_reference['std'], stds[output], squared=False
             )
 
         return RMSE_Mean, RMSE_std
diff --git a/src/bayesvalidrox/surrogate_models/polynomial_chaos.py b/src/bayesvalidrox/surrogate_models/polynomial_chaos.py
index a57e77282..fc483d57d 100644
--- a/src/bayesvalidrox/surrogate_models/polynomial_chaos.py
+++ b/src/bayesvalidrox/surrogate_models/polynomial_chaos.py
@@ -53,11 +53,11 @@ class PCE(MetaModel):
     ----------
     input_obj : obj
         Input object with the information on the model input parameters.
     meta_model_type : str
         Surrogate model type. Two types are supported:
         polynomial chaos expansion (`PCE`) and arbitrary PCE (`aPCE`).
         Default is `PCE`.
     pce_reg_method : str
         PCE regression method to compute the coefficients. The following
         regression methods are available:
 
@@ -75,11 +75,11 @@ class PCE(MetaModel):
         the coefficients are recalculated with the ordinary least squares method.
     n_bootstrap_itrs : int
         Number of iterations for the bootstrap sampling. The default is `1`.
     pce_deg : int or list of int
         Polynomial degree(s). If a list is given, an adaptive algorithm is used
         to find the best degree with the lowest Leave-One-Out cross-validation
         (LOO) error (or the highest score=1-LOO). Default is `1`.
     pce_q_norm : float
         Hyperbolic (or q-norm) truncation for multi-indices of multivariate
         polynomials. Default is `1.0`.
     dim_red_method : str
@@ -108,40 +108,37 @@ class PCE(MetaModel):
                      n_bootstrap_itrs, dim_red_method, is_gaussian, 
                      verbose)
 
-        self.meta_model_type = meta_model_type
-        self.pce_reg_method = pce_reg_method
-        self.pce_deg = pce_deg
-        self.pce_q_norm = pce_q_norm
+        # Additional inputs
+        # Attributes that are not needed from the outside are prefixed with '_'
+        self._meta_model_type = meta_model_type
+        self._pce_reg_method = pce_reg_method
+        self._pce_deg = pce_deg
+        self._pce_q_norm = pce_q_norm
 
         # Other params
-        self.polycoeffs = None
-        self.errorScale = None
-        self.errorclf_poly = None
-        self.errorRegMethod = None
-        self.nlc = None
-        self.univ_p_val = None
-        self.n_pca_components = None
-        self.out_names = None
-        self.allBasisIndices = None
-        self.deg_array = None
-        self.n_samples = None
-        self.CollocationPoints = None
-        self.pca = None
-        self.LCerror = None
-        self.clf_poly = None
-        self.score_dict = None
-        self.basis_dict = None
-        self.coeffs_dict = None
-        self.q_norm_dict = None
-        self.deg_dict = None
-        self.x_scaler = None
-        self.gp_poly = None
-        self.n_params = None
-        self.ndim = None
-        self.init_type = None
-        self.rmse = None
+        self._polycoeffs = None
+        self._errorScale = None
+        self._error_clf_poly = None
+        self._errorRegMethod = None
+        self._univ_p_val = None
+        #self.out_names = None
+        self._allBasisIndices = None
+        self._deg_array = None
+        #self.n_samples = None
+        #self.CollocationPoints = None # TODO: rename this!
+        #self.pca = None
+        self._LCerror = None
+        self._clf_poly = None
+        self._score_dict = None
+        self._basis_dict = None
+        self._coeffs_dict = None
+        self._q_norm_dict = None
+        self._deg_dict = None
+        #self.n_params = None
+        #self.ndim = None
+        #self.rmse = None
         
     def check_is_gaussian(self, pce_reg_method, n_bootstrap_itrs) -> bool:
         """
         Check if the metamodel returns a mean and stdev.
 
@@ -154,7 +151,7 @@ class PCE(MetaModel):
         # TODO: add to these!
         if n_bootstrap_itrs > 1:
             return True
         if pce_reg_method != 'ols':
             return True
         else:
             return False
@@ -172,7 +169,7 @@ class PCE(MetaModel):
         """
 
         # Generate general warnings
-        if self.pce_reg_method.lower() == 'ols':
+        if self._pce_reg_method.lower() == 'ols':
             print('There are no estimations of surrogate uncertainty available'
                   ' for the chosen regression options. This might lead to issues'
                   ' in later steps.')
@@ -184,10 +181,10 @@ class PCE(MetaModel):
         # Add InputSpace to MetaModel if it does not have any
         if self.InputSpace is None:
             if n_init_samples is None:
                 n_init_samples = self.CollocationPoints.shape[0]
             self.InputSpace = InputSpace(self.input_obj)
             self.InputSpace.n_init_samples = n_init_samples
-            self.InputSpace.init_param_space(np.max(self.pce_deg))
+            self.InputSpace.init_param_space(np.max(self._pce_deg))
 
         self.ndim = self.InputSpace.ndim
 
@@ -198,16 +195,16 @@ class PCE(MetaModel):
         self.n_params = len(self.input_obj.Marginals)
 
         # Generate polynomials
-        self.generate_polynomials(np.max(self.pce_deg))
+        self._generate_polynomials(np.max(self._pce_deg))
 
         # Initialize the nested dictionaries
-        self.deg_dict = self.auto_vivification()
-        self.q_norm_dict = self.auto_vivification()
-        self.coeffs_dict = self.auto_vivification()
-        self.basis_dict = self.auto_vivification()
-        self.score_dict = self.auto_vivification()
-        self.clf_poly = self.auto_vivification()
-        self.LCerror = self.auto_vivification()
+        self._deg_dict = self.auto_vivification()
+        self._q_norm_dict = self.auto_vivification()
+        self._coeffs_dict = self.auto_vivification()
+        self._basis_dict = self.auto_vivification()
+        self._score_dict = self.auto_vivification()
+        self._clf_poly = self.auto_vivification()
+        self._LCerror = self.auto_vivification()
         if self.dim_red_method.lower() == 'pca':
             self.pca = self.auto_vivification()
 
@@ -219,20 +216,20 @@ class PCE(MetaModel):
                 'The given samples do not match the given number of priors. The samples should be a 2D array of size '
                 '(#samples, #priors)')
 
-        self.deg_array = self.__select_degree(ndim, self.n_samples)
+        self._deg_array = self.__select_degree(ndim, self.n_samples)
 
         # Generate all basis indices
-        self.allBasisIndices = self.auto_vivification()
-        for deg in self.deg_array:
-            keys = self.allBasisIndices.keys()
+        self._allBasisIndices = self.auto_vivification()
+        for deg in self._deg_array:
+            keys = self._allBasisIndices.keys()
             if deg not in np.fromiter(keys, dtype=float):
                 # Generate the polynomial basis indices
-                for qidx, q in enumerate(self.pce_q_norm):
+                for qidx, q in enumerate(self._pce_q_norm):
                     basis_indices = glexindex(start=0, stop=deg + 1,
                                               dimensions=self.n_params,
                                               cross_truncation=q,
                                               reverse=False, graded=True)
-                    self.allBasisIndices[str(deg)][str(q)] = basis_indices
+                    self._allBasisIndices[str(deg)][str(q)] = basis_indices
 
     def fit(self, X: np.array, y: dict, parallel=False, verbose=False):
         """
@@ -277,17 +274,17 @@ class PCE(MetaModel):
         self.CollocationPoints = X
 
         # TODO: other option: rebuild every time
-        if self.deg_array is None:
+        if self._deg_array is None:
             self.build_metamodel(n_init_samples=X.shape[0])
 
         # Evaluate the univariate polynomials on InputSpace
-        self.univ_p_val = self.univ_basis_vals(self.CollocationPoints)
+        self._univ_p_val = self.univ_basis_vals(self.CollocationPoints)
         # Store the original ones for later use
-        orig_univ_p_val  = copy.deepcopy(self.univ_p_val)
+        orig_univ_p_val = copy.deepcopy(self._univ_p_val)
 
         # --- Loop through data points and fit the surrogate ---
         if verbose:
-            print(f"\n>>>> Training the {self.meta_model_type} metamodel "
+            print(f"\n>>>> Training the {self._meta_model_type} metamodel "
                   "started. <<<<<<\n")
 
         # --- Bootstrap sampling ---
@@ -349,7 +346,7 @@ class PCE(MetaModel):
                 # Parallel fit regression
                 out = None
                 # Bootstrap the univariate polynomials for use during training
-                self.univ_p_val = orig_univ_p_val[b_indices]
+                self._univ_p_val = orig_univ_p_val[b_indices]
                 if parallel and (not fast_bootstrap or b_i == 0):
                     out = Parallel(n_jobs=-1, backend='multiprocessing')(
                         delayed(self.adaptive_regression)(  # X_train_b,
@@ -375,19 +372,19 @@ class PCE(MetaModel):
 
                 # Create a dict to pass the variables
                 for i in range(target.shape[1]):
-                    self.deg_dict[f'b_{b_i + 1}'][key][f"y_{i + 1}"] = out[i]['degree']
-                    self.q_norm_dict[f'b_{b_i + 1}'][key][f"y_{i + 1}"] = out[i]['qnorm']
-                    self.coeffs_dict[f'b_{b_i + 1}'][key][f"y_{i + 1}"] = out[i]['coeffs']
-                    self.basis_dict[f'b_{b_i + 1}'][key][f"y_{i + 1}"] = out[i]['multi_indices']
-                    self.score_dict[f'b_{b_i + 1}'][key][f"y_{i + 1}"] = out[i]['LOOCVScore']
-                    self.clf_poly[f'b_{b_i + 1}'][key][f"y_{i + 1}"] = out[i]['clf_poly']
-                    # self.LCerror[f'b_{b_i+1}'][key][f"y_{i+1}"] = out[i]['LCerror']
+                    self._deg_dict[f'b_{b_i + 1}'][key][f"y_{i + 1}"] = out[i]['degree']
+                    self._q_norm_dict[f'b_{b_i + 1}'][key][f"y_{i + 1}"] = out[i]['qnorm']
+                    self._coeffs_dict[f'b_{b_i + 1}'][key][f"y_{i + 1}"] = out[i]['coeffs']
+                    self._basis_dict[f'b_{b_i + 1}'][key][f"y_{i + 1}"] = out[i]['multi_indices']
+                    self._score_dict[f'b_{b_i + 1}'][key][f"y_{i + 1}"] = out[i]['LOOCVScore']
+                    self._clf_poly[f'b_{b_i + 1}'][key][f"y_{i + 1}"] = out[i]['_clf_poly']
+                    # self._LCerror[f'b_{b_i+1}'][key][f"y_{i+1}"] = out[i]['_LCerror']
                     
         # Restore the univariate polynomials
-        self.univ_p_val = orig_univ_p_val
+        self._univ_p_val = orig_univ_p_val
 
         if verbose:
-            print(f"\n>>>> Training the {self.meta_model_type} metamodel"
+            print(f"\n>>>> Training the {self._meta_model_type} metamodel"
                   " sucessfully completed. <<<<<<\n")
 
     # -------------------------------------------------------------------------
@@ -412,15 +409,15 @@ class PCE(MetaModel):
             The updated training output dictionary.
 
         """
-        # TODO: why is X not used here? -> Uses self.univ_p_val instead
-        #       This should be changed to either check if the univ_p_val are accurate
+        # TODO: why is X not used here? -> Uses self._univ_p_val instead
+        #       This should be changed to either check if the _univ_p_val are accurate
         #       or to always construct them from the X-values
         # Make a copy
         final_out_dict = copy.deepcopy(out_dict)
         
-        # Create the univ_p_val
-        univ_p_val = self.univ_p_val
-#        univ_p_val = self.univ_basis_vals(X, n_max=np.max(self.pce_deg))
+        # Create the _univ_p_val
+        _univ_p_val = self._univ_p_val
+#        _univ_p_val = self.univ_basis_vals(X, n_max=np.max(self._pce_deg))
 
         # Loop over the points
         for i in range(y.shape[1]):
@@ -431,7 +428,7 @@ class PCE(MetaModel):
                 basis_indices = out_dict[i]['multi_indices']
 
                 # Evaluate the multivariate polynomials on CollocationPoints
-                psi = create_psi(basis_indices, univ_p_val)#self.univ_p_val)
+                psi = create_psi(basis_indices, _univ_p_val)
 
                 # Calculate the coefficients of the surrogate model
                 updated_out = self.regression(
@@ -465,16 +462,16 @@ class PCE(MetaModel):
         poly_types = self.InputSpace.poly_types
         if samples.ndim != 2:
             samples = samples.reshape(1, len(samples))
-        n_max = np.max(self.pce_deg) if n_max is None else n_max
+        n_max = np.max(self._pce_deg) if n_max is None else n_max
 
         # Extract poly coeffs
         if self.InputSpace.input_data_given or self.InputSpace.apce:
-            apolycoeffs = self.polycoeffs
+            a_polycoeffs = self._polycoeffs
         else:
-            apolycoeffs = None
+            a_polycoeffs = None
 
         # Evaluate univariate basis
-        univ_basis = eval_univ_basis(samples, n_max, poly_types, apolycoeffs)
+        univ_basis = eval_univ_basis(samples, n_max, poly_types, a_polycoeffs)
 
         return univ_basis
 
@@ -504,7 +501,7 @@ class PCE(MetaModel):
 
         """
         if reg_method is None:
-            reg_method = self.pce_reg_method
+            reg_method = self._pce_reg_method
 
         bias_term = self.dim_red_method.lower() != 'pca'
 
@@ -517,22 +514,22 @@ class PCE(MetaModel):
             Lambda = 1e-6
 
         # Bayes sparse adaptive aPCE
-        clf_poly = None
+        _clf_poly = None
         if reg_method.lower() == 'ols':
-            clf_poly = lm.LinearRegression(fit_intercept=False)
+            _clf_poly = lm.LinearRegression(fit_intercept=False)
         elif reg_method.lower() == 'brr':
-            clf_poly = lm.BayesianRidge(n_iter=1000, tol=1e-7,
+            _clf_poly = lm.BayesianRidge(n_iter=1000, tol=1e-7,
                                         fit_intercept=False,
                                         # normalize=True,
                                         compute_score=compute_score,
                                         alpha_1=1e-04, alpha_2=1e-04,
                                         lambda_1=Lambda, lambda_2=Lambda)
-            clf_poly.converged = True
+            _clf_poly.converged = True
 
         elif reg_method.lower() == 'ard':
             if X.shape[0] < 2:
                 raise ValueError('Regression with ARD can only be performed for more than 2 samples')
-            clf_poly = lm.ARDRegression(fit_intercept=False,
+            _clf_poly = lm.ARDRegression(fit_intercept=False,
                                         # normalize=True,
                                         compute_score=compute_score,
                                         n_iter=1000, tol=0.0001,
@@ -540,7 +537,7 @@ class PCE(MetaModel):
                                         lambda_1=Lambda, lambda_2=Lambda)
 
         elif reg_method.lower() == 'fastard':
-            clf_poly = RegressionFastARD(fit_intercept=False,
+            _clf_poly = RegressionFastARD(fit_intercept=False,
                                          normalize=True,
                                          compute_score=compute_score,
                                          n_iter=300, tol=1e-10)
@@ -548,49 +545,51 @@ class PCE(MetaModel):
         elif reg_method.lower() == 'bcs':
             if X.shape[0] < 10:
                 raise ValueError('Regression with BCS can only be performed for more than 10 samples')
-            clf_poly = RegressionFastLaplace(fit_intercept=False,
+            _clf_poly = RegressionFastLaplace(fit_intercept=False,
                                              bias_term=bias_term,
                                              n_iter=1000, tol=1e-7)
 
         elif reg_method.lower() == 'lars':
             if X.shape[0] < 10:
                 raise ValueError('Regression with LARS can only be performed for more than 10 samples')
-            clf_poly = lm.LassoLarsCV(fit_intercept=False)
+            _clf_poly = lm.LassoLarsCV(fit_intercept=False)
 
         elif reg_method.lower() == 'sgdr':
-            clf_poly = lm.SGDRegressor(fit_intercept=False,
+            _clf_poly = lm.SGDRegressor(fit_intercept=False,
                                        max_iter=5000, tol=1e-7)
 
         elif reg_method.lower() == 'omp':
-            clf_poly = OrthogonalMatchingPursuit(fit_intercept=False)
+            _clf_poly = OrthogonalMatchingPursuit(fit_intercept=False)
 
         elif reg_method.lower() == 'vbl':
-            clf_poly = VBLinearRegression(fit_intercept=False)
+            _clf_poly = VBLinearRegression(fit_intercept=False)
 
         elif reg_method.lower() == 'ebl':
-            clf_poly = EBLinearRegression(optimizer='em')
+            _clf_poly = EBLinearRegression(optimizer='em')
 
-        clf_poly.fit(X, y)
+        _clf_poly.fit(X, y)
 
         # Select the nonzero entries of coefficients
         if sparsity:
-            nnz_idx = np.nonzero(clf_poly.coef_)[0]
+            nnz_idx = np.nonzero(_clf_poly.coef_)[0]
         else:
-            nnz_idx = np.arange(clf_poly.coef_.shape[0])
+            nnz_idx = np.arange(_clf_poly.coef_.shape[0])
 
         # This is for the case where all outputs are zero, thereby
         # all coefficients are zero
         if (y == 0).all():
-            nnz_idx = np.insert(np.nonzero(clf_poly.coef_)[0], 0, 0)
+            nnz_idx = np.insert(np.nonzero(_clf_poly.coef_)[0], 0, 0)
 
         sparse_basis_indices = basis_indices[nnz_idx]
         sparse_X = X[:, nnz_idx]
-        coeffs = clf_poly.coef_[nnz_idx]
-        clf_poly.coef_ = coeffs
+        coeffs = _clf_poly.coef_[nnz_idx]
+        _clf_poly.coef_ = coeffs
 
         # Create a dict to pass the outputs
         return_out_dict = dict()
-        return_out_dict['clf_poly'] = clf_poly
+        return_out_dict['_clf_poly'] = _clf_poly
         return_out_dict['spareMulti-Index'] = sparse_basis_indices
         return_out_dict['sparePsi'] = sparse_X
         return_out_dict['coeffs'] = coeffs
@@ -623,13 +622,13 @@ class PCE(MetaModel):
         # Initialization
         qAllCoeffs, AllCoeffs = {}, {}
         qAllIndices_Sparse, AllIndices_Sparse = {}, {}
-        qAllclf_poly, Allclf_poly = {}, {}
+        qAll_clf_poly, All_clf_poly = {}, {}
         qAllnTerms, AllnTerms = {}, {}
-        qAllLCerror, AllLCerror = {}, {}
+        qAll_LCerror, All_LCerror = {}, {}
 
         # Extract degree array and q-norm array
-        deg_array = np.array([*self.allBasisIndices], dtype=int)
-        qnorm = [*self.allBasisIndices[str(int(deg_array[0]))]]
+        _deg_array = np.array([*self._allBasisIndices], dtype=int)
+        qnorm = [*self._allBasisIndices[str(int(_deg_array[0]))]]
 
         # Some options for EarlyStop
         errorIncreases = False
@@ -648,40 +647,40 @@ class PCE(MetaModel):
         # =====================================================================
         # For each degree check all q-norms and choose the best one
         best_q = None
-        scores = -np.inf * np.ones(deg_array.shape[0])
+        scores = -np.inf * np.ones(_deg_array.shape[0])
         qNormScores = -np.inf * np.ones(nqnorms)
 
-        for degIdx, deg in enumerate(deg_array):
+        for degIdx, deg in enumerate(_deg_array):
 
             for qidx, q in enumerate(qnorm):
 
                 # Extract the polynomial basis indices from the pool of
-                # allBasisIndices
-                BasisIndices = self.allBasisIndices[str(deg)][str(q)]
+                # _allBasisIndices
+                BasisIndices = self._allBasisIndices[str(deg)][str(q)]
 
                 # Assemble the Psi matrix
-                Psi = create_psi(BasisIndices, self.univ_p_val)
+                Psi = create_psi(BasisIndices, self._univ_p_val)
 
                 # Calculate the coefficients of the metamodel
                 outs = self.regression(Psi, ED_Y, BasisIndices)
 
                 # Calculate and save the score of LOOCV
-                score, LCerror = corr_loocv_error(outs['clf_poly'],
+                score, _LCerror = corr_loocv_error(outs['_clf_poly'],
                                                   outs['sparePsi'],
                                                   outs['coeffs'],
                                                   ED_Y)
 
                 # Check the convergence of noise for FastARD
-                if self.pce_reg_method == 'FastARD' and \
-                        outs['clf_poly'].alpha_ < np.finfo(np.float32).eps:
+                if self._pce_reg_method == 'FastARD' and \
+                        outs['_clf_poly'].alpha_ < np.finfo(np.float32).eps:
                     score = -np.inf
 
                 qNormScores[qidx] = score
                 qAllCoeffs[str(qidx + 1)] = outs['coeffs']
                 qAllIndices_Sparse[str(qidx + 1)] = outs['spareMulti-Index']
-                qAllclf_poly[str(qidx + 1)] = outs['clf_poly']
+                qAll_clf_poly[str(qidx + 1)] = outs['_clf_poly']
                 qAllnTerms[str(qidx + 1)] = BasisIndices.shape[0]
-                qAllLCerror[str(qidx + 1)] = LCerror
+                qAll_LCerror[str(qidx + 1)] = _LCerror
 
                 # EarlyStop check
                 # if there are at least n_checks_qNorm entries after the
@@ -703,9 +702,9 @@ class PCE(MetaModel):
 
             AllCoeffs[str(degIdx + 1)] = qAllCoeffs[str(best_q + 1)]
             AllIndices_Sparse[str(degIdx + 1)] = qAllIndices_Sparse[str(best_q + 1)]
-            Allclf_poly[str(degIdx + 1)] = qAllclf_poly[str(best_q + 1)]
+            All_clf_poly[str(degIdx + 1)] = qAll_clf_poly[str(best_q + 1)]
             AllnTerms[str(degIdx + 1)] = qAllnTerms[str(best_q + 1)]
-            AllLCerror[str(degIdx + 1)] = qAllLCerror[str(best_q + 1)]
+            All_LCerror[str(degIdx + 1)] = qAll_LCerror[str(best_q + 1)]
 
             # Check the direction of the error (on average):
             # if it increases consistently stop the iterations
@@ -727,11 +726,11 @@ class PCE(MetaModel):
         best_deg = np.nanargmax(scores) + 1
         coeffs = AllCoeffs[str(best_deg)]
         basis_indices = AllIndices_Sparse[str(best_deg)]
-        clf_poly = Allclf_poly[str(best_deg)]
+        _clf_poly = All_clf_poly[str(best_deg)]
         LOOCVScore = np.nanmax(scores)
         P = AllnTerms[str(best_deg)]
-        LCerror = AllLCerror[str(best_deg)]
-        degree = deg_array[np.nanargmax(scores)]
+        _LCerror = All_LCerror[str(best_deg)]
+        degree = _deg_array[np.nanargmax(scores)]
         qnorm = float(qnorm[best_q])
 
         # ------------------ Print out Summary of results ------------------
@@ -742,7 +741,7 @@ class PCE(MetaModel):
 
             print(f'Output variable {varIdx + 1}:')
             print('The estimation of PCE coefficients converged at polynomial '
-                  f'degree {deg_array[best_deg - 1]} with '
+                  f'degree {_deg_array[best_deg - 1]} with '
                   f'{len(BasisIndices_Sparse)} terms (Sparsity index = '
                   f'{round(len(BasisIndices_Sparse) / P, 3)}).')
 
@@ -755,24 +754,24 @@ class PCE(MetaModel):
             print('=' * 50)
 
             print("Scores:\n", scores)
-            print("Degree of best score:", self.deg_array[best_deg - 1])
+            print("Degree of best score:", self._deg_array[best_deg - 1])
             print("No. of terms:", len(basis_indices))
             print("Sparsity index:", round(len(basis_indices) / P, 3))
             print("Best Indices:\n", basis_indices)
 
-            if self.pce_reg_method in ['BRR', 'ARD']:
+            if self._pce_reg_method in ['BRR', 'ARD']:
                 fig, ax = plt.subplots(figsize=(12, 10))
                 plt.title("Marginal log-likelihood")
-                plt.plot(clf_poly.scores_, color='navy', linewidth=2)
+                plt.plot(_clf_poly.scores_, color='navy', linewidth=2)
                 plt.ylabel("Score")
                 plt.xlabel("Iterations")
-                if self.pce_reg_method.lower() == 'bbr':
-                    text = f"$\\alpha={clf_poly.alpha_:.1f}$\n"
-                    f"$\\lambda={clf_poly.lambda_:.3f}$\n"
-                    f"$L={clf_poly.scores_[-1]:.1f}$"
+                if self._pce_reg_method.lower() == 'brr':
+                    text = (f"$\\alpha={_clf_poly.alpha_:.1f}$\n"
+                            f"$\\lambda={_clf_poly.lambda_:.3f}$\n"
+                            f"$L={_clf_poly.scores_[-1]:.1f}$")
                 else:
-                    text = f"$\\alpha={clf_poly.alpha_:.1f}$\n$"
-                    f"\\L={clf_poly.scores_[-1]:.1f}$"
+                    text = f"$\\alpha={_clf_poly.alpha_:.1f}$\n$"
+                    f"\\L={_clf_poly.scores_[-1]:.1f}$"
 
                 # TODO: save this figure and close it, do not show
                 plt.text(0.75, 0.5, text, fontsize=18, transform=ax.transAxes)
@@ -781,92 +780,16 @@ class PCE(MetaModel):
 
         # Create a dict to pass the outputs
         returnVars = dict()
-        returnVars['clf_poly'] = clf_poly
+        returnVars['_clf_poly'] = _clf_poly
         returnVars['degree'] = degree
         returnVars['qnorm'] = qnorm
         returnVars['coeffs'] = coeffs
         returnVars['multi_indices'] = basis_indices
         returnVars['LOOCVScore'] = LOOCVScore
-        returnVars['LCerror'] = LCerror
+        returnVars['_LCerror'] = _LCerror
 
         return returnVars
 
-    # -------------------------------------------------------------------------
-    def pca_transformation(self, target):
-        """
-        Transforms the targets (outputs) via Principal Component Analysis.
-        The number of features is set by `self.n_pca_components`.
-        If this is not given, `self.var_pca_threshold` is used as a threshold.
-
-        Parameters
-        ----------
-        target : array of shape (n_samples,)
-            Target values.
-
-        Returns
-        -------
-        pca : obj
-            Fitted sklearnPCA object.
-        OutputMatrix : array of shape (n_samples,)
-            Transformed target values.
-        n_pca_components : int
-            Number of selected principal components.
-
-        """
-        # Get current shape of the outputs
-        n_samples, n_features = target.shape
-        
-        # Use the given n_pca_components
-        n_pca_components = self.n_pca_components
-        
-        # Switch to var_pca if n_pca_components is too large
-        if (n_pca_components is not None) and (n_pca_components > n_features):
-            n_pca_components = None
-            if self.verbose:
-                print('')
-                print('WARNING: Too many components are set for PCA. The transformation will proceed based on explainable variance.')
-        
-        # Calculate n_pca_components based on decomposition of the variance
-        if n_pca_components is None:
-            if self.var_pca_threshold is not None:
-                var_pca_threshold = self.var_pca_threshold
-            else:
-                var_pca_threshold = 99#100.0
-            # Instantiate and fit sklearnPCA object
-            covar_matrix = sklearnPCA(n_components=None)
-            covar_matrix.fit(target)
-            var = np.cumsum(np.round(covar_matrix.explained_variance_ratio_,
-                                     decimals=5) * 100)
-            # Find the number of components to explain self.varPCAThreshold of
-            # variance
-            try:
-                n_components = np.where(var >= var_pca_threshold)[0][0] + 1
-            except IndexError:
-                n_components = min(n_samples, n_features)
-
-            n_pca_components = min(n_samples, n_features, n_components)
-
-        # Print out a report
-        #if self.verbose:
-        #    print()
-        #    print('-' * 50)
-        #    print(f"PCA transformation is performed with {n_pca_components}"
-        #          " components.")
-        #    print('-' * 50)
-        #    print()
-
-        # Set the solver to 'auto' if no reduction is wanted
-        # Otherwise use 'arpack'
-        solver = 'auto'
-        if n_pca_components < n_features:
-            solver = 'arpack'
-            
-        # Fit and transform with the selected number of components
-        pca = sklearnPCA(n_components=n_pca_components, svd_solver=solver)
-        scaled_target = pca.fit_transform(target)
-
-        return pca, scaled_target, n_pca_components
-
     # -------------------------------------------------------------------------
     def eval_metamodel(self, samples):
         """
@@ -894,10 +817,10 @@ class PCE(MetaModel):
             method='user'
         )
         # Compute univariate bases for the given samples
-        univ_p_val = None
-        univ_p_val = self.univ_basis_vals(
+        _univ_p_val = self.univ_basis_vals(
             samples,
-            n_max=np.max(self.pce_deg)
+            n_max=np.max(self._pce_deg)
         )
 
         mean_pred = None
@@ -909,7 +832,7 @@ class PCE(MetaModel):
         for b_i in range(self.n_bootstrap_itrs):
 
             # Extract model dictionary
-            model_dict = self.coeffs_dict[f'b_{b_i + 1}']
+            model_dict = self._coeffs_dict[f'b_{b_i + 1}']
 
             # Loop over outputs
             mean_pred = {}
@@ -922,23 +845,25 @@ class PCE(MetaModel):
                 for in_key, InIdxValues in values.items():
 
                     # Assemble Psi matrix
-                    basis = self.basis_dict[f'b_{b_i + 1}'][output][in_key]
-                    psi = create_psi(basis, univ_p_val)
+                    basis = self._basis_dict[f'b_{b_i + 1}'][output][in_key]
+                    psi = create_psi(basis, _univ_p_val)
 
                     # Prediction
                     if self.bootstrap_method != 'fast' or b_i == 0:
-                        # with error bar, i.e. use clf_poly
-                        clf_poly = self.clf_poly[f'b_{b_i + 1}'][output][in_key]
+                        # with error bar, i.e. use _clf_poly
+                        _clf_poly = self._clf_poly[f'b_{b_i + 1}'][output][in_key]
                         try:
-                            y_mean, y_std = clf_poly.predict(
+                            y_mean, y_std = _clf_poly.predict(
                                 psi, return_std=True
                             )
                         except TypeError:
-                            y_mean = clf_poly.predict(psi)
+                            y_mean = _clf_poly.predict(psi)
                             y_std = np.zeros_like(y_mean)
                     else:
                         # without error bar
-                        coeffs = self.coeffs_dict[f'b_{b_i + 1}'][output][in_key]
+                        coeffs = self._coeffs_dict[f'b_{b_i + 1}'][output][in_key]
                         y_mean = np.dot(psi, coeffs)
                         y_std = np.zeros_like(y_mean)
 
@@ -998,22 +923,22 @@ class PCE(MetaModel):
 
         Returns
         -------
-        deg_array: array
+        _deg_array: array
             The selected degrees.
 
         """
-        # Define the deg_array
-        max_deg = np.max(self.pce_deg)
-        min_Deg = np.min(self.pce_deg)
+        # Define the _deg_array
+        max_deg = np.max(self._pce_deg)
+        min_Deg = np.min(self._pce_deg)
 
         # TODO: remove the options for sequential?
         nitr = n_samples - self.InputSpace.n_init_samples
 
         # Check q-norm
-        if not np.isscalar(self.pce_q_norm):
-            self.pce_q_norm = np.array(self.pce_q_norm)
+        if not np.isscalar(self._pce_q_norm):
+            self._pce_q_norm = np.array(self._pce_q_norm)
         else:
-            self.pce_q_norm = np.array([self.pce_q_norm])
+            self._pce_q_norm = np.array([self._pce_q_norm])
 
         # def M_uptoMax(maxDeg):
         #    n_combo = np.zeros(maxDeg)
@@ -1028,14 +953,14 @@ class PCE(MetaModel):
         # min_index = np.argmin(abs(M_uptoMax(max_deg)-ndim*n_samples*d))
         # deg_new = range(1, max_deg+1)[min_index]
 
-        if deg_new > min_Deg and self.pce_reg_method.lower() != 'fastard':
-            deg_array = np.arange(min_Deg, deg_new + 1)
+        if deg_new > min_Deg and self._pce_reg_method.lower() != 'fastard':
+            _deg_array = np.arange(min_Deg, deg_new + 1)
         else:
-            deg_array = np.array([deg_new])
+            _deg_array = np.array([deg_new])
 
-        return deg_array
+        return _deg_array
 
-    def generate_polynomials(self, max_deg=None):
+    def _generate_polynomials(self, max_deg=None):
         """
         Generates (univariate) polynomials.
 
@@ -1055,14 +980,14 @@ class PCE(MetaModel):
         ndim = self.InputSpace.ndim
         # Create orthogonal polynomial coefficients if necessary
         if max_deg is not None:  # and self.input_obj.poly_coeffs_flag:
-            self.polycoeffs = {}
+            self._polycoeffs = {}
             for parIdx in tqdm(range(ndim), ascii=True,
                                desc="Computing orth. polynomial coeffs"):
                 poly_coeffs = apoly_construction(
                     self.InputSpace.raw_data[parIdx],
                     max_deg
                 )
-                self.polycoeffs[f'p_{parIdx + 1}'] = poly_coeffs
+                self._polycoeffs[f'p_{parIdx + 1}'] = poly_coeffs
         else:
             raise AttributeError('MetaModel cannot generate polynomials in the given scenario!')
 
@@ -1086,24 +1011,24 @@ class PCE(MetaModel):
         # Loop over bootstrap iterations
         for b_i in range(self.n_bootstrap_itrs):
             # Loop over the metamodels
-            coeffs_dicts = self.coeffs_dict[f'b_{b_i + 1}'].items()
+            coeffs_dicts = self._coeffs_dict[f'b_{b_i + 1}'].items()
             means = {}
             stds = {}
-            for output, coef_dict in coeffs_dicts:
+            for output, coef_dict in coeffs_dicts:
 
                 pce_mean = np.zeros((len(coef_dict)))
                 pce_var = np.zeros((len(coef_dict)))
 
                 for index, values in coef_dict.items():
                     idx = int(index.split('_')[1]) - 1
-                    coeffs = self.coeffs_dict[f'b_{b_i + 1}'][output][index]
+                    coeffs = self._coeffs_dict[f'b_{b_i + 1}'][output][index]
 
                     # Mean = c_0
                     if coeffs[0] != 0:
                         pce_mean[idx] = coeffs[0]
                     else:
-                        clf_poly = self.clf_poly[f'b_{b_i + 1}'][output]
-                        pce_mean[idx] = clf_poly[index].intercept_
+                        _clf_poly = self._clf_poly[f'b_{b_i + 1}'][output]
+                        pce_mean[idx] = _clf_poly[index].intercept_
                     # Var = sum(coeffs[1:]**2)
                     pce_var[idx] = np.sum(np.square(coeffs[1:]))
 
@@ -1142,6 +1067,8 @@ class PCE(MetaModel):
 
         return pce_means, pce_stds
     
     
     # -------------------------------------------------------------------------
 #    def create_model_error(self, X, y, MeasuredData):
@@ -1165,9 +1092,9 @@ class PCE(MetaModel):
 # 
 #         """
 #         outputNames = self.out_names
-#         self.errorRegMethod = 'GPE'
-#         self.errorclf_poly = self.auto_vivification()
-#         self.errorScale = self.auto_vivification()
+#         self._errorRegMethod = 'GPE'
+#         self._error_clf_poly = self.auto_vivification()
+#         self._errorScale = self.auto_vivification()
 # 
 #         # Read data
 #         # TODO: do this call outside the metamodel
@@ -1189,8 +1116,8 @@ class PCE(MetaModel):
 #             X_S = scaler.fit_transform(BiasInputs)
 #             gp = gaussian_process_emulator(X_S, delta)
 # 
-#             self.errorScale[out]["y_1"] = scaler
-#             self.errorclf_poly[out]["y_1"] = gp
+#             self._errorScale[out]["y_1"] = scaler
+#             self._error_clf_poly[out]["y_1"] = gp
 # 
 #         return self
 # 
@@ -1219,15 +1146,15 @@ class PCE(MetaModel):
 #         mean_pred = {}
 #         std_pred = {}
 # 
-#         for Outkey, ValuesDict in self.errorclf_poly.items():
+#         for Outkey, ValuesDict in self._error_clf_poly.items():
 # 
 #             pred_mean = np.zeros_like(y_pred[Outkey])
 #             pred_std = np.zeros_like(y_pred[Outkey])
 # 
 #             for Inkey, InIdxValues in ValuesDict.items():
 # 
-#                 gp = self.errorclf_poly[Outkey][Inkey]
-#                 scaler = self.errorScale[Outkey][Inkey]
+#                 gp = self._error_clf_poly[Outkey][Inkey]
+#                 scaler = self._errorScale[Outkey][Inkey]
 # 
 #                 # Transform Samples using scaler
 #                 for j, pred in enumerate(y_pred[Outkey]):
diff --git a/src/bayesvalidrox/surrogate_models/surrogate_models.py b/src/bayesvalidrox/surrogate_models/surrogate_models.py
index bb9f22524..a8537f666 100644
--- a/src/bayesvalidrox/surrogate_models/surrogate_models.py
+++ b/src/bayesvalidrox/surrogate_models/surrogate_models.py
@@ -85,24 +85,23 @@ class MetaModel:
         # Other params
         self.InputSpace = None
         self.var_pca_threshold = None
-        self.errorScale = None
-        self.errorclf_poly = None
-        self.errorRegMethod = None
-        self.nlc = None
+        #self.errorScale = None
+        #self.errorclf_poly = None
+        #self.errorRegMethod = None
         self.n_pca_components = None
         self.out_names = None
         self.n_samples = None
-        self.CollocationPoints = None
+        self.CollocationPoints = None # TODO: rename this
         self.pca = None
         self.LCerror = None
-        self.score_dict = None
-        self.basis_dict = None
-        self.x_scaler = None
-        self.gp_poly = None
+        #self.score_dict = None
+        #self.basis_dict = None
+        #self.x_scaler = None
+        #self.gp_poly = None
         self.n_params = None
         self.ndim = None
-        self.init_type = None
-        self.rmse = None
+        #self.init_type = None # TODO: this parameter seems to not be used
+        #self.rmse = None # TODO: check if this is needed 
         
     def check_is_gaussian(self) -> bool:
         """
@@ -243,6 +242,21 @@ class MetaModel:
             self.is_gaussian == False
         """
         return None, None
+    
+    def compute_moments(self):
+        """
+        Computes the first two moments of the metamodel.
+
+        Returns
+        -------
+        means: dict
+            The first moment (mean) of the surrogate.
+        stds: dict
+            The standard deviation (square root of the second central moment) of the surrogate.
+
+        """
+        return None, None
+    
 
     # -------------------------------------------------------------------------
     class auto_vivification(dict):
-- 
GitLab