From e4868d4e233a47d044ad9c0df6c804412801f6b2 Mon Sep 17 00:00:00 2001
From: Maria Fernanda Morales <65073126+mfmo45@users.noreply.github.com>
Date: Mon, 2 Sep 2024 14:22:58 +0200
Subject: [PATCH] Created a GP-Skl class, tested

---
 .../example_analytical_function_gp.py         | 309 ++++++++++++
 src/bayesvalidrox/__init__.py                 |   4 +-
 .../gaussian_process_sklearn.py               | 466 ++++++++++++++++++
 3 files changed, 778 insertions(+), 1 deletion(-)
 create mode 100644 examples/analytical-function/example_analytical_function_gp.py
 create mode 100644 src/bayesvalidrox/surrogate_models/gaussian_process_sklearn.py

diff --git a/examples/analytical-function/example_analytical_function_gp.py b/examples/analytical-function/example_analytical_function_gp.py
new file mode 100644
index 000000000..9788ac701
--- /dev/null
+++ b/examples/analytical-function/example_analytical_function_gp.py
@@ -0,0 +1,309 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+This test shows a surrogate-assisted Bayesian calibration of a time-dependent
+analytical function.
+
+Author: Farid Mohammadi, M.Sc.
+E-Mail: farid.mohammadi@iws.uni-stuttgart.de
+Department of Hydromechanics and Modelling of Hydrosystems (LH2)
+Institute for Modelling Hydraulic and Environmental Systems (IWS), University
+of Stuttgart, www.iws.uni-stuttgart.de/lh2/
+Pfaffenwaldring 61
+70569 Stuttgart
+
+Created on Fri Aug 9 2019
+
+"""
+
+import numpy as np
+import pandas as pd
+import sys
+import joblib
+import matplotlib
+
+matplotlib.use('agg')
+
+# import bayesvalidrox
+# Add BayesValidRox path
+sys.path.append("../../src/")
+print(sys.path)
+
+from bayesvalidrox import PyLinkForwardModel, Input, ExpDesigns, GPESkl, PostProcessing, BayesInference, Discrepancy, \
+    Engine
+
+if __name__ == "__main__":
+
+    # Number of parameters
+    ndim = 10  # 2, 10
+
+    # =====================================================
+    # =============   COMPUTATIONAL MODEL  ================
+    # =====================================================
+    Model = PyLinkForwardModel()
+
+    Model.link_type = 'Function'
+    Model.py_file = 'analytical_function'
+    Model.name = 'AnalyticFunc'
+
+    Model.Output.names = ['Z']
+
+    # For Bayesian inversion synthetic data with X=[0,0]
+    Model.observations = {}
+    Model.observations['Time [s]'] = np.arange(0, 10, 1.) / 9
+    Model.observations['Z'] = np.repeat([2.], 10)
+
+    # For checking against the Monte Carlo reference
+    Model.mc_reference = {}
+    Model.mc_reference['Time [s]'] = np.arange(0, 10, 1.) / 9
+    Model.mc_reference['mean'] = np.load(f"data/mean_{ndim}.npy")
+    Model.mc_reference['std'] = np.load(f"data/std_{ndim}.npy")
+
+    # =====================================================
+    # =========   PROBABILISTIC INPUT MODEL  ==============
+    # =====================================================
+    # Define the uncertain parameters with their mean and
+    # standard deviation
+    Inputs = Input()
+
+    # Assuming dependent input variables
+    # Inputs.Rosenblatt = True
+
+    for i in range(ndim):
+        Inputs.add_marginals()
+        Inputs.Marginals[i].name = "$\\theta_{" + str(i + 1) + "}$"
+        Inputs.Marginals[i].dist_type = 'uniform'
+        Inputs.Marginals[i].parameters = [-5, 5]
+
+    # arbitrary polynomial chaos
+    # inputParams = np.load('data/InputParameters_{}.npy'.format(ndim))
+    # for i in range(ndim):
+    #     Inputs.add_marginals()
+    #     Inputs.Marginals[i].name = f'$X_{i+1}$'
+    #     Inputs.Marginals[i].input_data = inputParams[:, i]
+
+    # =====================================================
+    # ==========  DEFINITION OF THE METAMODEL  ============
+    # =====================================================
+    MetaModelOpts = GPESkl(Inputs)  # , Model)
+
+    # Select if you want to preserve the spatial/temporal dependencies
+    # MetaModelOpts.dim_red_method = 'PCA'
+    # MetaModelOpts.var_pca_threshold = 99.999
+    # MetaModelOpts.n_pca_components = 10
+
+    # ToDo: This should not be an option for GPEs
+    # Select your metamodel method
+    # 1) PCE (Polynomial Chaos Expansion) 2) aPCE (arbitrary PCE)
+    # 3) GPE (Gaussian Process Emulator)
+    MetaModelOpts.meta_model_type = 'GPE'
+
+    # ------------------------------------------------
+    # ------------- GPE Specification ----------------
+    # ------------------------------------------------
+    # Select the optimizer for estimating the GP hyperparameters via maximum likelihood
+    # ToDo: Remove this as a user-defined parameter, since only one is available?
+    # 1) LBFGS: the only option for Scikit-Learn
+    MetaModelOpts._gpe_reg_method = 'LBFGS'
+
+    # Kernel options ----------------------------
+    # Loop over different Kernels:
+    # 1) True to loop over the different kernel types and select the best one
+    MetaModelOpts._autoSelect = False
+
+    # Select Kernel type:
+    # 1) RBF: Gaussian/squared exponential kernel
+    # 2) Matern Kernel
+    # 3) RQ: Rational Quadratic kernel
+    MetaModelOpts._kernel_type = 'RBF'
+
+    MetaModelOpts._kernel_isotropy = False      # Kernel isotropy
+    MetaModelOpts._nugget = 1e-9                # Set regularization parameter (constant)
+    MetaModelOpts._kernel_noise = False         # Optimize regularization parameter
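+
+    # Note: with the settings above, the surrogate builds (roughly) the following
+    # scikit-learn kernel; this is only a sketch for orientation:
+    #   1.0 * RBF(length_scale=[1.0] * ndim, length_scale_bounds=[(1e-5, 1e5)] * ndim)
+    # plus a WhiteKernel term if MetaModelOpts._kernel_noise is set to True.
+    # The nugget value is passed to the regressor as the `alpha` regularization term.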
+
+    # Bootstrapping
+    # 1) normal 2) fast
+    # MetaModelOpts.bootstrap_method = 'fast'  # ToDo: Remove this as a user-defined parameter, not used for GP?
+    MetaModelOpts.n_bootstrap_itrs = 1
+
+    # Print summary of the regression results
+    # MetaModelOpts.verbose = True
+
+    # ------------------------------------------------
+    # ------ Experimental Design Configuration -------
+    # ------------------------------------------------
+    ExpDesign = ExpDesigns(Inputs)
+
+    # One-shot (normal) or Sequential Adaptive (sequential) Design
+    ExpDesign.method = 'sequential'
+    ExpDesign.n_init_samples = 140  # alternatively 3*ndim
+
+    # Sampling methods
+    # 1) random 2) latin_hypercube 3) sobol 4) halton 5) hammersley
+    # 6) chebyshev(FT) 7) grid(FT) 8)user
+    ExpDesign.sampling_method = 'sobol'
+
+    # Provide the experimental design object with a hdf5 file
+    # ExpDesign.hdf5_file = 'ExpDesign_AnalyticFunc.hdf5'
+
+    # Set the sampling parameters
+    ExpDesign.n_new_samples = 100
+    ExpDesign.n_max_samples = 141  # 150          # sum of init + sequential
+    ExpDesign.mod_LOO_threshold = 1e-16
+
+    # ExpDesign.adapt_verbose = True
+    # 1) None 2) 'equal' 3)'epsilon-decreasing' 4) 'adaptive'
+    ExpDesign.tradeoff_scheme = None
+    # MetaModelOpts.ExpDesign.n_replication = 5
+    # -------- Exploration ------
+    # 1)'Voronoi' 2)'random' 3)'latin_hypercube' 4)'LOOCV' 5)'dual annealing'
+    ExpDesign.explore_method = 'random'
+
+    # Use when 'dual annealing' chosen
+    ExpDesign.max_func_itr = 1000
+
+    # Use when 'Voronoi' or 'random' or 'latin_hypercube' chosen
+    ExpDesign.n_canddidate = 1000
+    ExpDesign.n_cand_groups = 4
+
+    # -------- Exploitation ------
+    # 1)'BayesOptDesign' 2)'BayesActDesign' 3)'VarOptDesign' 4)'alphabetic'
+    # 5)'Space-filling'
+    # ExpDesign.exploit_method = 'Space-filling'
+    ExpDesign.exploit_method = 'BayesActDesign'
+    ExpDesign.util_func = 'DKL'
+
+    # BayesOptDesign/BayesActDesign -> when data is available
+    # 1) MI (Mutual information) 2) ALC (Active learning Cohn)
+    # 3) DKL (Kullback-Leibler Divergence) 4) DPP (D-posterior-precision)
+    # 5) APP (A-posterior-precision)  # ['DKL', 'BME', 'infEntropy']
+    # ExpDesign.util_func = 'DKL'
+
+    # BayesActDesign -> when data is available
+    # 1) BME (Bayesian model evidence) 2) infEntropy (Information entropy)
+    # 3) DKL (Kullback-Leibler Divergence)
+    # ExpDesign.util_func = 'DKL'
+
+    # VarBasedOptDesign -> when data is not available
+    # 1)ALM 2)EIGF, 3)LOOCV
+    # or a combination as a list
+    # ExpDesign.util_func = 'EIGF'
+
+    # alphabetic
+    # 1)D-Opt (D-Optimality) 2)A-Opt (A-Optimality)
+    # 3)K-Opt (K-Optimality) or a combination as a list
+    # ExpDesign.util_func = 'D-Opt'
+
+    # Defining the measurement error, if it's known a priori
+    obsData = pd.DataFrame(Model.observations, columns=Model.Output.names)
+    DiscrepancyOpts = Discrepancy('')
+    DiscrepancyOpts.type = 'Gaussian'
+    DiscrepancyOpts.parameters = obsData ** 2
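+    # i.e. the assumed measurement-error variance equals the squared observation values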
+    MetaModelOpts.Discrepancy = DiscrepancyOpts
+
+    # Plot the posterior snapshots for SeqDesign
+    ExpDesign.post_snapshot = False
+    ExpDesign.step_snapshot = 1
+    ExpDesign.max_a_post = [0] * ndim
+
+    # For calculation of validation error for SeqDesign
+    prior = np.load(f"data/Prior_{ndim}.npy")
+    prior_outputs = np.load(f"data/origModelOutput_{ndim}.npy")
+    likelihood = np.load(f"data/validLikelihoods_{ndim}.npy")
+    ExpDesign.valid_samples = prior[:500]
+    ExpDesign.valid_model_runs = {'Z': prior_outputs[:500]}
+
+    # Run using the engine
+    engine = Engine(MetaModelOpts, Model, ExpDesign)
+    # engine.train_sequential()
+    engine.train_normal()
+
+    # Load the objects
+    # with open(f"PCEModel_{Model.name}.pkl", "rb") as input:
+    #     PCEModel = joblib.load(input)
+
+    # =====================================================
+    # =========  POST PROCESSING OF METAMODELS  ===========
+    # =====================================================
+    PostPCE = PostProcessing(engine)
+
+    # Plot to check validation visually.
+    PostPCE.valid_metamodel(n_samples=1)
+
+    # Compute and print RMSE error
+    PostPCE.check_accuracy(n_samples=300)
+
+    # =====================================================
+    # ========  Bayesian inference with Emulator ==========
+    # =====================================================
+    BayesOpts = BayesInference(engine)
+    BayesOpts.emulator = True
+    BayesOpts.plot_post_pred = True
+
+    # BayesOpts.selected_indices = [0, 3, 5,  7, 9]
+    # BME Bootstrap
+    BayesOpts.bootstrap = True
+    BayesOpts.n_bootstrap_itrs = 500
+    BayesOpts.bootstrap_noise = 100
+
+    # Bayesian cross validation
+    BayesOpts.bayes_loocv = True  # TODO: test what this does
+
+    # Select the inference method
+    import emcee
+
+    BayesOpts.inference_method = "MCMC"
+    # Set the MCMC parameters passed to self.mcmc_params
+    BayesOpts.mcmc_params = {
+        'n_steps': 1e4,  # 5,
+        'n_walkers': 30,
+        'moves': emcee.moves.KDEMove(),
+        'multiprocessing': False,
+        'verbose': False
+    }
+
+    # ----- Define the discrepancy model -------
+    obsData = pd.DataFrame(Model.observations, columns=Model.Output.names)
+    BayesOpts.measurement_error = obsData
+
+    # -- (Option B) --
+    DiscrepancyOpts = Discrepancy('')
+    DiscrepancyOpts.type = 'Gaussian'
+    DiscrepancyOpts.parameters = obsData ** 2
+    BayesOpts.Discrepancy = DiscrepancyOpts
+
+    # -- (Option C) --
+    if 0:
+        DiscOutputOpts = Input()
+        # # # OutputName = 'Z'
+        DiscOutputOpts.add_marginals()
+        DiscOutputOpts.Marginals[0].name = r'$\sigma^2_{\epsilon}$'
+        DiscOutputOpts.Marginals[0].dist_type = 'uniform'
+        DiscOutputOpts.Marginals[0].parameters = [0, 10]
+        # BayesOpts.Discrepancy = {'known': DiscrepancyOpts,
+        #                          'infer': Discrepancy(DiscOutputOpts)}
+
+        BayesOpts.bias_inputs = {'Z': np.arange(0, 10, 1.).reshape(-1, 1) / 9}
+
+        DiscOutputOpts = Input()
+        # OutputName = 'lambda'
+        DiscOutputOpts.add_marginals()
+        DiscOutputOpts.Marginals[0].name = r'$\lambda$'
+        DiscOutputOpts.Marginals[0].dist_type = 'uniform'
+        DiscOutputOpts.Marginals[0].parameters = [0, 1]
+
+        # # OutputName = 'sigma_f'
+        DiscOutputOpts.add_marginals()
+        DiscOutputOpts.Marginals[1].name = r'$\sigma_f$'
+        DiscOutputOpts.Marginals[1].dist_type = 'uniform'
+        DiscOutputOpts.Marginals[1].parameters = [0, 1e-4]
+        # BayesOpts.Discrepancy = Discrepancy(DiscOutputOpts)
+        BayesOpts.Discrepancy = {'known': DiscrepancyOpts,
+                                 'infer': Discrepancy(DiscOutputOpts)}
+
+    # Start the calibration/inference
+    Bayes_PCE = BayesOpts.create_inference()
+
+    # Save class objects
+    with open(f'Bayes_{Model.name}.pkl', 'wb') as output:
+        joblib.dump(Bayes_PCE, output, 2)
diff --git a/src/bayesvalidrox/__init__.py b/src/bayesvalidrox/__init__.py
index 55cfa8ed1..a824212ed 100644
--- a/src/bayesvalidrox/__init__.py
+++ b/src/bayesvalidrox/__init__.py
@@ -4,6 +4,7 @@ __version__ = "1.1.0"
 from .pylink.pylink import PyLinkForwardModel
 from .surrogate_models.surrogate_models import MetaModel
 from .surrogate_models.polynomial_chaos import PCE
+from .surrogate_models.gaussian_process_sklearn import GPESkl
 from .surrogate_models.engine import Engine
 from .surrogate_models.inputs import Input
 from .surrogate_models.exp_designs import ExpDesigns
@@ -23,5 +24,6 @@ __all__ = [
     "ExpDesigns",
     "PostProcessing",
     "BayesInference",
-    "BayesModelComparison"
+    "BayesModelComparison",
+    "GPESkl"
     ]
diff --git a/src/bayesvalidrox/surrogate_models/gaussian_process_sklearn.py b/src/bayesvalidrox/surrogate_models/gaussian_process_sklearn.py
new file mode 100644
index 000000000..e50cc20ad
--- /dev/null
+++ b/src/bayesvalidrox/surrogate_models/gaussian_process_sklearn.py
@@ -0,0 +1,466 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Implementation of the GPE metamodel using the Scikit-Learn library
+"""
+
+import copy
+import math
+import os
+import warnings
+
+import functools
+import matplotlib.pyplot as plt
+import numpy as np
+import sklearn.gaussian_process.kernels as kernels
+from joblib import Parallel, delayed
+from sklearn.gaussian_process import GaussianProcessRegressor
+from sklearn.preprocessing import MinMaxScaler, StandardScaler
+from tqdm import tqdm
+
+from .surrogate_models import MetaModel, _preprocessing_fit, _bootstrap_fit, _preprocessing_eval, _bootstrap_eval
+
+warnings.filterwarnings("ignore")
+# warnings.simplefilter('default')
+# Load the mplstyle
+# noinspection SpellCheckingInspection
+plt.style.use(os.path.join(os.path.split(__file__)[0],
+                           '../', 'bayesvalidrox.mplstyle'))
+
+
+class MySklGPE(GaussianProcessRegressor):
+    """
+    Wrapper around the Scikit-Learn GP class to change the default values for the
+    maximum number of iterations and the optimization tolerance.
+    """
+    def __init__(self, max_iter=10_000, gtol=1e-6, **kwargs):
+        super().__init__(**kwargs)
+        self.max_iter = max_iter
+        self.gtol = gtol
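+        # Note: these values are only stored on the instance; scikit-learn's default
+        # L-BFGS optimizer does not use them unless `_constrained_optimization` is
+        # overridden to forward them.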
+
+
+class GPESkl(MetaModel):
+    """
+    GP MetaModel using the Scikit-Learn library
+
+    This class trains a surrogate model of type Gaussian Process Regression.
+    It accepts an input object (input_obj) containing the specification of the
+    distributions for the uncertain parameters.
+
+    Attributes
+    ----------
+    input_obj : obj
+        Input object with the information on the model input parameters.
+    meta_model_type : str
+        Surrogate model type, in this case GPE.
+        Default is GPE.
+    gpe_reg_method : str
+        GPE regression method to compute the kernel hyperparameters. The following
+        regression methods are available for the Scikit-Learn library:
+        1. LBFGS: limited-memory BFGS (currently the only option)
+        Default is `LBFGS`.
+    autoSelect: bool
+        Flag to loop through the available kernels and select the best one based
+        on the BME criterion.
+        Default is False.
+    kernel_type: str
+        Type of kernel to use and train for. The following Scikit-Learn kernels are available:
+        1. RBF: Squared exponential kernel
+        2. Matern: Matern kernel
+        3. RQ: rational quadratic kernel
+        Default is `'RBF'` kernel.
+    isotropy: bool
+        Flag to train an isotropic kernel (one length scale for all input parameters)
+        or an anisotropic kernel (one length scale per dimension). True for isotropic,
+        False for anisotropic.
+        Default is True
+    noisy: bool
+        Consider a WhiteKernel for regularization purposes, and optimize for the noise hyperparameter.
+        Default is False
+    nugget: float
+        Constant value added to the Kernel matrix for regularization purposes (not optimized)
+        Default is 1e-9
+
+    bootstrap_method : str
+        Bootstrapping method. Options are `'normal'` and `'fast'`. The default
+        is `'fast'`. It means that in each iteration except the first one, only
+        the coefficients are recalculated with the ordinary least squares method.
+    n_bootstrap_itrs : int
+        Number of iterations for the bootstrap sampling. The default is `1`.
+    dim_red_method : str
+        Dimensionality reduction method for the output space. The available
+        method is based on principal component analysis (PCA). The default is
+        `'no'`. There are two ways to select number of components: use
+        percentage of the explainable variance threshold (between 0 and 100)
+        (Option A) or direct prescription of components' number (Option B):
+            >>> MetaModelOpts = MetaModel()
+            >>> MetaModelOpts.dim_red_method = 'PCA'
+            >>> MetaModelOpts.var_pca_threshold = 99.999  # Option A
+            >>> MetaModelOpts.n_pca_components = 12 # Option B
+    verbose : bool
+        Prints summary of the regression results. Default is `False`.
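+
+    Examples
+    --------
+    A minimal construction sketch (keyword names follow the `__init__` signature
+    below; `Inputs` stands for a configured Input object):
+        >>> MetaModelOpts = GPESkl(Inputs, kernel_type='Matern', isotropy=False,
+        ...                        nugget=1e-8)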
+    """
+
+    def __init__(self, input_obj, meta_model_type='GPE', gpe_reg_method='lbfgs',
+                 autoSelect=False, kernel_type='RBF', isotropy=True, noisy=False, nugget=1e-9,
+                 bootstrap_method='fast',  # ToDo: Remove this as a user-defined parameter, not used for GP?
+                 n_bootstrap_itrs=1,
+                 dim_red_method='no', verbose=False):
+
+        # Check if the surrogate outputs gaussian results: Always TRUE for GPs
+        is_gaussian = True  # self.check_is_gaussian(n_bootstrap_itrs)
+
+        # Use parent init
+        super().__init__(input_obj, meta_model_type, bootstrap_method,
+                         n_bootstrap_itrs, dim_red_method, is_gaussian, verbose)
+
+        # Additional inputs
+        # Parameters that are not needed from the outside are denoted with '_'
+        self.meta_model_type = meta_model_type
+        self._gpe_reg_method = gpe_reg_method
+        self.regression_dict = {}
+        self._gpe_reg_options = {}
+
+        self._autoSelect = autoSelect
+        self._kernel_isotropy = isotropy
+        self._kernel_noise = noisy
+        self._kernel_type = kernel_type
+        self._nugget = nugget
+
+        # Currently needed as input for the 'engine', but not for the GPE class:
+        self._pce_deg = 1
+
+        # Other params
+        self._gp_poly = None
+        self._x_scaler = None
+        self._bme_score = None
+        self._kernel_name_dict = None
+        # self._LCerror = None
+
+    def check_is_gaussian(self, n_bootstrap_itrs) -> bool:
+        """
+        Check if the metamodel returns a mean and stdev.
+
+        Returns
+        -------
+        bool
+            Always True, since a GP provides both a mean and a standard deviation.
+
+        """
+        return True
+
+    def build_metamodel(self) -> None:
+        """
+        Builds the parts of the metamodel that are needed before fitting.
+        This is executed outside any loops related to e.g. bootstrap or
+        transformations such as pca.
+
+        Returns
+        -------
+        None
+
+        """
+        # Initialize the nested dictionaries
+        self._gp_poly = self.auto_vivification()
+        self._x_scaler = self.auto_vivification()
+        self._bme_score = self.auto_vivification()
+        self._kernel_name_dict = self.auto_vivification()
+        # self._LCerror = self.auto_vivification()
+
+    def build_kernels(self):
+        """
+        Initializes the available kernels and selects the ones to train,
+        depending on the input options.
+        ToDo: Add additional kernels
+        ToDo: Add option to include user-defined kernel
+
+        Returns
+        -------
+        list
+            Kernels to iterate over.
+        list
+            Names of the kernels to iterate over.
+        """
+        _ndim = self.InputSpace.ndim
+
+        # Set initial values and boundaries for the length scales:
+        if self._kernel_isotropy:
+            # Isotropic kernel: one length scale shared by all dimensions
+            ls_bounds = [(1e-5, 1e5)]
+            ls_values = 1.0
+        else:
+            # Anisotropic kernel: one length scale per input dimension
+            ls_bounds = [(1e-5, 1e5)] * _ndim
+            ls_values = [1.0] * _ndim
+
+        # Generate list of probable kernels:
+        rbf_kernel = 1*kernels.RBF(length_scale=ls_values,
+                                   length_scale_bounds=ls_bounds)
+        matern_kernel = 1*kernels.Matern(length_scale=ls_values,
+                                         length_scale_bounds=ls_bounds,
+                                         nu=1.5)
+        rq_kernel = 1*kernels.RationalQuadratic(length_scale=ls_values,
+                                                length_scale_bounds=ls_bounds,
+                                                alpha=1)
+        kernel_dict = {'RBF': rbf_kernel,
+                       'Matern': matern_kernel,
+                       'RQ': rq_kernel}
+
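+        # Note: the leading `1*` wraps each kernel in a ConstantKernel amplitude that
+        # scikit-learn optimizes together with the length scales. Also note that
+        # scikit-learn's RationalQuadratic only supports a scalar (isotropic) length
+        # scale, so the RQ option is only valid with kernel isotropy enabled.
+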
+        if self._autoSelect:
+            kernel_list = list(kernel_dict.values())
+            kernel_names = list(kernel_dict.keys())
+        else:
+            try:
+                kernel_list = [kernel_dict[self._kernel_type]]
+                kernel_names = [self._kernel_type]
+            except KeyError:
+                print(f'The kernel option {self._kernel_type} is not available. '
+                      'An RBF kernel was chosen instead.')
+                kernel_list = [kernel_dict['RBF']]
+                kernel_names = ['RBF']
+
+        return kernel_list, kernel_names
+
+    @staticmethod
+    def transform_x(X: np.array, transform_type='norm'):
+        """
+        Scales the inputs (X) during training using either normalization (to [0, 1])
+        or standardization (zero mean, unit variance). If None, the inputs are not
+        scaled.
+
+        Parameters
+        ----------
+        X: 2D list or np.array of shape (#samples, #dim)
+            The parameter value combinations to train the model with.
+        transform_type: str
+            Transformation to apply to the input parameters ('norm', 'standard' or
+            None). Default is 'norm'.
+
+        Returns
+        -------
+        np.array: (#samples, #dim)
+            transformed input parameters
+        obj: Scaler object
+            transformation object, for future transformations during surrogate evaluation
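+
+        Examples
+        --------
+        A small usage sketch (X is any array of shape (#samples, #dim)):
+            >>> X_scaled, scaler = GPESkl.transform_x(X, transform_type='norm')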
+
+        """
+        if transform_type is None:
+            scaler = None
+            X_S = X
+        else:
+            if transform_type.lower() == 'norm':
+                scaler = MinMaxScaler()
+                X_S = scaler.fit_transform(X)
+            elif transform_type.lower() == 'standard':
+                scaler = StandardScaler()
+                X_S = scaler.fit_transform(X)
+            else:
+                print(f'No scaler {transform_type} found. No scaling was done')
+                scaler = None
+                X_S = X
+
+        return X_S, scaler
+
+    @_preprocessing_fit
+    @_bootstrap_fit
+    def fit(self, X: np.array, y: dict, parallel=False, verbose=False, b_i=0):
+        """
+        Fits the surrogate to the given data (samples X, outputs y).
+        Note here that the samples X should be the transformed samples provided
+        by the experimental design if the transformation is used there.
+
+        Parameters
+        ----------
+        X : 2D list or np.array of shape (#samples, #dim)
+            The parameter value combinations that the model was evaluated at.
+        y : dict of 2D lists or arrays of shape (#samples, #timesteps)
+            The respective model evaluations.
+        parallel : bool
+            Set to True to run the training in parallel for various keys.
+            The default is False.
+        verbose : bool
+            Set to True to obtain more information during runtime.
+            The default is False.
+
+        Returns
+        -------
+        None.
+
+        """
+
+        # For loop over the components/outputs
+        if self.verbose and self.n_bootstrap_itrs == 1:
+            items = tqdm(y.items(), desc="Fitting regression")
+        else:
+            items = y.items()
+
+        # Transform inputs:
+        # ToDo: Do we do this although the inputs are already transformed in the preprocessing decorator?
+        # ToDo: Is this better here or in the adaptive_regression function?
+        X_S, scaler = self.transform_x(X=X)  # pass transform_type=None to skip scaling
+        self._x_scaler[f'b_{b_i + 1}'] = scaler
+
+        for key, output in items:
+            # Parallel fit regression
+            out = None
+            if parallel:  # and (not self.fast_bootstrap or b_i == 0):
+                out = Parallel(n_jobs=-1, backend='multiprocessing')(
+                    delayed(self.adaptive_regression)(X_S,
+                                                      output[:, idx],
+                                                      idx, self.verbose)
+                    for idx in range(output.shape[1]))
+            elif not parallel:  # and (not self.fast_bootstrap or b_i == 0):
+                results = map(functools.partial(self.adaptive_regression, X_S, verbose=self.verbose),
+                              [output[:, idx] for idx in
+                               range(output.shape[1])],
+                              range(output.shape[1]))
+                out = list(results)
+
+            # Create a dict to pass the variables
+            for i in range(output.shape[1]):
+                self._gp_poly[f'b_{b_i + 1}'][key][f"y_{i + 1}"] = out[i]['gp']
+                self._bme_score[f'b_{b_i + 1}'][key][f"y_{i + 1}"] = out[i]['BME']
+                self._kernel_name_dict[f'b_{b_i + 1}'][key][f"y_{i + 1}"] = out[i]['kernel_name']
+
+    # --------------------------------------------------------------------------------------------------------
+    def adaptive_regression(self, X, y, varIdx, verbose=False):
+        """
+        Adaptively fits the GPE model by comparing different Kernel options
+
+        Parameters
+        ----------
+        X : array of shape (n_samples, ndim)
+            Training set. These samples should be already transformed.
+        y : array of shape (n_samples,)
+            Target values, i.e. simulation results for the Experimental design.
+        varIdx : int
+            Index of the output.
+        verbose : bool, optional
+            Print out summary. The default is False.
+
+        Returns
+        -------
+        returnVars : dict
+            Fitted estimator, BME score and the name of the selected kernel.
+
+        """
+
+        # Initialization
+        gp_list = {}
+        BME = []
+
+        # Get kernels:
+        kernel_list, kernel_names = self.build_kernels()
+
+        for i, K in enumerate(kernel_list):
+            if self._kernel_noise:
+                K = K + kernels.WhiteKernel(noise_level=np.std(y)/math.sqrt(2))
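+                # The initial noise level is a heuristic (std(y)/sqrt(2)); the
+                # WhiteKernel noise level itself is optimized during fitting.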
+
+            # ToDo: Check if we want to increase number of iterations/tolerance. Set n_restarts as variable?
+            # gp_list[i] = MySklGPE(kernel=K,
+            #                       n_restarts_optimizer=20,
+            #                       alpha=self._nugget,
+            #                       normalize_y=True)
+            #
+            gp_list[i] = GaussianProcessRegressor(kernel=K,
+                                                  n_restarts_optimizer=10,
+                                                  alpha=self._nugget,
+                                                  normalize_y=True)
+
+            # Fit to data using Maximum Likelihood Estimation
+            gp_list[i].fit(X, y)
+
+            # Store the MLE as BME score
+            BME.append(gp_list[i].log_marginal_likelihood())
+
+        # Select the GP with the highest BME
+        idx_max = np.argmax(BME)
+        gp = gp_list[idx_max]
+
+        if varIdx is not None and verbose:
+            gp_score = gp.score(X, y)
+            print('=' * 50)
+            print(' ' * 10 + ' Summary of results ')
+            print('=' * 50)
+
+            print(f'Output variable {varIdx}:')
+            print('The estimation of GPE coefficients converged,')
+            print(f'with the R^2 score: {gp_score:.3f} ')
+            print(f'using a {kernel_names[idx_max]} Kernel')
+            print('=' * 50)
+
+        # Create a dict to pass the outputs
+        # ToDo: Save the Kernel hyperparameters?
+        returnVars = {}
+        returnVars['gp'] = gp
+        returnVars['BME'] = BME[idx_max]  # store the log-marginal-likelihood score itself, not its index
+        returnVars['kernel_name'] = kernel_names[idx_max]
+
+        return returnVars
+
+    # -------------------------------------------------------------------------
+    @staticmethod
+    def scale_x(X: np.array, transform_obj: object):
+        """
+        Transforms the inputs based on the scaling done during training
+        Parameters
+        ----------
+        X: 2D list or np.array of shape (#samples, #dim)
+            The parameter value combinations to evaluate the model with.
+        transform_obj: Scikit-Learn object
+            Class instance to transform inputs
+
+        Returns
+        -------
+        np.array (#samples, #dim)
+            Transformed input sets
+        """
+        if transform_obj is None:
+            x_t = X
+        else:
+            x_t = transform_obj.transform(X)
+        return x_t
+
+    @_preprocessing_eval
+    @_bootstrap_eval
+    def eval_metamodel(self, samples, b_i=0):
+        """
+        Evaluates GP metamodel at the requested samples.
+
+        Parameters
+        ----------
+        samples : array of shape (n_samples, ndim), optional
+            Samples to evaluate metamodel at. The default is None.
+
+        Returns
+        -------
+        mean_pred : dict
+            Mean of the predictions.
+        std_pred : dict
+            Standard deviation of the predictions.
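+
+        Examples
+        --------
+        A sketch of the returned structure (the output key 'Z' is illustrative):
+            >>> mean_pred, std_pred = metamodel.eval_metamodel(samples)
+            >>> mean_pred['Z'].shape   # (n_samples, n_time_steps)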
+        """
+        # Transform the input parameters
+        samples_sc = self.scale_x(X=samples, transform_obj=self._x_scaler[f'b_{b_i + 1}'])
+
+        # Extract model dictionary
+        model_dict = self._gp_poly[f'b_{b_i + 1}']
+
+        # Loop over outputs
+        mean_pred = {}
+        std_pred = {}
+
+        for output, values in model_dict.items():
+            mean = np.empty((len(samples), len(values)))
+            std = np.empty((len(samples), len(values)))
+            idx = 0
+            for in_key, _ in values.items():
+
+                gp = self._gp_poly[f'b_{b_i + 1}'][output][in_key]
+
+                # Prediction
+                y_mean, y_std = gp.predict(samples_sc, return_std=True)
+
+                mean[:, idx] = y_mean
+                std[:, idx] = y_std
+                idx += 1
+
+            mean_pred[output] = mean
+            std_pred[output] = std
+
+        return mean_pred, std_pred
+
-- 
GitLab