From 2306e76d45a4ea67e696db7a8da11b7726dafdb4 Mon Sep 17 00:00:00 2001
From: kohlhaasrebecca <rebecca.kohlhaas@outlook.com>
Date: Tue, 14 May 2024 14:08:37 +0200
Subject: [PATCH] Decoupled MCMC from BayesInference and improved performance

MCMC no longer receives the whole BayesInference object; inference
performance should now be back up to speed when MCMC is used. A possible
future rework: make MCMC a child class of BayesInference.
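
A minimal sketch of the decoupled interface (the import path is assumed
from the file layout; `engine`, `disc`, `measured_data` and
`total_sigma2` are placeholders for objects prepared by an existing
calibration script):

    from bayesvalidrox.bayes_inference.mcmc import MCMC

    mcmc_params = {
        'init_samples': None,   # None: draw initial walkers from the prior
        'n_steps': 1000,
        'n_walkers': 20,
        'n_burn': 200,
        'moves': None,          # None: default emcee moves
        'multiprocessing': False,
        'verbose': False,
    }
    mcmc = MCMC(engine, mcmc_params, disc,
                bias_inputs=None, error_model=False, req_outputs=None,
                selected_indices=None, emulator=True,
                out_dir='Outputs_Bayes', name='calib')
    posterior_df = mcmc.run_sampler(measured_data, total_sigma2)

BayesInference builds this object itself and keeps it as self.MCMC_Obj,
so the same sampler can now also be driven without a BayesInference
instance.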
---
 .../example_analytical_function.py            |   2 +-
 .../bayes_inference/bayes_inference.py        |  27 +-
 src/bayesvalidrox/bayes_inference/mcmc.py     | 333 +++++++++++++++---
 src/bayesvalidrox/surrogate_models/engine.py  |   2 +-
 .../surrogate_models/input_space.py           |   6 +-
 .../surrogate_models/surrogate_models.py      |   5 +
 6 files changed, 312 insertions(+), 63 deletions(-)

diff --git a/examples/analytical-function/example_analytical_function.py b/examples/analytical-function/example_analytical_function.py
index 37900127a..0303411be 100644
--- a/examples/analytical-function/example_analytical_function.py
+++ b/examples/analytical-function/example_analytical_function.py
@@ -25,7 +25,7 @@ matplotlib.use('agg')
 
 # import bayesvalidrox
 # Add BayesValidRox path
-sys.path.append("src/")
+sys.path.append("../../src/")
 
 from bayesvalidrox.pylink.pylink import PyLinkForwardModel
 from bayesvalidrox.surrogate_models.inputs import Input
diff --git a/src/bayesvalidrox/bayes_inference/bayes_inference.py b/src/bayesvalidrox/bayes_inference/bayes_inference.py
index c7cfe78cd..01815f59a 100644
--- a/src/bayesvalidrox/bayes_inference/bayes_inference.py
+++ b/src/bayesvalidrox/bayes_inference/bayes_inference.py
@@ -251,6 +251,7 @@ class BayesInference:
         self.max_a_posteriori = max_a_posteriori
         self.corner_title_fmt = corner_title_fmt
         self.out_dir = out_dir
+        self.bmc = bmc  # Set to True if you want to cut short to only Model Comparison
 
         # Other properties and parameters (found in code, but never set)
         self.error_model = False  # TODO: no example or use case for this!
@@ -265,7 +266,7 @@ class BayesInference:
         self._mean_pce_prior_pred = None
         self._std_pce_prior_pred = None
         self.__model_prior_pred = None
-        self.bmc = bmc  # Set to True, if you want to cut short to only Model Comparison
+        self.MCMC_Obj = None
 
         # System settings
         if os.name == 'nt':
@@ -290,6 +291,7 @@ class BayesInference:
 
         # If the prior is set by the user, take it, else generate from ExpDes
         if self.prior_samples is None:
+            print('Generating prior samples from the Experimental Design.')
             self.prior_samples = self.engine.ExpDesign.generate_samples(
                 self.n_prior_samples, 'random')
         else:
@@ -308,6 +310,7 @@ class BayesInference:
         # Read observation data 
         # TODO: later use valid #of measurements. but here only get the model observations?
         if self.measured_data is None:
+            print('Reading the observation data.')
             self.measured_data = Model.read_observation(case=self.name)
         # Convert measured_data to a data frame
         if not isinstance(self.measured_data, pd.DataFrame):
@@ -398,6 +401,7 @@ class BayesInference:
                 sigma2 = np.zeros((n_measurement[0]))
             total_sigma2[key] = sigma2
 
+        # TODO: can use this to debug!
         self.Discrepancy.total_sigma2 = total_sigma2
 
         # If inferred sigma2s obtained from e.g. calibration are given
@@ -416,13 +420,21 @@ class BayesInference:
 
         # ---------------- Parameter Bayesian inference ----------------
         # Convert to a dataframe if samples are provided after calibration.
-        MCMC_Obj = None
         if self.name.lower() == 'valid':
             self.posterior_df = pd.DataFrame(self.prior_samples, columns=self.engine.ExpDesign.par_names)
         # Instantiate the MCMC object
         elif self.inference_method.lower() == 'mcmc':
-            MCMC_Obj = MCMC(self)
-            self.posterior_df = MCMC_Obj.run_sampler(
+            #MCMC_Obj = MCMC(self)
+            # Decoupled MCMC from BayesInf
+            self.MCMC_Obj = MCMC(self.engine, self.mcmc_params, 
+                                 self.Discrepancy, 
+                                 # TODO: add this in when the error_MetaModel/error_model works
+                                 # self.BiasInputs, 
+                                 self.bias_inputs, 
+                                 self.error_model, self.req_outputs, 
+                                 self.selected_indices, self.emulator, 
+                                 self.out_dir, self.name)
+            self.posterior_df = self.MCMC_Obj.run_sampler(
                 self.measured_data, total_sigma2
             )
         # Rejection sampling
@@ -440,10 +452,10 @@ class BayesInference:
         print('-' * 50)
 
         # -------- Model Discrepancy -----------
-        if self.error_model and self.name.lower() == 'calib' and MCMC_Obj is not None:  # TODO: where is this used
+        if self.error_model and self.name.lower() == 'calib' and self.MCMC_Obj is not None:  # TODO: where is this used
             # and what does it actually do there?
             self.create_error_model(opt_sigma=opt_sigma,
-                                    type_='posterior', sampler=MCMC_Obj)
+                                    type_='posterior', sampler=self.MCMC_Obj)
 
         # -------- Posterior predictive -----------
         self._posterior_predictive()
@@ -553,6 +565,7 @@ class BayesInference:
 
         # -------- Model Discrepancy -----------
         if self.error_model and self.name.lower() == 'valid':  # TODO: what should be set so that this is tested?
+            print('Creating the error model.')
             self.create_error_model(type_='prior')
         # -----------------------------------------------------
         # ----- Loop over the perturbed observation data ------
@@ -568,6 +581,7 @@ class BayesInference:
         # Compute the prior predictions
         # Evaluate the MetaModel
         if self.emulator:
+            print('Evaluating the metamodel on the prior samples.')
             y_hat, y_std = MetaModel.eval_metamodel(samples=self.prior_samples)
             self.__mean_pce_prior_pred = y_hat
             self._std_pce_prior_pred = y_std
@@ -588,6 +602,7 @@ class BayesInference:
 
         # Evaluate the model
         else:
+            print('Evaluating the model on the prior samples.')
             self.__model_prior_pred = self._eval_model(
                 samples=self.prior_samples, key='PriorPred')
             model_evals = self.__model_prior_pred
diff --git a/src/bayesvalidrox/bayes_inference/mcmc.py b/src/bayesvalidrox/bayes_inference/mcmc.py
index 016eb971b..49748bce7 100755
--- a/src/bayesvalidrox/bayes_inference/mcmc.py
+++ b/src/bayesvalidrox/bayes_inference/mcmc.py
@@ -9,9 +9,10 @@ import matplotlib.pyplot as plt
 from matplotlib.backends.backend_pdf import PdfPages
 import multiprocessing
 import scipy.stats as st
-from scipy.linalg import cholesky as chol
-import warnings
+from scipy import stats
 import shutil
+import scipy.linalg as spla
+from sklearn import preprocessing
 os.environ["OMP_NUM_THREADS"] = "1"
 
 
@@ -94,6 +95,92 @@ def gelman_rubin(chain, return_var=False):
         R_hat = np.sqrt(var_θ / W)
         return R_hat
 
+# -------------------------------------------------------------------------
+def _kernel_rbf(X, hyperparameters):
+    """
+    Isotropic squared exponential kernel.
+
+    Higher l values lead to smoother functions and therefore to coarser
+    approximations of the training data. Lower l values make functions
+    more wiggly with wide uncertainty regions between training data points.
+
+    sigma_f controls the marginal variance of b(x)
+
+    Parameters
+    ----------
+    X : ndarray of shape (n_samples_X, n_features)
+
+    hyperparameters : array_like
+        [0] Lambda: characteristic length scale in (0, 1].
+        [1] sigma2_f: controls the marginal variance of b(x).
+        [2:] sigma2_0: unresolvable error (nugget) terms, interpreted as
+             random error that cannot be attributed to measurement error.
+    Returns
+    -------
+    var_cov_matrix : ndarray of shape (n_samples_X,n_samples_X)
+        Kernel k(X, X).
+
+    """
+    from sklearn.gaussian_process.kernels import RBF
+    min_max_scaler = preprocessing.MinMaxScaler()
+    X_minmax = min_max_scaler.fit_transform(X)
+
+    nparams = len(hyperparameters)
+    if nparams < 3:
+        raise AttributeError('Provide 3 parameters for the RBF kernel!')
+
+    # characteristic length (0,1]
+    Lambda = hyperparameters[0]
+    # sigma_f controls the marginal variance of b(x)
+    sigma2_f = hyperparameters[1]
+
+    rbf = RBF(length_scale=Lambda)
+    cov_matrix = sigma2_f * rbf(X_minmax)
+
+    # (unresolvable error) nugget term that is interpreted as random
+    # error that cannot be attributed to measurement error.
+    sigma2_0 = hyperparameters[2:]
+    # Add the summed nugget variance to the diagonal
+    cov_matrix += np.sum(sigma2_0) * np.eye(cov_matrix.shape[0])
+
+    return cov_matrix
+
+
+# -------------------------------------------------------------------------
+def _logpdf(x, mean, cov):
+    """
+    Computes the log-likelihood under a multivariate normal distribution.
+
+    Parameters
+    ----------
+    x : array_like
+        Point at which the log-density is evaluated, e.g. model outputs.
+    mean : array_like
+        Observation data.
+    cov : 2d array
+        Covariance matrix of the distribution.
+
+    Returns
+    -------
+    log_lik : float
+        Log likelihood.
+
+    """
+
+    # Transform into np arrays
+    x = np.array(x)
+    mean = np.array(mean)
+    cov = np.array(cov)
+
+    n = len(mean)
+    L = spla.cholesky(cov, lower=True)
+    beta = np.sum(np.log(np.diag(L)))
+    dev = x - mean
+    alpha = dev.dot(spla.cho_solve((L, True), dev))
+    log_lik = -0.5 * alpha - beta - n / 2. * np.log(2 * np.pi)
+    return log_lik
+
+
 class MCMC:
     """
     A class for bayesian inference via a Markov-Chain Monte-Carlo (MCMC)
@@ -119,29 +206,42 @@ class MCMC:
         Bayes object.
     """
 
-    def __init__(self, BayesOpts):
+    def __init__(self, engine, mcmc_params, Discrepancy, bias_inputs, 
+                 error_model, req_outputs, selected_indices, emulator,
+                 out_dir, name, BiasInputs=None):
+        
+        # TODO: it might be worth making this a child class of BayesInf?
         # Inputs
-        self.BayesOpts = BayesOpts
+        #self.BayesOpts = BayesOpts
+        self.engine = engine
+        self.Discrepancy = Discrepancy
+        
+        # Get the needed properties from the BayesInf class
+        self.bias_inputs = bias_inputs
+        self.error_model = error_model
+        self.BiasInputs = BiasInputs
+        self.selected_indices = selected_indices
+        self.req_outputs = req_outputs
+        self.emulator = emulator
+        self.out_dir = out_dir
+        self.name = name
         
         # Param inits
         self.counter = 0
         self.observation = None
         self.total_sigma2 = None
+        self.error_MetaModel = None
         
-        # Get general params from BayesOpts
-        self.out_dir = self.BayesOpts.out_dir
-        
-        # Get MCMC parameters ftom BayesOpts
-        pars = self.BayesOpts.mcmc_params
-        self.initsamples = pars['init_samples']
+        # Get MCMC parameters from the mcmc_params dict
+        self.initsamples = mcmc_params['init_samples']
         if isinstance(self.initsamples, pd.DataFrame):
             self.initsamples = self.initsamples.values
-        self.nsteps = int(pars['n_steps'])
-        self.nwalkers = int(pars['n_walkers'])
-        self.nburn = pars['n_burn']
-        self.moves = pars['moves']
-        self.mp = pars['multiprocessing']
-        self.verbose = pars['verbose']
+        self.nsteps = int(mcmc_params['n_steps'])
+        self.nwalkers = int(mcmc_params['n_walkers'])
+        self.nburn = mcmc_params['n_burn']
+        self.moves = mcmc_params['moves']
+        self.mp = mcmc_params['multiprocessing']
+        self.verbose = mcmc_params['verbose']
 
     def run_sampler(self, observation, total_sigma2):
         """
@@ -161,10 +261,10 @@ class MCMC:
 
         """
         # Get init values
-        BayesObj = self.BayesOpts
-        Discrepancy = self.BayesOpts.Discrepancy
-        n_cpus = BayesObj.engine.Model.n_cpus
-        ndim = BayesObj.engine.MetaModel.n_params
+        engine = self.engine
+        Discrepancy = self.Discrepancy
+        n_cpus = engine.Model.n_cpus
+        ndim = engine.MetaModel.n_params
         if not os.path.exists(self.out_dir):
             os.makedirs(self.out_dir)
 
@@ -176,11 +276,15 @@ class MCMC:
         np.random.seed(0)
         if self.initsamples is None:
             try:
-                initsamples = BayesObj.engine.ExpDesign.JDist.sample(self.nwalkers).T
-                initsamples = np.swapaxes(np.array([initsamples]),0,1) # TODO: test if this still works with multiple input dists
+                # Version 1 # TODO: recheck this code, it is a mix of old and new
+                priorDist = self.engine.ExpDesign.JDist
+                initsamples = priorDist.sample(self.nwalkers).T
+
+#                initsamples = engine.ExpDesign.JDist.sample(self.nwalkers).T
+#                initsamples = np.swapaxes(np.array([initsamples]),0,1) # TODO: test if this still works with multiple input dists
             except:
                 # when aPCE selected - gaussian kernel distribution
-                inputSamples = BayesObj.engine.ExpDesign.raw_data.T
+                inputSamples = engine.ExpDesign.raw_data.T
                 random_indices = np.random.choice(
                     len(inputSamples), size=self.nwalkers, replace=False
                     )
@@ -206,7 +310,7 @@ class MCMC:
                     initsamples[:, idx_dim] = dist.rvs(size=self.nwalkers)
 
                 # Update lower and upper
-                BayesObj.engine.ExpDesign.bound_tuples = bound_tuples
+                engine.ExpDesign.bound_tuples = bound_tuples
 
         # Check if sigma^2 needs to be inferred
         if Discrepancy.opt_sigma != 'B': # TODO: why !='B'?
@@ -220,10 +324,10 @@ class MCMC:
             disc_bound_tuple = Discrepancy.ExpDesign.bound_tuples
 
             # Update bound_tuples
-            BayesObj.engine.ExpDesign.bound_tuples += disc_bound_tuple
+            engine.ExpDesign.bound_tuples += disc_bound_tuple
 
         print("\n>>>> Bayesian inference with MCMC for "
-              f"{self.BayesOpts.name} started. <<<<<<")
+              f"{self.name} started. <<<<<<")
 
         # Set up the backend and clear it in case the file already exists
         backend = emcee.backends.HDFBackend(f"{self.out_dir}/emcee_sampler.h5")
@@ -270,7 +374,8 @@ class MCMC:
                 self.nwalkers, ndim, self.log_posterior, moves=self.moves,
                 backend=backend, vectorize=True
                 )
-
+            print(f'ndim: {ndim}')
+            print(f'initsamples.shape: {initsamples.shape}')
             # Check if a burn-in phase is needed!
             if self.initsamples is None:
                 # Burn-in
@@ -305,8 +410,9 @@ class MCMC:
                     continue
 
                 # Train model discrepancy/error
-                if hasattr(BayesObj, 'errorModel') and BayesObj.errorModel \
-                   and not sampler.iteration % 3 * autocorreverynsteps:
+                # TODO: add this back in when the error model is actually working
+                #       and this is not dependent on BayesObj
+                if self.error_model and not sampler.iteration % 3 * autocorreverynsteps:
                     try:
                         self.error_MetaModel = self.train_error_model(sampler)
                     except:
@@ -370,17 +476,17 @@ class MCMC:
         print("** These values must be smaller than 1.1.")
         print('-'*50)
 
-        print(f"\n>>>> Bayesian inference with MCMC for {self.BayesOpts.name} "
+        print(f"\n>>>> Bayesian inference with MCMC for {self.name} "
               "successfully completed. <<<<<<\n")
 
         # Extract parameter names and their prior ranges
-        par_names = self.BayesOpts.engine.ExpDesign.par_names
+        par_names = engine.ExpDesign.par_names
 
         if Discrepancy.opt_sigma != 'B':
             for i in range(len(Discrepancy.InputDisc.Marginals)):
                 par_names.append(Discrepancy.InputDisc.Marginals[i].name)
 
-        params_range = self.BayesOpts.engine.ExpDesign.bound_tuples
+        params_range = engine.ExpDesign.bound_tuples
 
         # Plot traces
         if self.verbose and self.nsteps < 10000:
@@ -449,8 +555,9 @@ class MCMC:
             returned otherwise an array.
 
         """
-        MetaModel = self.BayesOpts.engine.MetaModel
-        Discrepancy = self.BayesOpts.Discrepancy
+        engine = self.engine
+        MetaModel = engine.MetaModel
+        Discrepancy = self.Discrepancy
 
         # Find the number of sigma2 parameters
         if Discrepancy.opt_sigma != 'B':
@@ -460,8 +567,8 @@ class MCMC:
             n_sigma2 = len(disc_bound_tuples)
         else:
             n_sigma2 = -len(theta)
-        prior_dist = self.BayesOpts.engine.ExpDesign.prior_space
-        params_range = self.BayesOpts.engine.ExpDesign.bound_tuples
+        prior_dist = engine.ExpDesign.prior_space
+        params_range = engine.ExpDesign.bound_tuples
         theta = theta if theta.ndim != 1 else theta.reshape((1, -1))
         nsamples = theta.shape[0]
         logprior = -np.inf*np.ones(nsamples)
@@ -512,10 +619,8 @@ class MCMC:
             Log likelihood.
 
         """
-
-        BayesOpts = self.BayesOpts
-        MetaModel = BayesOpts.engine.MetaModel
-        Discrepancy = self.BayesOpts.Discrepancy
+        MetaModel = self.engine.MetaModel
+        Discrepancy = self.Discrepancy
 
         # Find the number of sigma2 parameters
         if Discrepancy.opt_sigma != 'B':
@@ -531,17 +636,18 @@ class MCMC:
         theta = theta if theta.ndim != 1 else theta.reshape((1, -1))
 
         # Evaluate Model/MetaModel at theta
-        mean_pred, BayesOpts._std_pce_prior_pred = self.eval_model(theta)
+        mean_pred, _std_pce_prior_pred = self.eval_model(theta)
 
         # Surrogate model's error using RMSE of test data
         surrError = MetaModel.rmse if hasattr(MetaModel, 'rmse') else None
 
         # Likelihood
-        log_like = BayesOpts.normpdf(
+        log_like = self.normpdf(
             mean_pred, self.observation, self.total_sigma2, sigma2,
             std=surrError
             )
-        return log_like
+        # TODO: give the second return argument back to BayesInf (parameter of the same name)
+        return log_like, _std_pce_prior_pred 
 
     # -------------------------------------------------------------------------
     def log_posterior(self, theta):
@@ -570,7 +676,7 @@ class MCMC:
                 # Compute log prior
                 log_prior = self.log_prior(theta)
                 # Compute log Likelihood
-                log_likelihood = self.log_likelihood(theta)
+                log_likelihood, _std_pce_prior_pred = self.log_likelihood(theta)
 
                 return log_prior + log_likelihood
         else:
@@ -585,7 +691,7 @@ class MCMC:
 
             # Compute loLikelihoods
             if non_inf_idx.size != 0:
-                log_likelihood[non_inf_idx] = self.log_likelihood(
+                log_likelihood[non_inf_idx], _std_pce_prior_pred = self.log_likelihood(
                     theta[non_inf_idx]
                     )
 
@@ -609,13 +715,12 @@ class MCMC:
             Std of model prediction.
 
         """
+        engine = self.engine
+        Model = engine.Model
 
-        BayesObj = self.BayesOpts
-        Model = BayesObj.engine.Model
-
-        if BayesObj.emulator:
+        if self.emulator:
             # Evaluate the MetaModel
-            mean_pred, std_pred = BayesObj.engine.MetaModel.eval_metamodel(samples=theta)
+            mean_pred, std_pred = engine.MetaModel.eval_metamodel(samples=theta)
         else:
             # Evaluate the origModel
             mean_pred, std_pred = dict(), dict()
@@ -636,9 +741,9 @@ class MCMC:
             # Add one to the counter
             self.counter += 1
 
-        if hasattr(self, 'error_MetaModel') and BayesObj.error_model:
+        if self.error_MetaModel is not None and self.error_model:
             meanPred, stdPred = self.error_MetaModel.eval_model_error(
-                BayesObj.BiasInputs, mean_pred
+                self.BiasInputs, mean_pred
                 )
 
         return mean_pred, std_pred
@@ -659,7 +764,7 @@ class MCMC:
             A error model.
 
         """
-        MetaModel = self.BayesOpts.engine.MetaModel
+        MetaModel = self.engine.MetaModel
 
         # Prepare the poster samples
         try:
@@ -684,6 +789,126 @@ class MCMC:
 
         # Train a GPR meta-model using MAP
         error_MetaModel = MetaModel.create_model_error(
-            self.BayesOpts.BiasInputs, y_map, name='Calib')
+            self.BiasInputs, y_map, name='Calib')
 
         return error_MetaModel
+    
+    # -------------------------------------------------------------------------
+    def normpdf(self, outputs, obs_data, total_sigma2s, sigma2=None, std=None):
+        """
+        Calculates the log-likelihood of simulation outputs compared with
+        observation data.
+
+        Parameters
+        ----------
+        outputs : dict
+            A dictionary containing the simulation outputs as array of shape
+            (n_samples, n_measurement) for each model output.
+        obs_data : dict
+            A dictionary/dataframe containing the observation data.
+        total_sigma2s : dict
+            A dictionary with known values of the covariance diagonal entries,
+            a.k.a. sigma^2.
+        sigma2 : array, optional
+            An array of the sigma^2 samples, when the covariance diagonal
+            entries are unknown and are being jointly inferred. The default is
+            None.
+        std : dict, optional
+            A dictionary containing the root mean squared error as array of
+            shape (n_samples, n_measurement) for each model output. The default
+            is None.
+
+        Returns
+        -------
+        logLik : array of shape (n_samples)
+            Log-likelihoods.
+
+        """
+        Model = self.engine.Model
+        logLik = 0.0
+
+        # Extract the requested model outputs for likelihood calculation
+        if self.req_outputs is None:
+            req_outputs = Model.Output.names  # TODO: should this then be saved as self.req_outputs?
+        else:
+            req_outputs = list(self.req_outputs)
+
+        # Loop over the output keys
+        for idx, out in enumerate(req_outputs):
+
+            # (Meta)Model Output
+            nsamples, nout = outputs[out].shape
+
+            # Prepare data and remove NaN
+            try:
+                data = obs_data[out].values[~np.isnan(obs_data[out])]
+            except AttributeError:
+                data = obs_data[out][~np.isnan(obs_data[out])]
+
+            # Prepare data uncertainty / error estimation (sigma2s)
+            non_nan_indices = ~np.isnan(total_sigma2s[out])
+            tot_sigma2s = total_sigma2s[out][non_nan_indices][:nout]
+
+            # Add the std of the PCE if an emulator is used
+            if self.emulator:
+                if std is not None:
+                    tot_sigma2s += std[out] ** 2
+
+            # Select the data points to compare
+            try:
+                indices = self.selected_indices[out]
+            except (TypeError, KeyError):
+                indices = list(range(nout))
+
+            # Set up Covariance Matrix
+            covMatrix = np.diag(np.diag(tot_sigma2s)[indices, indices])
+
+            # If sigma2 is not given, use given total_sigma2s and move to next itr
+            if sigma2 is None:
+                logLik += stats.multivariate_normal.logpdf(
+                    outputs[out][:, indices], data[indices], covMatrix)
+                continue
+
+            # Loop over each run/sample and calculate logLikelihood
+            logliks = np.zeros(nsamples)
+            for s_idx in range(nsamples):
+
+                # Simulation run
+                tot_outputs = outputs[out]
+
+                # Covariance Matrix
+                covMatrix = np.diag(tot_sigma2s)
+
+                # Check the type error term
+                if self.bias_inputs is not None and self.error_model is None:
+                    # Infer a bias model using Gaussian Process Regression
+                    bias_inputs = np.hstack(
+                        (self.bias_inputs[out],
+                         tot_outputs[s_idx].reshape(-1, 1)))
+
+                    params = sigma2[s_idx, idx * 3:(idx + 1) * 3]
+                    covMatrix = _kernel_rbf(bias_inputs, params)
+                else:
+                    # Infer equal sigma2s
+                    try:
+                        sigma_2 = sigma2[s_idx, idx]
+                    except TypeError:
+                        sigma_2 = 0.0
+
+                    covMatrix += sigma_2 * np.eye(nout)
+                    # covMatrix = np.diag(sigma2 * total_sigma2s)
+
+                # Select the data points to compare
+                try:
+                    indices = self.selected_indices[out]
+                except (TypeError, KeyError):
+                    indices = list(range(nout))
+                covMatrix = np.diag(covMatrix[indices, indices])
+
+                # Compute loglikelihood
+                logliks[s_idx] = _logpdf(
+                    tot_outputs[s_idx, indices], data[indices], covMatrix
+                )
+            logLik += logliks
+        return logLik
+
diff --git a/src/bayesvalidrox/surrogate_models/engine.py b/src/bayesvalidrox/surrogate_models/engine.py
index 1c8fa56e6..27c117a79 100644
--- a/src/bayesvalidrox/surrogate_models/engine.py
+++ b/src/bayesvalidrox/surrogate_models/engine.py
@@ -199,7 +199,7 @@ class Engine:
         # Prepare X samples 
         # For training the surrogate use ExpDesign.X_tr, ExpDesign.X is for the model to run on 
         ExpDesign.generate_ED(ExpDesign.n_init_samples,
-                              transform=True,
+                              #transform=True,
                               max_pce_deg=np.max(MetaModel.pce_deg))
 
         # Run simulations at X 
diff --git a/src/bayesvalidrox/surrogate_models/input_space.py b/src/bayesvalidrox/surrogate_models/input_space.py
index c534d34b2..9576462d8 100644
--- a/src/bayesvalidrox/surrogate_models/input_space.py
+++ b/src/bayesvalidrox/surrogate_models/input_space.py
@@ -7,6 +7,7 @@ Input space built from set prior distributions
 import numpy as np
 import chaospy
 import scipy.stats as st
+from tqdm import tqdm
 
 
 # noinspection SpellCheckingInspection
@@ -363,7 +364,10 @@ class InputSpace:
             cdfx = np.zeros(X.shape)
             tr_X = np.zeros(X.shape)
 
-            for par_i in range(n_params):
+            # TODO: this transformation takes quite a while,
+            #       especially when many samples are transformed; can it be improved?
+            # for par_i in tqdm(range(n_params), desc='Transforming the input samples'):
+            for par_i in range(n_params):
 
                 # Extract the parameters of the original space
                 disttype = disttypes[par_i]
diff --git a/src/bayesvalidrox/surrogate_models/surrogate_models.py b/src/bayesvalidrox/surrogate_models/surrogate_models.py
index d8a589dde..eb22b9085 100644
--- a/src/bayesvalidrox/surrogate_models/surrogate_models.py
+++ b/src/bayesvalidrox/surrogate_models/surrogate_models.py
@@ -1176,6 +1176,7 @@ class MetaModel:
             method='user'
         )
         # Compute univariate bases for the given samples
+        #print('Creating the univariate basis.')
         univ_p_val = None
         if self.meta_model_type.lower() != 'gpe':
             univ_p_val = self.univ_basis_vals(
@@ -1189,6 +1190,7 @@ class MetaModel:
         std_pred_b = {}
         b_i = 0
         # Loop over bootstrap iterations
+        #print('Looping over bootstrap iterations.')
         for b_i in range(self.n_bootstrap_itrs):
 
             # Extract model dictionary
@@ -1200,11 +1202,14 @@ class MetaModel:
             # Loop over outputs
             mean_pred = {}
             std_pred = {}
+            #print(model_dict.items())
+            #print('Looping over the output timesteps')
             for output, values in model_dict.items():
 
                 mean = np.empty((len(samples), len(values)))
                 std = np.empty((len(samples), len(values)))
                 idx = 0
+                #print('Looping over ??')
                 for in_key, InIdxValues in values.items():
 
                     # Prediction with GPE
-- 
GitLab