From aad38dd3fd6664bcb8a2e1effb658c203fcb8b2b Mon Sep 17 00:00:00 2001
From: farid <farid.mohammadi@iws.uni-stuttgart.de>
Date: Wed, 10 Nov 2021 09:57:41 +0100
Subject: [PATCH] [BayesInference] add verbosity for MCMC and new model error
 options

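Make the MCMC progress output (step count, mean acceptance fraction,
autocorrelation time and Gelman-Rubin statistic) conditional on the
verbose flag, raise the default burn-in to 200 steps and write the
trace plots only for chains shorter than 10000 steps.

On the model error side, the RBF error kernel now min-max scales its
inputs, uses sklearn's RBF class and accepts several nugget terms;
normpdfSigma2 evaluates per-sample inferred sigma2s in parallel; the
posterior predictive adds the inferred discrepancy covariance before
sampling; and an optional GP-based bias model (discrepancy_GP) is
fitted before MCMC whenever BiasInputs are given.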
---
 .../BayesInference/BayesInference.py          | 249 ++++++++++++------
 BayesValidRox/BayesInference/MCMC.py          |  18 +-
 2 files changed, 182 insertions(+), 85 deletions(-)

diff --git a/BayesValidRox/BayesInference/BayesInference.py b/BayesValidRox/BayesInference/BayesInference.py
index c41f1a175..80135bc37 100644
--- a/BayesValidRox/BayesInference/BayesInference.py
+++ b/BayesValidRox/BayesInference/BayesInference.py
@@ -11,6 +11,7 @@ import pandas as pd
 from tqdm import tqdm
 from scipy import stats
 import scipy.linalg as spla
+
 import seaborn as sns
 import emcee
 import corner
@@ -19,6 +20,8 @@ import gc
 from joblib import Parallel,delayed
 from scipy.stats import multivariate_normal
 from sklearn.metrics import mean_squared_error, r2_score
+from sklearn.metrics.pairwise import rbf_kernel
+from sklearn import preprocessing
 from matplotlib.backends.backend_pdf import PdfPages
 import matplotlib.pylab as plt
 SIZE = 30
@@ -175,30 +178,38 @@ class BayesInference:
             Kernel k(X, X).
     
         """
-        from sklearn.metrics.pairwise import rbf_kernel
+        from sklearn.gaussian_process.kernels import RBF
+        
+        min_max_scaler = preprocessing.MinMaxScaler()
+        X_minmax = min_max_scaler.fit_transform(X)
+
         nparams = len(Sigma2)
         # characteristic length (0,1]
         Lambda =  Sigma2[0]
         # sigma_f controls the marginal variance of b(x)
-        sigma_f = Sigma2[1] 
+        sigma2_f = Sigma2[1] 
         
-        var_cov_matrix = (sigma_f**2) * rbf_kernel(X, gamma = 1/Lambda**2)
+        # cov_matrix = sigma2_f*rbf_kernel(X_minmax, gamma = 1/Lambda**2)
+        
+        rbf = RBF(length_scale=Lambda)
+        cov_matrix = sigma2_f*rbf(X_minmax)
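+        # With sklearn's RBF kernel this is, entry-wise,
+        # cov[i, j] = sigma2_f * exp(-||x_i - x_j||**2 / (2 * Lambda**2)),
+        # evaluated on the min-max scaled inputs X_minmax.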
 
         if nparams > 2:
             # (unresolvable error) nugget term that is interpreted as random 
             # error that cannot be attributed to measurement error.
-            sigma_0 = Sigma2[2]
-            for i, j in np.ndindex(var_cov_matrix.shape):
-                var_cov_matrix[i,j] += sigma_0**2 if i==j else 0
+            sigma2_0 = Sigma2[2:]
+            
+            for i, j in np.ndindex(cov_matrix.shape):
+                cov_matrix[i,j] += np.sum(sigma2_0) if i==j else 0
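+            # Equivalent, vectorized:
+            # cov_matrix += np.sum(sigma2_0) * np.eye(cov_matrix.shape[0])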
 
-        return var_cov_matrix
+        return cov_matrix
     #--------------------------------------------------------------------------------------------------------
     
     def normpdf(self, Outputs, Data, TotalSigma2s, Sigma2=None, std=None):
         
         Model = self.PCEModel.ModelObj
         logLik = 0.0
-
+        
         # Extract the requested model outputs for likelihood calulation
         if self.ReqOutputType is None: OutputType = Model.Output.Names 
         else: 
@@ -226,29 +237,37 @@ class BayesInference:
                 if hasattr(self, 'BiasInputs'):
                     # TODO: Infer a Bias model usig Gaussian Process Regression
                     # BiasInputs = np.hstack((self.BiasInputs[out],TotalOutputs.T))
-                    # biasSigma2s = self.RBF_kernel(BiasInputs, Sigma2)
-                    biasSigma2s = self.RBF_kernel(self.BiasInputs[out], Sigma2)
+                    EDY = self.PCEModel.ExpDesign.Y[out]
+                    BiasInputs = np.hstack((self.BiasInputs[out],EDY.T))
+                    
+                    biasSigma2s = self.RBF_kernel(BiasInputs, Sigma2)
+                    # biasSigma2s = self.RBF_kernel(self.BiasInputs[out], Sigma2)
                 else:
                     # Infer equal sigma2s
-                    biasSigma2s = np.empty((0))
                     try:
                         sigma2 = Sigma2[idx]
                     except:
                         sigma2 = 0.0
-                    biasSigma2s = np.hstack((biasSigma2s, sigma2 * np.ones(shape=nout)))
-                    # biasSigma2s = np.hstack((biasSigma2s, sigma2 * totalSigma2s))
-    
+
                     # Convert biasSigma2s to a covMatrix
-                    biasSigma2s = np.diag(biasSigma2s)
-    
+                    # biasSigma2s = np.diag( sigma2 * np.ones(shape=nout) )
+                    biasSigma2s = np.diag( sigma2 * totalSigma2s)
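+                    # The inferred sigma2 acts as a multiplicative factor on the known sigma2s.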
+
                 # Add Sigma2 to covMatrix
-                covMatrix += biasSigma2s
+                # covMatrix += biasSigma2s
+                covMatrix = biasSigma2s
+            
+            # TODO: Infer a Bias model using Gaussian Process Regression
+            if hasattr(self, 'BiasInputs'):
+                TotalOutputs,covMatrix = self.discrepancy_GP.predict(TotalOutputs, out)
 
+            # formatting_function = np.vectorize(lambda f: format(f, '6.2E'))
+            # print(formatting_function(covMatrix))
             # Add the std of the PCE is chosen as emulator.
             if self.emulator:
                 stdPCE = std[out] if std is not None else np.mean(self._stdPCEPriorPred[out], axis=0)
                 # Expected value of variance (Assump: i.i.d stds)
-                covMatrix += np.diag(stdPCE**4)
+                covMatrix += np.diag(stdPCE**3)
 
             # Select the data points to compare
             if self.selectedIndices is not None:
@@ -307,64 +326,78 @@ class BayesInference:
         
     #--------------------------------------------------------------------------------------------------------
     
-    def normpdfSigma2(self, Outputs, Data, Sigma2):
-        
-#        Outputs = Outputs[outputType]
-#        SampleSize = Outputs.shape[0]
-#        NofMeasurements = Outputs.shape[1]
+    def normpdfSigma2(self, Outputs, Data, TotalSigma2s, Sigma2=None, std=None):
         
+        Model = self.PCEModel.ModelObj
+        logLik = 0.0
         
         # Extract the requested model outputs for likelihood calulation
-        Model = self.PCEModel.ModelObj
         if self.ReqOutputType is None: OutputType = Model.Output.Names 
         else: 
             OutputType = list(self.ReqOutputType)
-
-        SampleSize = Outputs[OutputType[0]].shape[0]
         
-        # Flatten the OutputType
-        TotalOutputs = np.empty((SampleSize,0))
-        for outputType in OutputType:
-            TotalOutputs = np.hstack((TotalOutputs, Outputs[outputType]))
-        
-        # Flatten Oberservation datasets
-        AllData = Data.flatten()
-        
-        NofMeasurements = TotalOutputs.shape[1]
-        
-        Likelihoods = np.zeros((SampleSize)) 
-        Deviation = np.zeros((SampleSize,NofMeasurements))
-        covMatrix = np.zeros((NofMeasurements, NofMeasurements), float)
-        
-        for i in range(SampleSize):
-            # Discrepancy model from Discrepancy class
-            var = Sigma2[i]
-            if len(var)==1:
-                np.fill_diagonal(covMatrix, var)
-            else:
-                row,col = np.diag_indices(covMatrix.shape[0])
-                covMatrix[row,col] = np.hstack((np.repeat(var[0], NofMeasurements*0.5),np.repeat(var[1], NofMeasurements*0.5)))
-            
-            # Add the std of the PCE if emulator is chosen.
-            if self.emulator:
-                covMatrix_PCE = np.zeros((NofMeasurements, NofMeasurements), float)
-                stdPCE = np.empty((SampleSize,0))
-                for outputType in OutputType:
-                    stdPCE = np.hstack((stdPCE, self._stdPCEPriorPred[outputType]))
-                
-                # Expected value of variance
-                varPCE = np.mean(stdPCE**2, axis=0)
-                np.fill_diagonal(covMatrix_PCE, varPCE)
-                
-                covMatrix = covMatrix + covMatrix_PCE
+        # Loop over the outputs and calculate logLikelihood
+        for idx, out in enumerate(OutputType):
             
-            denom = (((2*np.pi)**(NofMeasurements/2)) * np.sqrt(np.linalg.det(covMatrix)))
-        
-            Deviation[i] = AllData - TotalOutputs[i]
-            Likelihoods[i] = (np.exp(-0.5 * np.dot(np.dot(Deviation[i], np.linalg.inv(covMatrix)), Deviation[i][:,np.newaxis])))/denom
+            # (Meta)Model Output
+            TotalOutputs = Outputs[out]
+            nout = TotalOutputs.shape[1]
+
+            # Remove NaN
+            try:
+                data = Data[out].to_numpy()[~np.isnan(Data[out])]
+            except:
+                data = Data[out][~np.isnan(Data[out])]
+            totalSigma2s = TotalSigma2s[out][~np.isnan(TotalSigma2s[out])][:nout]
             
-        return np.log(Likelihoods) 
+            # Covariance diagonal entries
+            biasSigma2s = np.zeros((len(Sigma2),nout))
+            # biasSigma2s = np.repeat(totalSigma2s.reshape(1,-1),len(Sigma2),axis=0)
+
+            for sidx,sigma2s in enumerate(Sigma2):
+                # Check the type of the error term
+                if hasattr(self, 'BiasInputs'):
+                    # TODO: Infer a Bias model using Gaussian Process Regression
+                    BiasInputs = np.hstack((self.BiasInputs[out],TotalOutputs.T))
+                    biasSigma2s = self.RBF_kernel(BiasInputs, Sigma2)
+                    # biasSigma2s = self.RBF_kernel(self.BiasInputs[out], Sigma2)
+                else:
+                    # Infer equal sigma2s
+                    try:
+                        sigma2 = sigma2s[idx]
+                    except:
+                        sigma2 = 0.0
+                    
+                    # Convert biasSigma2s to a covMatrix
+                    # biasSigma2s[sidx] = sigma2 * np.ones(shape=nout)
+                    biasSigma2s[sidx] = sigma2 * totalSigma2s
+
+                # Add the std of the PCE if it is chosen as the emulator.
+                if self.emulator:
+                    stdPCE = std[out] if std is not None else np.mean(self._stdPCEPriorPred[out], axis=0)
+                    # Expected value of the variance (assumption: i.i.d. stds)
+                    biasSigma2s[sidx] += stdPCE**3
     
+                # Select the data points to compare
+                if self.selectedIndices is not None:
+                    biasSigma2s[sidx] = biasSigma2s[self.selectedIndices[out]]
+                    
+            # Select the data points to compare
+            if self.selectedIndices is not None:
+                TotalOutputs = TotalOutputs[:,self.selectedIndices[out]]
+                data = data[self.selectedIndices[out]]
+
+            # Compute the log-likelihood chunk-wise in parallel
+            split_Outputs = np.array_split(TotalOutputs, 8)
+            split_cov = np.array_split(biasSigma2s, 8)
+            outList = Parallel(n_jobs=-1, prefer='threads')(
+                    delayed(stats.multivariate_normal.logpdf)(output, data, np.diag(cov)) 
+                            for output,cov in zip(split_Outputs,split_cov))
+            logLik += np.concatenate(outList).ravel()
+
+        return logLik
+
     #------------------------------------------------------------------------------
     def BME_Corr_Weight(self, Data, TotalSigma2s, posterior):
         """
@@ -553,11 +586,12 @@ class BayesInference:
         
         # Take care of the sigma2
         if Sigma2Prior is not None:
-            Posterior_df = Posterior_df.drop(labels=self.Discrepancy.Name, axis=1)
-        else:
-            Posterior_df = self.Posterior_df
-        
-        
+            try:
+                sigma2s = Posterior_df[self.Discrepancy.Name].to_numpy()
+                Posterior_df = Posterior_df.drop(labels=self.Discrepancy.Name, axis=1)
+            except:
+                sigma2s = self.sigma2s
+
         # Posterior predictive
         if self.emulator:
             if self.SamplingMethod == 'rejection':
@@ -568,6 +602,53 @@ class BayesInference:
                 PriorPred = self.__ModelPriorPred
             PosteriorPred = self.eval_Model(Samples=Posterior_df.to_numpy(),key='PostPred')
         
+        # Add the discrepancy inferred from the likelihood samples to the posterior-predictive runs
+        TotalSigma2 = self.Discrepancy.TotalSigma2
+        for varIdx, var in enumerate(Model.Output.Names):
+            for i in range(len(PosteriorPred[var])):
+                pred = PosteriorPred[var][i]
+                
+                # Known sigma2s
+                totalSigma2 = TotalSigma2[var][~np.isnan(TotalSigma2[var])][:len(pred)]
+                cov = np.diag(totalSigma2)
+                
+                # Check the type of the error term
+                if Sigma2Prior is not None:
+                    # Inferred sigma2s
+                    if hasattr(self, 'BiasInputs'):
+                        # TODO: Infer a Bias model using Gaussian Process Regression
+                        EDY = self.PCEModel.ExpDesign.Y[var]
+                        BiasInputs = np.hstack((self.BiasInputs[var],EDY.T))
+                        # BiasInputs = np.hstack((self.BiasInputs[var],pred.reshape(-1,1)))
+                        biasSigma2s = self.RBF_kernel(BiasInputs,sigma2s[i])
+                        # biasSigma2s = self.RBF_kernel(self.BiasInputs[var], sigma2s[i])
+                    else:
+                        # Infer equal sigma2s
+                        try:
+                            sigma2 = sigma2s[i,varIdx]
+                        except:
+                            sigma2 = 0.0
+        
+                        # Convert biasSigma2s to a covMatrix
+                        # biasSigma2s = np.diag(sigma2 * np.ones(len(pred)))
+                        biasSigma2s = np.diag( sigma2 * totalSigma2)
+                    
+                    # cov += biasSigma2s
+                    cov = biasSigma2s
+                
+                # TODO: Infer a Bias model using Gaussian Process Regression
+                if hasattr(self, 'BiasInputs'):
+                    pred,cov = self.discrepancy_GP.predict_SigmaBias(pred, var)
+
+                if self.selectedIndices is not None:
+                    newcov = np.diag(totalSigma2)
+                    newcov[self.selectedIndices[var],self.selectedIndices[var]] = cov[self.selectedIndices[var],self.selectedIndices[var]]
+                else:
+                    newcov = cov
+                
+                # Draw from a multivariate normal with the prediction as mean and newcov as covariance
+                PosteriorPred[var][i] = np.random.multivariate_normal(pred, newcov, 1)  
+                
         # ----- Prior Predictive -----
         if self.SamplingMethod == 'rejection':
             # Create hdf5 metadata
@@ -633,7 +714,6 @@ class BayesInference:
                 
             try:
                 # Take care of an additional Sigma2s
-                NofPa = self.PCEModel.NofPa
                 self.Samples = SamplesAllParameters[:,:NofPa]
             except:
                 self.Samples = SamplesAllParameters
@@ -675,7 +755,8 @@ class BayesInference:
         # ---------- Preparation of variance for covariance matrix ----------
         # TODO: Modification required.
         # Independent and identically distributed
-        TotalSigma2 = dict()#np.empty((0,1))
+        
+        TotalSigma2 = dict()
         optSigmaFlag = isinstance(self.Discrepancy, dict)
         optSigma = None
         for keyIdx, key in enumerate(OutputNames):
@@ -703,8 +784,15 @@ class BayesInference:
                 sigma2 = np.zeros((nMeasurement))
             
             TotalSigma2[key] = sigma2
+            
+            self.Discrepancy.optSigma = optSigma
+            self.Discrepancy.TotalSigma2 = TotalSigma2
         
-        self.Discrepancy.optSigma = optSigma
+        # If inferred sigma2s (e.g. obtained from a calibration run) are available, sample them
+        try:
+            self.sigma2s = self.Discrepancy.get_Sample(self.NrofSamples)
+        except:
+            pass
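+        # When available, these samples feed the normpdfSigma2 branch below.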
 
         # ---------------- Bootstrap & TOM --------------------
         if self.Bootstrap or self.BayesLOOCV:
@@ -762,13 +850,12 @@ class BayesInference:
                 data_dict = {OutputNames[i]:data[j:k] for i,(j,k) in enumerate(indices)}
                 
                 # unknown sigma2
-                if optSigma == 'C':
-                    logLikelihoods[:,itrIdx] = self.normpdfSigma2(modelEvaluations, data_dict, TotalSigma2)
+                if optSigma == 'C' or hasattr(self, 'sigma2s'):
+                    logLikelihoods[:,itrIdx] = self.normpdfSigma2(modelEvaluations, data_dict, TotalSigma2, Sigma2=self.sigma2s, std=surrError)
                 else:
                     # known sigma2
                     logLikelihoods[:,itrIdx] = self.normpdf(modelEvaluations, data_dict, TotalSigma2, std=surrError)
                     
-                    # # TODO: Model error
                     # y,std = PCEModel.eval_errormodel(self.Samples)
                     # logLikError = self.normpdferror(y, std)
                 
@@ -823,6 +910,14 @@ class BayesInference:
         # ---------------- Parameter Bayesian inference ---------------- 
         if self.SamplingMethod.lower() == 'mcmc':
             
+            # TODO: Create a GPR-based Bias model
+            if hasattr(self, 'BiasInputs'):
+                from .discrepancy_GP import Bias
+                self.discrepancy_GP = Bias()
+                self.discrepancy_GP.fit_bias(self.BiasInputs, 
+                                             self.PCEModel.ExpDesign.Y,
+                                             self.MeasuredData)
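+                # The fitted bias model is queried later via predict() in normpdf
+                # and via predict_SigmaBias() in the posterior predictive.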
+            
             # Convert the pandas data frame to numpy, if it's applicable.
             if self.MCMCinitSamples is not None and isinstance(self.MCMCinitSamples, pd.DataFrame):
                 self.MCMCinitSamples = self.MCMCinitSamples.to_numpy()
diff --git a/BayesValidRox/BayesInference/MCMC.py b/BayesValidRox/BayesInference/MCMC.py
index 22c9a4c3f..1923d16dd 100755
--- a/BayesValidRox/BayesInference/MCMC.py
+++ b/BayesValidRox/BayesInference/MCMC.py
@@ -26,7 +26,7 @@ class MCMC():
     nsteps = 1000  # number of MCMC steps to take
     ntemps = 20
     """
-    def __init__(self, BayesOpts, initsamples = None, nwalkers = 100, nburn = 100, 
+    def __init__(self, BayesOpts, initsamples = None, nwalkers = 100, nburn = 200, 
                  nsteps = 100000, moves = None, multiprocessing=False, verbose = False):
         
         self.BayesOpts      = BayesOpts
@@ -165,8 +165,9 @@ class MCMC():
                     continue
                 
                 # always print the current mean acceptance fraction
-                print("\nStep: {}".format(sampler.iteration))
-                # print("Mean acceptance fraction: {0:.3f}".format(np.mean(sampler.acceptance_fraction)))
+                if self.verbose:
+                    print("\nStep: {}".format(sampler.iteration))
+                    print("Mean acceptance fraction: {0:.3f}".format(np.mean(sampler.acceptance_fraction)))
     
                 # compute the autocorrelation time so far
                 # using tol=0 means that we'll always get an estimate even if it isn't trustworthy
@@ -175,8 +176,9 @@ class MCMC():
                 autocorrIdx += 1
     
                 # output current autocorrelation estimate
-                print("Mean autocorr time estimate: {0:.3f}".format(np.nanmean(tau)))
-                print("Gelman-Rubin Test*: ", np.round(self.gelman_rubin(sampler.chain),3))
+                if self.verbose:
+                    print("Mean autocorr time estimate: {0:.3f}".format(np.nanmean(tau)))
+                    print("Gelman-Rubin Test*: ", np.round(self.gelman_rubin(sampler.chain),3))
                 
                 # check convergence
                 converged = np.all(tau * 100 < sampler.iteration)
@@ -223,7 +225,7 @@ class MCMC():
         
         
         # create a PdfPages object
-        if self.verbose:
+        if self.verbose and self.nsteps<10000:
             pdf = PdfPages(OutputDir+'/traceplots.pdf')
             fig = plt.figure()    
             for parIdx in range(ndim):
@@ -316,14 +318,13 @@ class MCMC():
 
                 # Check if bias term needs to be inferred
                 if Discrepancy.optSigma != 'B':
-                    if self.check_ranges(theta[i,:-nSigma2], 
+                    if self.check_ranges(theta[i,-nSigma2:], 
                                          Discrepancy.ExpDesign.BoundTuples):
                         if all('unif' in Discrepancy.ExpDesign.InputObj.Marginals[i].DistType  for i in \
                             range(Discrepancy.ExpDesign.ndim)):
                             logprior[i] = 0.0
                         else:
                             logprior[i] += np.log(Discrepancy.ExpDesign.priorSpace.pdf(theta[i,-nSigma2:]))
-
         if nsamples == 1:
             return logprior[0]
         else:
@@ -386,6 +387,7 @@ class MCMC():
                 log_prior = self.log_prior(theta)
                 # Compute log Likelihood
                 log_likelihood = self.log_likelihood(theta)
+                # print("logPrior={0:.2f} logLik={1:.2f}".format(log_prior,log_likelihood))
                 return log_prior + log_likelihood
         else:
             # Compute log prior
-- 
GitLab