From 580925c0eb63cadff491f30f78e8544580ed2862 Mon Sep 17 00:00:00 2001
From: faridm69 <faridmohammadi69@gmail.com>
Date: Mon, 27 Jul 2020 17:39:50 +0200
Subject: [PATCH] [BayesInference] Add MCMC trace plots and accelerate the aPCE

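MCMC: save a trace plot (with a marginal histogram of the post-burn-in
samples) for every parameter to the Outputs_Bayes_* directory, use
3*NofPa walkers and a default of 2000 steps, and make check_ranges
return True when a sample lies inside the prior ranges.

aPCE: ExpDesigns now keeps the coefficients returned by
aPoly_Construction in the new polycoeffs dictionary, so the metamodel
reads them from memory instead of reloading polyCoeffs/par_*.npy on
every evaluation.

Minor: label the tqdm progress bars of the parallel model runs, relax
the PCA explained-variance threshold to 99.99 %, and adjust the number
of initial samples and MCMC steps in the analytical-function test.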
---
 .../BayesInference/BayesInference.py          |  4 +-
 BayesValidRox/BayesInference/MCMC.py          | 47 ++++++++++++++-----
 BayesValidRox/PyLink/FuncForwardModel.py      |  4 +-
 BayesValidRox/PyLink/PyLinkForwardModel.py    |  2 +-
 BayesValidRox/surrogate_models/ExpDesigns.py  |  3 +-
 .../surrogate_models/surrogate_models.py      | 14 +++---
 .../Test_AnalyticalFunction.py                |  8 ++--
 7 files changed, 55 insertions(+), 27 deletions(-)

diff --git a/BayesValidRox/BayesInference/BayesInference.py b/BayesValidRox/BayesInference/BayesInference.py
index ebbdd80f9..1f05f5ed2 100644
--- a/BayesValidRox/BayesInference/BayesInference.py
+++ b/BayesValidRox/BayesInference/BayesInference.py
@@ -602,9 +602,9 @@ class BayesInference:
         if self.SamplingMethod.lower() == 'mcmc':
             # TODO: MCMC
             initsamples = None if self.Samples is None else self.Samples
-            nsteps = 1000 if self.MCMCnSteps is None else self.MCMCnSteps
+            nsteps = 2000 if self.MCMCnSteps is None else self.MCMCnSteps
             multiprocessing = True if self.MultiProcessMCMC is None else self.MultiProcessMCMC
-            MCMC_ = MCMC(self, initsamples=initsamples, nwalkers=2*self.PCEModel.NofPa,
+            MCMC_ = MCMC(self, initsamples=initsamples, nwalkers=3*self.PCEModel.NofPa,
                          nsteps = nsteps, multiprocessing=multiprocessing)
             self.Posterior_df = MCMC_.run_sampler(Data[0], TotalSigma2)
         
diff --git a/BayesValidRox/BayesInference/MCMC.py b/BayesValidRox/BayesInference/MCMC.py
index 31a67fa67..a5d6bae55 100755
--- a/BayesValidRox/BayesInference/MCMC.py
+++ b/BayesValidRox/BayesInference/MCMC.py
@@ -6,9 +6,11 @@ Created on Wed Jun  3 16:42:12 2020
 @author: farid
 """
 
+import os
 import numpy as np
 import emcee
 import pandas as pd
+import matplotlib.pyplot as plt
 from multiprocessing import Pool
 import scipy.stats as st
 from scipy.linalg import cholesky as chol
@@ -71,9 +73,6 @@ class MCMC():
             sampler = emcee.EnsembleSampler(self.nwalkers, ndim, self.log_posterior, args=[Observation, TotalSigma2])
             sampler.run_mcmc(initsamples, self.nsteps, progress=True)
         
-        # sampler.chain is of shape (nwalkers, nsteps, ndim)
-        # we'll throw-out the burn-in points and reshape:
-        emcee_trace = sampler.chain[:, self.nburn:, :].reshape(-1, ndim).T
         
         # Posterior diagnostics
         try:
@@ -82,21 +81,48 @@ class MCMC():
             tau = 2
         burnin = int(2*np.max(tau))
         thin = int(0.5*np.min(tau))
-        samples = sampler.get_chain(discard=burnin, flat=True, thin=thin)
-        log_prob_samples = sampler.get_log_prob(discard=burnin, flat=True, thin=thin)
+        finalsamples = sampler.get_chain(discard=burnin, flat=True)#, thin=thin)
+        log_prob_samples = sampler.get_log_prob(discard=burnin, flat=True)#, thin=thin)
         
         print('\n')
         print('-'*15 + 'Posterior diagnostics' + '-'*15)
         print("mean auto-correlation time: {0:.3f}".format(np.mean(tau)))
         print("thin: {0}".format(thin))
         print("burn-in: {0}".format(burnin))
-        print("flat chain shape: {0}".format(samples.shape))
+        print("flat chain shape: {0}".format(finalsamples.shape))
         print("Mean acceptance fraction*: {0:.3f}".format(np.mean(sampler.acceptance_fraction)))
         print("flat log prob shape: {0}".format(log_prob_samples.shape))
         print("\n* This value must lay between 0.2 and 0.5")
         print('-'*50)
         
         print("\n>>>> Bayesian inference with MCMC successfully completed. <<<<<<\n")
+        
+        # Plot traces
+        parNames = [PCEModel.Inputs.Marginals[i].Name for i in range(ndim)]
+        OutputDir = (r'Outputs_Bayes_' + Model.Name + '_' + self.BayesOpts.Name)
+        os.makedirs(OutputDir, exist_ok=True)
+                
+        for parIdx in range(ndim):
+            # Set up the axes with gridspec
+            fig = plt.figure()
+            grid = plt.GridSpec(4, 4, hspace=0.2, wspace=0.2)
+            main_ax = fig.add_subplot(grid[:-1,:3])
+            y_hist = fig.add_subplot(grid[:-1,-1], xticklabels=[], sharey=main_ax)
+
+            for i in range(self.nwalkers):
+                # trace of walker i for this parameter
+                samples = sampler.chain[i, :, parIdx]
+                main_ax.plot(samples, '-')
+
+                # histogram of the post-burn-in samples on the attached axes
+                y_hist.hist(samples[burnin:], 40, histtype='stepfilled',
+                            orientation='horizontal', color='gray')
+            y_hist.invert_xaxis()  # invert once, after the walker loop
+            main_ax.set_title('Trace plot of ' + parNames[parIdx])
+            main_ax.set_xlabel('step number')
+
+            fig.savefig(OutputDir+'/traceplot_par_'+str(parIdx+1)+'.svg', bbox_inches='tight')
+            plt.close(fig)  # free the figure before plotting the next parameter
         # logml_dict = self.Marginal_llk_emcee(sampler, self.nburn, logp=None, maxiter=5000)
         # print('\nThe Bridge Sampling Estimation is %.5f.'%(logml_dict['logml']))
         
@@ -119,7 +145,7 @@ class MCMC():
         
         InputNames = [PCEModel.Inputs.Marginals[i].Name for i in range(len(PCEModel.Inputs.Marginals))]
             
-        Posterior_df = pd.DataFrame(emcee_trace.T, columns=InputNames)
+        Posterior_df = pd.DataFrame(finalsamples, columns=InputNames)
         
         return Posterior_df
     
@@ -135,7 +161,7 @@ class MCMC():
         logprior = -np.inf*np.ones(nsamples)
         
         for i in range(nsamples):
-            if not self.check_ranges(theta[i], params_range):
+            if self.check_ranges(theta[i], params_range):
                 logprior[i] = np.log(priorDist.pdf(theta[i]))
 
         if nsamples == 1:
@@ -150,14 +176,13 @@ class MCMC():
         PCEModel = BayesOpts.PCEModel
         ndimTheta = theta.ndim
         theta = theta if ndimTheta != 1 else theta.reshape((1,-1))
-        
+
         if BayesOpts.emulator:
             # Evaluate the PCEModel
             meanPCEPriorPred, BayesOpts.stdPCEPriorPred = PCEModel.eval_metamodel(samples=theta, name=BayesOpts.Name)
         else:
             # Evaluate the origModel
             meanPCEPriorPred = BayesOpts.eval_Model(theta)
-        
         return BayesOpts.normpdf(meanPCEPriorPred, Observation, TotalSigma2)
         
     def log_posterior(self, theta, Observation, TotalSigma2):
@@ -426,6 +451,6 @@ class MCMC():
         for i, bounds in enumerate(ranges): 
             x = theta[i]
             # condition check 
-            if x< bounds[0] or x> bounds[1]: #or sigma < 0: 
+            if x > bounds[0] or x < bounds[1]: #or sigma < 0: 
                 c = True
         return c
\ No newline at end of file
diff --git a/BayesValidRox/PyLink/FuncForwardModel.py b/BayesValidRox/PyLink/FuncForwardModel.py
index 7adf31ba4..fed1556a6 100644
--- a/BayesValidRox/PyLink/FuncForwardModel.py
+++ b/BayesValidRox/PyLink/FuncForwardModel.py
@@ -48,7 +48,7 @@ class FuncForwardModel:
         # Parallel runs with multiprocessing
         with multiprocessing.Pool() as p:
             group_results = list(tqdm.tqdm(p.imap(self.run_forwardmodel, zip([Function]*P, CollocationPoints)),
-                                           total=P))
+                                           total=P, desc='Running forward model'))
         print("")
         # Save time steps or x-values
         x_values = group_results[0][0]
@@ -90,4 +90,4 @@ class FuncForwardModel:
     
     def read_MCReference(self):
         
-        return pd.DataFrame.from_dict(self.MCReference)
\ No newline at end of file
+        return pd.DataFrame.from_dict(self.MCReference)
diff --git a/BayesValidRox/PyLink/PyLinkForwardModel.py b/BayesValidRox/PyLink/PyLinkForwardModel.py
index e6201eb27..070fea8cd 100644
--- a/BayesValidRox/PyLink/PyLinkForwardModel.py
+++ b/BayesValidRox/PyLink/PyLinkForwardModel.py
@@ -254,7 +254,7 @@ class PyLinkForwardModel:
         # Parallel runs with multiprocessing
         with multiprocessing.Pool(nrCPUs) as p:
             group_results = list(tqdm.tqdm(p.imap(self.run_forwardmodel, zip(CollocationPoints,[prevRun_No+i for i in range(P)], [keyString]*P)),
-                                           total=P))
+                                           total=P, desc='Running forward model'))
         print("\n")
         results = [group_results[NofE][1] for NofE in range(P)]
         # Save time steps or x-values
diff --git a/BayesValidRox/surrogate_models/ExpDesigns.py b/BayesValidRox/surrogate_models/ExpDesigns.py
index 5903dca33..dd200aa6d 100644
--- a/BayesValidRox/surrogate_models/ExpDesigns.py
+++ b/BayesValidRox/surrogate_models/ExpDesigns.py
@@ -36,6 +36,7 @@ class ExpDesigns:
         self.MAP = []
         self.JDist = []
         self.Polytypes= []
+        self.polycoeffs = {}
         self.X = None
         self.Y = None
         
@@ -194,7 +195,7 @@ class ExpDesigns:
             # Create orthogonal polynomial coefficients if necessary
             if MaxPceDegree is not None:
                 for parIdx in tqdm(range(NofPa), ascii=True, desc ="Computing orth. polynomial coeffs"):
-                    aPoly_Construction(self.Input_distributions[parIdx,:], MaxPceDegree, parIdx)
+                    self.polycoeffs['p_'+str(parIdx+1)] = aPoly_Construction(self.Input_distributions[parIdx,:], MaxPceDegree, parIdx)
         else:
             # Generate random samples based on parameter distributions
             self.Input_distributions = chaospy.generate_samples(self.MCSize, domain=self.JDist).T
diff --git a/BayesValidRox/surrogate_models/surrogate_models.py b/BayesValidRox/surrogate_models/surrogate_models.py
index d13ec4b67..97e3b8798 100644
--- a/BayesValidRox/surrogate_models/surrogate_models.py
+++ b/BayesValidRox/surrogate_models/surrogate_models.py
@@ -236,10 +236,11 @@ class Metamodel:
         
         ## Arbitrary polynomials
         if polytype == 'arbitrary':
+            
             parIdx = parIdx_or_parms
             
-            # Load PolyCoeffs.npy and return the coeffs for paramter parIdx
+            # Return the precomputed aPCE coefficients for parameter parIdx
-            return np.load('polyCoeffs/par_{}.npy'.format(parIdx+1))
+            return self.ExpDesign.polycoeffs['p_'+str(parIdx+1)]
         
-        # for quadrature type computations that the recurrence relations are known,
-        # use analytical relations for the recurrence terms:
+        # for quadrature-type computations where the recurrence relations are known,
+        # use the analytical expressions for the recurrence terms:
@@ -391,11 +392,11 @@ class Metamodel:
             # Calculate the univariate polynomials
             for parIdx in range(NofPa):
                 
-                polycoeffs = self.poly_rec_coeffs(n_max, polytypes[parIdx], parIdx)
+                polycoeffs = self.ExpDesign.polycoeffs['p_'+str(parIdx+1)]
                 
                 for deg in range(P):
                     univ_vals[:,parIdx,deg] = univ_kernel(ExpDesignX[:,parIdx], deg)
-
+                
         # ----------------
         else: # Only for Uniform, Normal, Gamma, Beta
             for parIdx in range(NofPa):
@@ -872,7 +873,7 @@ class Metamodel:
         var = np.cumsum(np.round(covar_matrix.explained_variance_ratio_, decimals=5)*100)
         
         try:
-            selected_n_components = np.where(var>=99.999)[0][0] + 1
+            selected_n_components = np.where(var>=99.99)[0][0] + 1
         except:
             selected_n_components = min(n_samples, n_features)
         
@@ -2017,9 +2018,10 @@ class Metamodel:
         else:
             self.Samples = samples
             self.NrofSamples = len(samples)
-            
+        
         if self.metaModel.lower() != 'gpe':
             univ_p_val = self.univ_basis_vals(self.Samples)
+        
         meanPCEOutputs = {}
         stdPCEOutputs = {}
         
@@ -2040,7 +2042,7 @@ class Metamodel:
-                else: # Perdiction with PCE or pcekriging
+                else: # Prediction with PCE or PCE-Kriging
                     PolynomialDegrees = self.BasisDict[Outkey][Inkey]
                     PSI_Val = self.PCE_create_Psi(PolynomialDegrees, univ_p_val)
-                
+                    
-                    # Perdiction 
+                    # Prediction
                     try: # with error bar
                         clf_poly = self.clf_poly[Outkey][Inkey]
diff --git a/BayesValidRox/tests/AnalyticalFunction/Test_AnalyticalFunction.py b/BayesValidRox/tests/AnalyticalFunction/Test_AnalyticalFunction.py
index 1a3a3d400..4e3687688 100755
--- a/BayesValidRox/tests/AnalyticalFunction/Test_AnalyticalFunction.py
+++ b/BayesValidRox/tests/AnalyticalFunction/Test_AnalyticalFunction.py
@@ -127,7 +127,7 @@ if __name__ == "__main__":
     
     # One-shot (normal) or Sequential Adaptive (sequential) Design
     MetaModelOpts.ExpDesign.Method = 'normal'
-    NrofInitSamples = 150 #75
+    NrofInitSamples = 500 #75
     MetaModelOpts.ExpDesign.NrSamples = NrofInitSamples
     
     # Sampling methods
@@ -231,7 +231,7 @@ if __name__ == "__main__":
     # Compute and print RMSE error
     PostPCE.relErrorPCEModel(nSamples=3000)
     
-    sys.exit('STOP!!')
+    # sys.exit('STOP!!')
     #=====================================================
     #========  Bayesian inference with Emulator ==========
     #=====================================================
@@ -245,7 +245,7 @@ if __name__ == "__main__":
     
     # Select the inference method
     BayesOpts.SamplingMethod = 'MCMC'
-    BayesOpts.MCMCnSteps = 5000
+    BayesOpts.MCMCnSteps = 3000
     BayesOpts.MultiProcessMCMC = True
     
     
@@ -263,7 +263,7 @@ if __name__ == "__main__":
     
     
     Bayes_PCE = BayesOpts.create_Inference()
-    # sys.exit('STOP!!')
+    sys.exit('STOP!!')
     #=====================================================
     #====== Bayesian inference with Forward Model ========
     #=====================================================
-- 
GitLab