diff --git a/BayesValidRox/tests/PollutionTest/MeasuredData.npy b/BayesValidRox/tests/PollutionTest/MeasuredData.npy deleted file mode 100644 index 18e951f32ca487cc980c7e2b2927c77c869947a6..0000000000000000000000000000000000000000 Binary files a/BayesValidRox/tests/PollutionTest/MeasuredData.npy and /dev/null differ diff --git a/BayesValidRox/tests/PollutionTest/Pollution.py b/BayesValidRox/tests/PollutionTest/Pollution.py index 0248f3c8eb58dbd394bb104b570fff42bbcb7a30..46d814bb6a4f87f1164fce771fadc20b1a076ea3 100644 --- a/BayesValidRox/tests/PollutionTest/Pollution.py +++ b/BayesValidRox/tests/PollutionTest/Pollution.py @@ -6,6 +6,8 @@ Created on Wed Nov 20 14:48:43 2019 @author: farid """ import numpy as np +import scipy.stats as stats +import seaborn as sns def Pollution(xx, s=None, t=None): """ @@ -56,7 +58,7 @@ def Pollution(xx, s=None, t=None): if t is None: t = np.arange(0.3, 60, 1) # 0.3 - if s is None: s = np.array([0.5, 1, 1.5, 2, 2.5]) + if s is None: s = np.array([2.5])#np.array([0.5, 1, 1.5, 2, 2.5]) ds = len(s); dt = len(t); @@ -90,9 +92,78 @@ def Pollution(xx, s=None, t=None): Yrow = Y.T Output[idx] = Yrow.flatten('F') - return Output + return np.vstack((t, Output)) + +if __name__ == "__main__": + + MCSize = 750000 #1000000 + ndim = 4 + sigma = 0.5 + + # ------------------------------------------------------------------------- + # ----------------------- Synthetic data generation ----------------------- + # ------------------------------------------------------------------------- + MAP = np.array([[10, 0.07, 1.505, 30.1525]]) + outputMAP = Pollution(MAP) + + synthethicData = np.random.normal(0,sigma,outputMAP[1].shape) + + # import matplotlib.pyplot as plt + # plt.scatter(outputMAP[0], synthethicData, ls='-', marker='*') + # plt.legend(loc='best') + # plt.show() + np.save("data/MeasuredData.npy", synthethicData) + # ------------------------------------------------------------------------- + # ---------------------- Generate Prior distribution ---------------------- + # ------------------------------------------------------------------------- + + xx = np.zeros((MCSize, ndim)) + + params = [(7, 13), (0.02, 0.12), (0.01, 3), (30.01, 30.295)] + + for idxDim in range(ndim): + lower, upper = params[idxDim] + xx[:,idxDim] = stats.uniform(loc=lower, scale=upper-lower).rvs(size=MCSize) + + # ------------------------------------------------------------------------- + # ------------- BME and Kullback-Leibler Divergence ----------------------- + # ------------------------------------------------------------------------- + Outputs = Pollution(xx) + + cov_matrix = np.diag(np.repeat(sigma**2, synthethicData.shape)) + + Likelihoods = stats.multivariate_normal.pdf(Outputs[1:], mean=synthethicData, cov=cov_matrix) + + sns.kdeplot(np.log(Likelihoods[Likelihoods>0]), shade=True, color="g", label='Ref. Likelihood') + + normLikelihood = Likelihoods / np.nanmax(Likelihoods) + # Random numbers between 0 and 1 + unif = np.random.rand(1, MCSize)[0] + + # Reject the poorly performed prior + accepted = normLikelihood >= unif + + # Prior-based estimation of BME + logBME = np.log(np.nanmean(Likelihoods)) + print('\nThe Naive MC-Estimation of BME is %.5f.'%(logBME)) + + # Posterior-based expectation of likelihoods + postExpLikelihoods = np.mean(np.log(Likelihoods[accepted])) + + # Calculate Kullback-Leibler Divergence + KLD = postExpLikelihoods - logBME + print("The Kullback-Leibler divergence estimation is %.5f."%KLD) + + # ------------------------------------------------------------------------- + # ----------------- Save the arrays as .npy files ------------------------- + # ------------------------------------------------------------------------- + if MCSize > 500000: + np.save("data/ParamSets.npy", xx) + np.save("data/refBME_KLD.npy", (logBME, KLD)) + np.save("data/mean.npy", np.mean(Outputs[1:],axis=0)) + np.save("data/std.npy", np.std(Outputs[1:],axis=0)) + else: + np.save("data/validSet.npy", xx) + np.save("data/validSetOutput.npy", Outputs[1:]) + np.save("data/validLikelihoods.npy", Likelihoods) -## Synthetic data generation -#MAP = np.array([[8, 0.1, 1, 30.2]]) -#outputMAP = Pollution(MAP) -#np.save("MeasuredData.npy", outputMAP) \ No newline at end of file diff --git a/BayesValidRox/tests/PollutionTest/Pollution_Test.py b/BayesValidRox/tests/PollutionTest/Pollution_Test.py index 568885209218e4be97cf2f2a713390f05e67310a..ce3cdb96cce242762d76d11a7a99d884afc77ffd 100644 --- a/BayesValidRox/tests/PollutionTest/Pollution_Test.py +++ b/BayesValidRox/tests/PollutionTest/Pollution_Test.py @@ -7,6 +7,7 @@ Created on Fri Aug 9 16:25:43 2019 """ import numpy as np +import pandas as pd import matplotlib.pyplot as plt plt.style.use('seaborn-white') plt.rcParams.update({'font.size': 16}) @@ -44,8 +45,15 @@ if __name__ == "__main__": Model.Output.Names = ['Z'] - # MAP = (8, 0.1, 1, 30.2) - Model.Observations['Z'] = np.load("MeasuredData.npy").T + # Synthetic data for Bayesian inversion. + # MAP = (10, 0.07, 1.505, 30.1525) + Model.Observations['Time [s]'] = np.arange(0.3, 60, 1) + Model.Observations['Z'] = np.load("data/MeasuredData.npy") + + # For Checking with the MonteCarlo refrence + Model.MCReference['Time [s]'] = np.arange(0.3, 60, 1) + Model.MCReference['mean'] = np.load("data/mean.npy") + Model.MCReference['std'] = np.load("data/std.npy") #===================================================== #========= PROBABILISTIC INPUT MODEL ============== @@ -54,48 +62,77 @@ if __name__ == "__main__": # standard deviation Inputs = Input() + aPCE = True + ParamSets = np.load('data/ParamSets.npy')[:50000] # mass of pollutant spilled at each location Inputs.addMarginals() Inputs.Marginals[0].Name = 'M' - Inputs.Marginals[0].DistType = 'unif' - Inputs.Marginals[0].Parameters = [7, 13] + if aPCE: + Inputs.Marginals[0].InputValues = ParamSets[:,0] + else: + Inputs.Marginals[0].DistType = 'unif' + Inputs.Marginals[0].Parameters = [7, 13] # diffusion rate in the channel Inputs.addMarginals() Inputs.Marginals[1].Name = 'D' - Inputs.Marginals[1].DistType = 'unif' - Inputs.Marginals[1].Parameters = [0.02, 0.12] + if aPCE: + Inputs.Marginals[1].InputValues = ParamSets[:,1] + else: + Inputs.Marginals[1].DistType = 'unif' + Inputs.Marginals[1].Parameters = [0.02, 0.12] # location of the second spill Inputs.addMarginals() Inputs.Marginals[2].Name = 'L' - Inputs.Marginals[2].DistType = 'unif' - Inputs.Marginals[2].Parameters = [0.01, 3] - + if aPCE: + Inputs.Marginals[2].InputValues = ParamSets[:,2] + else: + Inputs.Marginals[2].DistType = 'unif' + Inputs.Marginals[2].Parameters = [0.01, 3] + # time of the second spill Inputs.addMarginals() Inputs.Marginals[3].Name = 'tau' - Inputs.Marginals[3].DistType = 'unif' - Inputs.Marginals[3].Parameters = [30.01, 30.295] + if aPCE: + Inputs.Marginals[3].InputValues = ParamSets[:,3] + else: + Inputs.Marginals[3].DistType = 'unif' + Inputs.Marginals[3].Parameters = [30.01, 30.295] #===================================================== #====== POLYNOMIAL CHAOS EXPANSION METAMODELS ====== #===================================================== MetaModelOpts = Metamodel(Inputs) - + + # Select if you want to preserve the spatial/temporal depencencies + # MetaModelOpts.DimRedMethod = 'PCA' + # MetaModelOpts.varPCAThreshold = 100 #99.999 + + # Select your metamodel method + # 1) PCE (Polynomial Chaos Expansion) 2) GPE (Gaussian Process Emulator) + # 3) PCEKriging (PCE + GPE) + MetaModelOpts.metaModel = 'PCE' + + # ------------------------------------------------ + # ------------- PCE Specification ---------------- + # ------------------------------------------------ + # Select the sparse least-square minimization method for + # the PCE coefficients calculation: + # 1)OLS: Ordinary Least Square 2)BRR: Bayesian Ridge Regression + # 3)LARS: Least angle regression 4)ARD: Bayesian ARD Regression + # 5)FastARD: Fast Bayesian ARD Regression + MetaModelOpts.RegMethod = 'FastARD' + # Specify the max degree to be compared by the adaptive algorithm: # The degree with the lowest Leave-One-Out cross-validation (LOO) # error (or the highest score=1-LOO)estimator is chosen as the final # metamodel. MetaModelOpts.MinPceDegree = 1 - MetaModelOpts.MaxPceDegree = 5 + MetaModelOpts.MaxPceDegree = 10 - # Select the sparse least-square minimization method for - # the PCE coefficients calculation: - # 1)AaPCE: Adaptive aPCE 2)BRR: Bayesian Ridge Regression - # 3)LARS: Least angle regression 4)ARD: Bayesian ARD Regression (Not Working) - - MetaModelOpts.RegMethod = 'BRR' + # q-quasi-norm 0<q<1 (default=1) + MetaModelOpts.q = 0.85 #np.linspace(0.3,0.8, 3) # Print summary of the regression results #MetaModelOpts.DisplayFlag = True @@ -106,8 +143,8 @@ if __name__ == "__main__": MetaModelOpts.addExpDesign() # One-shot (normal) or Sequential Adaptive (sequential) Design - MetaModelOpts.ExpDesign.Method = 'normal' - NrofInitSamples = 100 #75 + MetaModelOpts.ExpDesign.Method = 'sequential' + NrofInitSamples = 10 #200 MetaModelOpts.ExpDesign.NrSamples = NrofInitSamples # Sampling methods # 1) random 2) latin_hypercube 3) sobol 4) halton 5) hammersley 6) chebyshev(FT) @@ -121,7 +158,7 @@ if __name__ == "__main__": # Plot the posterior snapshots for SeqDesign MetaModelOpts.ExpDesign.PostSnapshot = True MetaModelOpts.ExpDesign.stepSnapshot = 1 - MetaModelOpts.ExpDesign.MAP = (8, 0.1, 1, 30.2) + MetaModelOpts.ExpDesign.MAP = (10, 0.07, 1.505, 30.1525) MetaModelOpts.ExpDesign.parNames = ['M', 'D', 'L', 'tau'] # -------- Optimality criteria: Optimization ------ @@ -148,6 +185,11 @@ if __name__ == "__main__": # MetaModelOpts.ExpDesign.UtilityFunction = 'D-Opt'#['D-Opt', 'A-Opt', 'K-Opt'] # + # For colculation of validation error for SeqDesign + MetaModelOpts.validSamples = np.load("data/validSet.npy") + MetaModelOpts.validModelRuns = np.load("data/validSetOutput.npy") + MetaModelOpts.validLikelihoods = np.load("data/validLikelihoods.npy") + # >>>>>>>>>>>>>>>>>>>>>> Build Surrogate <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # Adaptive sparse arbitrary polynomial chaos expansion PCEModel = MetaModelOpts.create_metamodel(Model) @@ -168,6 +210,10 @@ if __name__ == "__main__": if MetaModelOpts.ExpDesign.Method == 'sequential': PostPCE.seqDesignDiagnosticPlots() + # Plot the sobol indices + if MetaModelOpts.metaModel != 'GPE': + sobol_cell, total_sobol = PostPCE.sobolIndicesPCE() + import sys sys.exit("STOP") #===================================================== @@ -177,71 +223,76 @@ if __name__ == "__main__": BayesOpts.emulator = True # Evaluation of surrogate model predictions - BayesOpts.NrofSamples = 100000 + # BayesOpts.NrofSamples = 100000 - BayesOpts.PlotPostDist = True - BayesOpts.PlotPostPred = True - # ----- Define the discrepancy model ------- - # (Option B) -# DiscrepancyOpts = Discrepancy('') -# DiscrepancyOpts.Type = 'Gaussian' -# DiscrepancyOpts.Parameters = 5e-1 + # BME Bootstrap + # BayesOpts.Bootstrap = True + # BayesOpts.BootstrapItrNr = 10 + # BayesOpts.BootstrapNoise = 0.05 - # (Option C) - DiscInputOpts = Input() + # Select the inference method + BayesOpts.SamplingMethod = 'MCMC' + BayesOpts.MCMCnwalkers = 100 + # BayesOpts.MCMCnSteps = 500 - DiscInputOpts.addMarginals() - DiscInputOpts.Marginals[0].Name = 'Sigma2' - DiscInputOpts.Marginals[0].DistType = 'unif' - DiscInputOpts.Marginals[0].Parameters = [0, 2] - DiscrepancyOpts = Discrepancy(DiscInputOpts) + BayesOpts.PlotPostDist = True + BayesOpts.PlotPostPred = True + # ----- Define the discrepancy model ------- + obsData = pd.DataFrame(0.5*np.ones(60), columns=Model.Output.Names) + BayesOpts.MeasurementError = obsData + # (Option B) + DiscrepancyOpts = Discrepancy('') + DiscrepancyOpts.Type = 'Gaussian' + DiscrepancyOpts.Parameters = obsData**2 BayesOpts.Discrepancy = DiscrepancyOpts - Bayes_PCE = BayesOpts.create_Inference() + Bayes_PCE = BayesOpts.create_Inference() + import sys + sys.exit("STOP") # Make directory for saving the plots/outputs - newpath = (r'Outputs_'+MetaModelOpts.ExpDesign.Method+'_'+MetaModelOpts.ExpDesign.SeqOptimMethod) - if not os.path.exists(newpath): os.makedirs(newpath) + # newpath = (r'Outputs_'+MetaModelOpts.ExpDesign.Method+'_'+MetaModelOpts.ExpDesign.SeqOptimMethod) + # if not os.path.exists(newpath): os.makedirs(newpath) - ## Plot the posterior (Model) - with sns.axes_style("white"): - g = sns.jointplot(x=Bayes_PCE.Posterior_df['x1'], y=Bayes_PCE.Posterior_df['x2'], kind="kde", color="k"); - g.ax_joint.collections[0].set_alpha(0) - g.set_axis_labels("$X$", "$Y$") + # ## Plot the posterior (Model) + # with sns.axes_style("white"): + # g = sns.jointplot(x=Bayes_PCE.Posterior_df['x1'], y=Bayes_PCE.Posterior_df['x2'], kind="kde", color="k"); + # g.ax_joint.collections[0].set_alpha(0) + # g.set_axis_labels("$X$", "$Y$") - g.savefig('./'+newpath+'/Posterior_PCEModel_'+MetaModelOpts.ExpDesign.UtilityFunction[0]+'.svg') - plt.close() + # g.savefig('./'+newpath+'/Posterior_PCEModel_'+MetaModelOpts.ExpDesign.UtilityFunction[0]+'.svg') + # plt.close() - # Plot the evolution of the LOOCV of the Sequential Experimental Design. - if MetaModelOpts.ExpDesign.Method == 'sequential': - SeqLOODict = PCEModel.SeqModifiedLOO - nameUtil = list(SeqLOODict.keys()) + # # Plot the evolution of the LOOCV of the Sequential Experimental Design. + # if MetaModelOpts.ExpDesign.Method == 'sequential': + # SeqLOODict = PCEModel.SeqModifiedLOO + # nameUtil = list(SeqLOODict.keys()) - markers = ['x', 'o', 'd'] - colors = ['k', 'r', 'b'] + # markers = ['x', 'o', 'd'] + # colors = ['k', 'r', 'b'] - plt.figure(figsize=(24, 16)) - for idx, name in enumerate(nameUtil): - SeqLOO = SeqLOODict[name] - x_idx = np.arange(NrofInitSamples, SeqLOO.shape[0]+NrofInitSamples) - marker = markers[idx] - color = colors[idx] + # plt.figure(figsize=(24, 16)) + # for idx, name in enumerate(nameUtil): + # SeqLOO = SeqLOODict[name] + # x_idx = np.arange(NrofInitSamples, SeqLOO.shape[0]+NrofInitSamples) + # marker = markers[idx] + # color = colors[idx] - plt.semilogy(x_idx, SeqLOO, marker=marker, color=color, label=name) + # plt.semilogy(x_idx, SeqLOO, marker=marker, color=color, label=name) - plt.xlabel('Number of runs') - plt.ylabel('Modified LOO error') - plt.legend() + # plt.xlabel('Number of runs') + # plt.ylabel('Modified LOO error') + # plt.legend() - plt.savefig('./'+newpath+'/ModLOOError.svg', bbox_inches='tight') - np.save('./'+newpath+'/SeqLoo.npy', SeqLOO) - plt.show() - plt.close() + # plt.savefig('./'+newpath+'/ModLOOError.svg', bbox_inches='tight') + # np.save('./'+newpath+'/SeqLoo.npy', SeqLOO) + # plt.show() + # plt.close() #===================================================== #====== Bayesian inference with Forward Model ======== diff --git a/BayesValidRox/tests/PollutionTest/data/MeasuredData.npy b/BayesValidRox/tests/PollutionTest/data/MeasuredData.npy new file mode 100644 index 0000000000000000000000000000000000000000..f05ce103231930d48404e37bcf121624b56524b9 Binary files /dev/null and b/BayesValidRox/tests/PollutionTest/data/MeasuredData.npy differ diff --git a/BayesValidRox/tests/PollutionTest/data/ParamSets.npy b/BayesValidRox/tests/PollutionTest/data/ParamSets.npy new file mode 100644 index 0000000000000000000000000000000000000000..42d580ee270f2f9b55d46ee845d308567849905d Binary files /dev/null and b/BayesValidRox/tests/PollutionTest/data/ParamSets.npy differ diff --git a/BayesValidRox/tests/PollutionTest/data/mean.npy b/BayesValidRox/tests/PollutionTest/data/mean.npy new file mode 100644 index 0000000000000000000000000000000000000000..dd05d21e7e85d24a220a896ce794da1647520a08 Binary files /dev/null and b/BayesValidRox/tests/PollutionTest/data/mean.npy differ diff --git a/BayesValidRox/tests/PollutionTest/data/refBME_KLD.npy b/BayesValidRox/tests/PollutionTest/data/refBME_KLD.npy new file mode 100644 index 0000000000000000000000000000000000000000..9ba819a69039a242e1921362f96fe56bc4a62dd6 Binary files /dev/null and b/BayesValidRox/tests/PollutionTest/data/refBME_KLD.npy differ diff --git a/BayesValidRox/tests/PollutionTest/data/std.npy b/BayesValidRox/tests/PollutionTest/data/std.npy new file mode 100644 index 0000000000000000000000000000000000000000..2a085253272e1b894944e2ad458c36e805ead161 Binary files /dev/null and b/BayesValidRox/tests/PollutionTest/data/std.npy differ diff --git a/BayesValidRox/tests/PollutionTest/data/validLikelihoods.npy b/BayesValidRox/tests/PollutionTest/data/validLikelihoods.npy new file mode 100644 index 0000000000000000000000000000000000000000..72609ba49b08843737860d40dc6c4593147cfbea Binary files /dev/null and b/BayesValidRox/tests/PollutionTest/data/validLikelihoods.npy differ diff --git a/BayesValidRox/tests/PollutionTest/data/validSet.npy b/BayesValidRox/tests/PollutionTest/data/validSet.npy new file mode 100644 index 0000000000000000000000000000000000000000..a030354ed1399cf582a6bf328ff9a485d622b08d Binary files /dev/null and b/BayesValidRox/tests/PollutionTest/data/validSet.npy differ diff --git a/BayesValidRox/tests/PollutionTest/data/validSetOutput.npy b/BayesValidRox/tests/PollutionTest/data/validSetOutput.npy new file mode 100644 index 0000000000000000000000000000000000000000..a570452f76f8795ebcc87eb96adaa6d5b8f4b0cf Binary files /dev/null and b/BayesValidRox/tests/PollutionTest/data/validSetOutput.npy differ