diff --git a/.gitignore b/.gitignore
index 194332a8e0168889d72626e3dd037f25d9c2624a..63d966f75a72a731d550bf0075f7d8d85b9b2575 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,6 +19,8 @@ BayesValidRox/surrogate_models/__pycache__/*
 BayesValidRox/tests/AnalyticalFunction/*.pkl
 BayesValidRox/tests/AnalyticalFunction/Outputs_*
 BayesValidRox/tests/AnalyticalFunction/__pycache__/*
+BayesValidRox/tests/adaptivePlots/*
+
 # BeamTest
 BayesValidRox/tests/BeamTest/__pycache__/*
@@ -77,3 +79,8 @@ BayesValidRox/tests/PollutionTest/__pycache__/*
 BayesValidRox/tests/PollutionTest/Outputs_*
 BayesValidRox/tests/PollutionTest/*.npy
 BayesValidRox/tests/PollutionTest/*.pkl
+
+# ModelComparisonTest
+BayesValidRox/tests/ModelComparison/__pycache__/*
+BayesValidRox/tests/ModelComparison/Outputs_*
+BayesValidRox/tests/ModelComparison/*.pkl
diff --git a/BayesValidRox/BayesInference/BayesModelComparison.py b/BayesValidRox/BayesInference/BayesModelComparison.py
index 0865b95938d3d20a7bb00c51580c0f92c8377ebd..7af5887b9f2123cec2201266d62b019f404ada86 100644
--- a/BayesValidRox/BayesInference/BayesModelComparison.py
+++ b/BayesValidRox/BayesInference/BayesModelComparison.py
@@ -20,6 +20,7 @@ from joblib import Parallel,delayed
 from scipy.stats import multivariate_normal
 from sklearn.metrics import mean_squared_error, r2_score
 from matplotlib.backends.backend_pdf import PdfPages
+import matplotlib.patches as patches
 import matplotlib.pylab as plt
 SIZE = 30
 plt.rc('figure', figsize = (24, 16))
@@ -40,8 +41,8 @@ plt.rc('figure', titlesize=SIZE)  # fontsize of the figure title
 from .BayesInference import BayesInference
 
 class BayesModelComparison:
-    def __init__(self):
-        self.Name = 'Calib'
+    def __init__(self, justifiability=True):
+        self.justifiability = justifiability
         self.SamplingMethod = 'rejection'
 
     #--------------------------------------------------------------------------------------------------------
@@ -50,6 +51,9 @@ class BayesModelComparison:
         if not isinstance(modelDict, dict):
             raise Exception("To run model comparsion, you need to pass a dictionary of models.")
 
+        # Extract model names
+        self.modelNames = list(modelDict.keys())
+
         # Run create Interface for each model
         bayesDict = {}
         for model in modelDict.keys():
@@ -62,10 +66,237 @@
             for key, value in optsDict.items():
                 if key in BayesOpts.__dict__.keys():
                     setattr(BayesOpts, key, value)
-
+
+            # TODO: Pass model outputs as perturbed data
+            BayesOpts.perturbedData = None
+
             bayesDict[model] = BayesOpts.create_Inference(modelDict[model])
             print("-"*20)
 
-        # TODO: Compute model weights
+        # Compute model weights
+        BME_Dict = dict()
+        for modelName, bayesObj in bayesDict.items():
+            BME_Dict[modelName] = np.exp(bayesObj.logBME)
+        modelWeights = self.cal_modelWeight(BME_Dict)
+
+        # Plot model weights
+        self.BoxPlot_ModelWeights(modelWeights, 'model_weights')
+
+        # Plot KDEs of the Bayes factors
+        self.kdePlot_BayesFactor(BME_Dict, 'kde_plot')
 
         return bayesDict
+
+    #--------------------------------------------------------------------------------------------------------
+    def cal_modelWeight(self, BME_Dict):
+        """
+        Normalize the BME (assumption: prior model weights are equal for all models).
+
+        Parameters
+        ----------
+        BME_Dict : dict
+            Bootstrap samples of the Bayesian model evidence (BME), one entry per model.
+
+        Returns
+        -------
+        modelWeights : numpy.ndarray
+            Normalized model weights of shape (n_models, n_bootstrap),
+            i.e. each model's BME divided by the sum of the BMEs over
+            all models.
+ + """ + # Stack the BME values for all models + allBME = np.vstack(list(BME_Dict.values())) + + # Print statistics + print('-'*10+"Statistical summary of BME"+'-'*10) + print(np.quantile(np.log(allBME), 0.25, axis=1)-np.quantile(np.log(allBME), 0.5, axis=1)) + print(np.quantile(np.log(allBME), 0.5, axis=1)) + print(np.quantile(np.log(allBME), 0.75, axis=1)-np.quantile(np.log(allBME), 0.5, axis=1)) + + # Model weights + modelWeights = np.divide(allBME, np.nansum(allBME, axis=0)) + + print('\n'+'-'*10+"Statistical summary of Model Weights"+'-'*10) + print(np.quantile(modelWeights, 0.25, axis=1)-np.quantile(modelWeights, 0.5, axis=1)) + print(np.quantile(modelWeights, 0.5, axis=1)) + print(np.quantile(modelWeights, 0.75, axis=1)-np.quantile(modelWeights, 0.5, axis=1)) + + return modelWeights + + #-------------------------------------------------------------------------------------------------------- + def BoxPlot_ModelWeights(self, modelWeights, plotName): + + # mkdir for plots + directory = 'Outputs_Comparison/' + if not os.path.exists(directory): os.makedirs(directory) + + # Create figure + fig, ax = plt.subplots() + + # Filter data using np.isnan + mask = ~np.isnan(modelWeights.T) + filtered_data = [d[m] for d, m in zip(modelWeights, mask.T)] + + # Create the boxplot + bp = ax.boxplot(filtered_data, patch_artist=True, showfliers=False) + + ## change outline color, fill color and linewidth of the boxes + for box in bp['boxes']: + # change outline color + box.set( color='#7570b3', linewidth=4) + # change fill color + box.set( facecolor = '#1b9e77' ) + + ## change color and linewidth of the whiskers + for whisker in bp['whiskers']: + whisker.set(color='#7570b3', linewidth=2) + + ## change color and linewidth of the caps + for cap in bp['caps']: + cap.set(color='#7570b3', linewidth=2) + + ## change color and linewidth of the medians + for median in bp['medians']: + median.set(color='#b2df8a', linewidth=2) + + ## change the style of fliers and their fill + # for flier in bp['fliers']: + # flier.set(marker='o', color='#e7298a', alpha=0.75) + + ## Custom x-axis labels + modelNames = [model.replace('_','$-$') for model in self.modelNames] + ax.set_xticklabels(modelNames) + + ax.set_ylabel('Weight', fontsize = SIZE) + + # Title + plt.title('Posterior Model Weights') + + # Set y lim + ax.set_ylim((-0.05,1.05)) + + # Set size of the ticks + for t in ax.get_xticklabels(): + t.set_fontsize(SIZE) + for t in ax.get_yticklabels(): + t.set_fontsize(SIZE) + + # Save the figure + #fig.savefig('./'+OutputDir+directory+'ModelWeights'+plotName+'.svg', bbox_inches='tight') + fig.savefig('./'+directory+'ModelWeights'+plotName+'.pdf', bbox_inches='tight') + + plt.close() + #-------------------------------------------------------------------------------------------------------- + def kdePlot_BayesFactor(self, BME_Dict, plotName): + + # mkdir for plots + directory = 'Outputs_Comparison/' + if not os.path.exists(directory): os.makedirs(directory) + + Colors=["blue","green","gray", "brown"] + + modelNames = list(BME_Dict.keys()) + nModels = len(modelNames) + + # Plots + fig, axes = plt.subplots(nrows=nModels, ncols=nModels, sharex=True, sharey=True) + + for i, key_i in enumerate(modelNames): + + for j, key_j in enumerate(modelNames): + ax = axes[i,j] + # Set size of the ticks + for t in ax.get_xticklabels(): + t.set_fontsize(SIZE) + for t in ax.get_yticklabels(): + t.set_fontsize(SIZE) + + if j != i: + + # Null hypothesis: key_j is the better model + BayesFactor = np.log10(np.divide(BME_Dict[key_i], 
+                                                     BME_Dict[key_j]))
+
+                    #sns.kdeplot(BayesFactor, ax=ax, color=Colors[i], shade=True)
+                    #sns.histplot(BayesFactor, ax=ax, stat="probability", kde=True, element='step',
+                    #             color=Colors[j])
+
+                    # --https://stackoverflow.com/questions/55128462/how-to-normalize-seaborn-distplot ---
+                    # taken from seaborn's source code (utils.py and distributions.py)
+                    def seaborn_kde_support(data, bw, gridsize, cut, clip):
+                        if clip is None:
+                            clip = (-np.inf, np.inf)
+                        support_min = max(data.min() - bw * cut, clip[0])
+                        support_max = min(data.max() + bw * cut, clip[1])
+                        return np.linspace(support_min, support_max, gridsize)
+
+                    kde_estim = stats.gaussian_kde(BayesFactor, bw_method='scott')
+
+                    # manual linearization of data
+                    #linearized = np.linspace(quotient.min(), quotient.max(), num=500)
+
+                    # or better: mimic seaborn's internal stuff
+                    bw = kde_estim.scotts_factor() * np.std(BayesFactor)
+                    linearized = seaborn_kde_support(BayesFactor, bw, 100, 3, None)
+
+                    # compute the values of the estimated density on the linearized grid
+                    Z = kde_estim.evaluate(linearized)
+
+                    # https://stackoverflow.com/questions/29661574/normalize-numpy-array-columns-in-python
+                    def normalize(x):
+                        return (x - x.min(0)) / x.ptp(0)
+
+                    # normalize so it is between 0 and 1
+                    Z2 = normalize(Z)
+                    ax.plot(linearized, Z2, "-", color=Colors[i], linewidth=4)
+                    ax.fill_between(linearized, 0, Z2, color=Colors[i], alpha=0.25)
+
+                    #ax.set_ylim([0,0.75])
+                    # Draw BF significance levels according to Jeffreys 1961
+                    ax.axvline(x=np.log10(3), ymin=0, linewidth=4, color='dimgrey')  # Substantial evidence for one model
+                    ax.axvline(x=np.log10(10), ymin=0, linewidth=4, color='orange')  # Strong evidence for one model
+                    ax.axvline(x=np.log10(100), ymin=0, linewidth=4, color='r')  # Decisive evidence for one model
+
+                    # legend
+                    BF_label = key_i.replace('_','$-$') + '/' + key_j.replace('_','$-$')
+                    legend_elements = [patches.Patch(facecolor=Colors[i], edgecolor=Colors[i], label='BF('+BF_label+')')]
+                    ax.legend(loc='upper left', handles=legend_elements, fontsize=SIZE-(nModels+1)*5)
+
+                elif j == i:
+                    # build a rectangle in axes coords
+                    left, width = 0, 1
+                    bottom, height = 0, 1
+
+                    # axes coordinates are 0,0 is bottom left and 1,1 is upper right
+                    p = patches.Rectangle(
+                        (left, bottom), width, height, color='white',
+                        fill=True, transform=ax.transAxes, clip_on=False
+                        )
+                    ax.grid(False)
+                    ax.add_patch(p)
+                    # ax.text(0.5*(left+right), 0.5*(bottom+top), key_i,
+                    fsize = SIZE+20 if nModels < 4 else SIZE
+                    ax.text(0.5, 0.5, key_i.replace('_','$-$'),
+                            horizontalalignment='center',
+                            verticalalignment='center',
+                            fontsize=fsize, color=Colors[i],
+                            transform=ax.transAxes)
+
+        # Defining custom 'ylim' values.
+        custom_ylim = (0, 1.05)
+
+        # Setting the values for all axes.
+        plt.setp(axes, ylim=custom_ylim)
+
+        # set labels
+        for i in range(nModels):
+            axes[-1, i].set_xlabel('log$_{10}$(BF)', fontsize=SIZE)
+            axes[i, 0].set_ylabel('Probability', fontsize=SIZE)
+
+        # Adjust subplots
+        plt.subplots_adjust(wspace=0.2, hspace=0.1)
+
+        plt.savefig('./'+directory+'BayesFactor'+plotName+'.svg', bbox_inches='tight')
+        plt.savefig('./'+directory+'BayesFactor'+plotName+'.pdf', bbox_inches='tight')
+
+        plt.close()
diff --git a/BayesValidRox/tests/ModelComparison/Test_model_comparison.py b/BayesValidRox/tests/ModelComparison/Test_model_comparison.py
index 1b43f1b1d98221bdc0bfa8a98335f1a134d9013c..fdd9b615e7ff56488c2ffc8d9acc5f2a278e1c65 100644
--- a/BayesValidRox/tests/ModelComparison/Test_model_comparison.py
+++ b/BayesValidRox/tests/ModelComparison/Test_model_comparison.py
@@ -182,6 +182,9 @@ if __name__ == "__main__":
     DiscrepancyOpts.Parameters = pd.DataFrame(sigma**2,columns=['Z'])
 
     # ----- Define the options model -------
+    metaModels = {"L2_model":L2_MetaModel,
+                  "NL4_model":NL4_MetaModel
+                  }
 
     # MCMC inference method
     OptsDict_MCMC = {"SamplingMethod":"MCMC",
                      "MCMCnwalkers":200,
@@ -193,16 +196,12 @@
     # BME Bootstrap
     OptsDict_Bootstrap = {"Bootstrap":True,
                           "BootstrapItrNr":100,
-                          "BootstrapNoise":0.5,
+                          "BootstrapNoise":0.05,
                           "Discrepancy":DiscrepancyOpts,
                           "emulator": True,
-                          "PlotPostPred":True
+                          "PlotPostPred":False
                           }
 
-    metaModels = {"L2_model":L2_MetaModel,
-                  "NL4_model":NL4_MetaModel
-                  }
-
     # Run model comparison
     BayesOpts = BayesModelComparison()
     Bayes_all = BayesOpts.create_ModelComparison(metaModels,
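Reviewer note: below is a minimal, self-contained sketch of the model-weight and Bayes-factor computations this patch introduces in cal_modelWeight and kdePlot_BayesFactor, assuming equal prior model weights. The model names mirror the test setup above; the BME values are illustrative placeholders, not outputs of the actual metamodels.

    import numpy as np

    # Hypothetical bootstrap samples of the Bayesian model evidence (BME);
    # rows are models, columns are bootstrap iterations.
    rng = np.random.default_rng(0)
    BME_Dict = {
        "L2_model": np.exp(rng.normal(-10, 1, size=100)),
        "NL4_model": np.exp(rng.normal(-12, 1, size=100)),
    }

    # With equal prior model weights, the posterior weight of each model is
    # its BME divided by the sum of the BMEs over all models (column-wise),
    # exactly as in cal_modelWeight.
    allBME = np.vstack(list(BME_Dict.values()))
    modelWeights = allBME / np.nansum(allBME, axis=0)
    print(np.quantile(modelWeights, 0.5, axis=1))  # median weight per model

    # Log10 Bayes factor of L2_model over NL4_model; on the Jeffreys (1961)
    # scale drawn in the KDE plots, log10(BF) > 1 counts as strong and
    # log10(BF) > 2 as decisive evidence for L2_model.
    logBF = np.log10(BME_Dict["L2_model"] / BME_Dict["NL4_model"])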