Commit 589a7159 authored by Farid Mohammadi

[ModelComparison] add model weights and bayes factor plots.

parent fc26f76f
Merge request !1: Resolve "Justifiability analysis"
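For orientation, here is a minimal, self-contained sketch (not part of this commit; the function name and BME numbers are made up) of the posterior model-weight computation that the new cal_modelWeight method performs: assuming equal prior model probabilities, the weight of each model in a given bootstrap sample is its BME divided by the sum of all models' BME values.

import numpy as np

def posterior_model_weights(bme_dict):
    """Normalize BME values into posterior model weights (equal model priors assumed)."""
    all_bme = np.vstack(list(bme_dict.values()))           # shape (n_models, n_bootstrap)
    return np.divide(all_bme, np.nansum(all_bme, axis=0))  # each column sums to one

# Hypothetical BME values for two competing models over three bootstrap runs
weights = posterior_model_weights({'L2_model':  np.array([0.6, 0.5, 0.7]),
                                   'NL4_model': np.array([0.2, 0.5, 0.1])})
print(weights.round(3))  # first column -> [0.75, 0.25]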
@@ -19,6 +19,8 @@ BayesValidRox/surrogate_models/__pycache__/*
BayesValidRox/tests/AnalyticalFunction/*.pkl
BayesValidRox/tests/AnalyticalFunction/Outputs_*
BayesValidRox/tests/AnalyticalFunction/__pycache__/*
BayesValidRox/tests/adaptivePlots/*
# BeamTest
BayesValidRox/tests/BeamTest/__pycache__/*
@@ -77,3 +79,8 @@ BayesValidRox/tests/PollutionTest/__pycache__/*
BayesValidRox/tests/PollutionTest/Outputs_*
BayesValidRox/tests/PollutionTest/*.npy
BayesValidRox/tests/PollutionTest/*.pkl
# ModelComparisonTest
BayesValidRox/tests/ModelComparison/__pycache__/*
BayesValidRox/tests/ModelComparison/Outputs_*
BayesValidRox/tests/ModelComparison/*.pkl
@@ -20,6 +20,7 @@ from joblib import Parallel,delayed
from scipy.stats import multivariate_normal
from sklearn.metrics import mean_squared_error, r2_score
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.patches as patches
import matplotlib.pylab as plt
SIZE = 30
plt.rc('figure', figsize = (24, 16))
@@ -40,8 +41,8 @@ plt.rc('figure', titlesize=SIZE) # fontsize of the figure title
from .BayesInference import BayesInference
class BayesModelComparison:
-def __init__(self):
-self.Name = 'Calib'
+def __init__(self, justifiability=True):
+self.justifiability = justifiability
self.SamplingMethod = 'rejection'
#--------------------------------------------------------------------------------------------------------
@@ -50,6 +51,9 @@ class BayesModelComparison:
if not isinstance(modelDict, dict):
raise Exception("To run model comparison, you need to pass a dictionary of models.")
# Extract model names
self.modelNames = list(modelDict.keys())
# Run create Interface for each model
bayesDict = {}
for model in modelDict.keys():
@@ -62,10 +66,237 @@
for key, value in optsDict.items():
if key in BayesOpts.__dict__.keys():
setattr(BayesOpts, key, value)
# TODO: Pass model outputs as perturbed data
BayesOpts.perturbedData = None
bayesDict[model] = BayesOpts.create_Inference(modelDict[model])
print("-"*20)
-# TODO: Compute model weights
+# Compute model weights
BME_Dict= dict()
for modelName, bayesObj in bayesDict.items():
BME_Dict[modelName] = np.exp(bayesObj.logBME)
modelWeights = self.cal_modelWeight(BME_Dict)
# Plot model weights
self.BoxPlot_ModelWeights(modelWeights, 'model_weights')
# Plot kde plot for model weights
self.kdePlot_BayesFactor(BME_Dict, 'kde_plot')
return bayesDict
#--------------------------------------------------------------------------------------------------------
def cal_modelWeight(self, BME_Dict):
"""
Normalize the BME (Asumption: Model Prior weights are equal for models)
Parameters
----------
BME_Dict : TYPE
DESCRIPTION.
Returns
-------
all_ModelWeights : TYPE
DESCRIPTION.
ModelWeights : TYPE
DESCRIPTION.
"""
# Stack the BME values for all models
allBME = np.vstack(list(BME_Dict.values()))
# Print statistics
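# In each of the two summaries printed below, the three rows give, per model:
# the 25% quantile minus the median, the median itself, and the 75% quantile
# minus the median, evaluated across the bootstrap iterations.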
print('-'*10+"Statistical summary of BME"+'-'*10)
print(np.quantile(np.log(allBME), 0.25, axis=1)-np.quantile(np.log(allBME), 0.5, axis=1))
print(np.quantile(np.log(allBME), 0.5, axis=1))
print(np.quantile(np.log(allBME), 0.75, axis=1)-np.quantile(np.log(allBME), 0.5, axis=1))
# Model weights
modelWeights = np.divide(allBME, np.nansum(allBME, axis=0))
print('\n'+'-'*10+"Statistical summary of Model Weights"+'-'*10)
print(np.quantile(modelWeights, 0.25, axis=1)-np.quantile(modelWeights, 0.5, axis=1))
print(np.quantile(modelWeights, 0.5, axis=1))
print(np.quantile(modelWeights, 0.75, axis=1)-np.quantile(modelWeights, 0.5, axis=1))
return modelWeights
#--------------------------------------------------------------------------------------------------------
def BoxPlot_ModelWeights(self, modelWeights, plotName):
# mkdir for plots
directory = 'Outputs_Comparison/'
if not os.path.exists(directory): os.makedirs(directory)
# Create figure
fig, ax = plt.subplots()
# Filter data using np.isnan
mask = ~np.isnan(modelWeights.T)
filtered_data = [d[m] for d, m in zip(modelWeights, mask.T)]
# Create the boxplot
bp = ax.boxplot(filtered_data, patch_artist=True, showfliers=False)
## change outline color, fill color and linewidth of the boxes
for box in bp['boxes']:
# change outline color
box.set( color='#7570b3', linewidth=4)
# change fill color
box.set( facecolor = '#1b9e77' )
## change color and linewidth of the whiskers
for whisker in bp['whiskers']:
whisker.set(color='#7570b3', linewidth=2)
## change color and linewidth of the caps
for cap in bp['caps']:
cap.set(color='#7570b3', linewidth=2)
## change color and linewidth of the medians
for median in bp['medians']:
median.set(color='#b2df8a', linewidth=2)
## change the style of fliers and their fill
# for flier in bp['fliers']:
# flier.set(marker='o', color='#e7298a', alpha=0.75)
## Custom x-axis labels
modelNames = [model.replace('_','$-$') for model in self.modelNames]
ax.set_xticklabels(modelNames)
ax.set_ylabel('Weight', fontsize = SIZE)
# Title
plt.title('Posterior Model Weights')
# Set y lim
ax.set_ylim((-0.05,1.05))
# Set size of the ticks
for t in ax.get_xticklabels():
t.set_fontsize(SIZE)
for t in ax.get_yticklabels():
t.set_fontsize(SIZE)
# Save the figure
#fig.savefig('./'+OutputDir+directory+'ModelWeights'+plotName+'.svg', bbox_inches='tight')
fig.savefig('./'+directory+'ModelWeights'+plotName+'.pdf', bbox_inches='tight')
plt.close()
#--------------------------------------------------------------------------------------------------------
def kdePlot_BayesFactor(self, BME_Dict, plotName):
# mkdir for plots
directory = 'Outputs_Comparison/'
if not os.path.exists(directory): os.makedirs(directory)
Colors=["blue","green","gray", "brown"]
modelNames = list(BME_Dict.keys())
nModels = len(modelNames)
# Plots
fig, axes = plt.subplots(nrows=nModels, ncols=nModels, sharex=True, sharey=True)
for i, key_i in enumerate(modelNames):
for j, key_j in enumerate(modelNames):
ax = axes[i,j]
# Set size of the ticks
for t in ax.get_xticklabels():
t.set_fontsize(SIZE)
for t in ax.get_yticklabels():
t.set_fontsize(SIZE)
if j != i:
# Null hypothesis: key_j is the better model
BayesFactor = np.log10(np.divide(BME_Dict[key_i], BME_Dict[key_j]))
#sns.kdeplot(BayesFactor, ax=ax, color=Colors[i], shade=True)
#sns.histplot(BayesFactor, ax=ax, stat="probability", kde=True, element='step',
#color=Colors[j])
# --https://stackoverflow.com/questions/55128462/how-to-normalize-seaborn-distplot ---
# taken from seaborn's source code (utils.py and distributions.py)
def seaborn_kde_support(data, bw, gridsize, cut, clip):
if clip is None:
clip = (-np.inf, np.inf)
support_min = max(data.min() - bw * cut, clip[0])
support_max = min(data.max() + bw * cut, clip[1])
return np.linspace(support_min, support_max, gridsize)
kde_estim = stats.gaussian_kde(BayesFactor, bw_method='scott')
# manual linearization of data
#linearized = np.linspace(quotient.min(), quotient.max(), num=500)
# or better: mimic seaborn's internal stuff
bw = kde_estim.scotts_factor() * np.std(BayesFactor)
linearized = seaborn_kde_support(BayesFactor, bw, 100, 3, None)
# computes values of the estimated function on the estimated linearized inputs
Z = kde_estim.evaluate(linearized)
# https://stackoverflow.com/questions/29661574/normalize-numpy-array-columns-in-python
def normalize(x):
return (x - x.min(0)) / x.ptp(0)
# normalize so it is between 0;1
Z2 = normalize(Z)
ax.plot(linearized, Z2, "-", color=Colors[i],linewidth=4)
ax.fill_between(linearized, 0, Z2, color=Colors[i], alpha=0.25)
#ax.set_ylim([0,0.75])
# Draw BF significant levels according to Jeffreys 1961
ax.axvline(x=np.log10(3),ymin=0, linewidth=4, color='dimgrey') # Strong evidence for both models
ax.axvline(x=np.log10(10),ymin=0,linewidth=4, color='orange') # Strong evidence for one model
ax.axvline(x=np.log10(100),ymin=0,linewidth=4, color='r') # Decisive evidence for one model
# legend
BF_label = key_i.replace('_','$-$') + '/' + key_j.replace('_','$-$')
legend_elements = [patches.Patch(facecolor=Colors[i], edgecolor=Colors[i],label='BF('+BF_label+')')]
ax.legend(loc='upper left', handles=legend_elements,fontsize=SIZE-(nModels+1)*5)
elif j == i:
# build a rectangle in axes coords
left, width = 0, 1
bottom, height = 0, 1
# axes coordinates are 0,0 is bottom left and 1,1 is upper right
p = patches.Rectangle(
(left, bottom), width, height, color ='white' ,
fill=True, transform=ax.transAxes, clip_on=False
)
ax.grid(False)
ax.add_patch(p)
# ax.text(0.5*(left+right), 0.5*(bottom+top), key_i,
fsize = SIZE+20 if nModels < 4 else SIZE
ax.text(0.5, 0.5, key_i.replace('_','$-$'),
horizontalalignment='center',
verticalalignment='center',
fontsize=fsize, color=Colors[i],
transform=ax.transAxes)
# Defining custom 'ylim' values.
custom_ylim = (0, 1.05)
# Setting the values for all axes.
plt.setp(axes, ylim=custom_ylim)
# set labels
for i in range(nModels):
axes[-1,i].set_xlabel('log$_{10}$(BF)', fontsize=SIZE)
axes[i, 0].set_ylabel('Probability', fontsize=SIZE)
# Adjust subplots
plt.subplots_adjust(wspace=0.2, hspace=0.1)
plt.savefig('./'+directory+'BayesFactor'+plotName+'.svg', bbox_inches = 'tight')
plt.savefig('./'+directory+'BayesFactor'+plotName+'.pdf', bbox_inches = 'tight')
plt.close()
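The vertical lines that kdePlot_BayesFactor draws at log10(3), log10(10) and log10(100) mark the evidence thresholds of Jeffreys (1961) for the Bayes factor. As a rough guide, here is a minimal sketch (the helper name and BME numbers are hypothetical, not part of this commit) of how a single Bayes factor reads against those thresholds:

import numpy as np

def evidence_category(bme_i, bme_j):
    """Rough Jeffreys (1961) reading of the Bayes factor BME_i / BME_j."""
    log10_bf = np.log10(bme_i / bme_j)
    if log10_bf < np.log10(3):
        return 'barely worth mentioning'
    if log10_bf < np.log10(10):
        return 'substantial evidence for model i'
    if log10_bf < np.log10(100):
        return 'strong evidence for model i'
    return 'decisive evidence for model i'

print(evidence_category(0.6, 0.02))  # log10(30) ~ 1.48 -> strong evidence for model i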
@@ -182,6 +182,9 @@ if __name__ == "__main__":
DiscrepancyOpts.Parameters = pd.DataFrame(sigma**2,columns=['Z'])
# ----- Define the options model -------
+metaModels = {"L2_model":L2_MetaModel,
+"NL4_model":NL4_MetaModel
+}
# MCMC inference method
OptsDict_MCMC = {"SamplingMethod":"MCMC",
"MCMCnwalkers":200,
@@ -193,16 +196,12 @@ if __name__ == "__main__":
# BME Bootstrap
OptsDict_Bootstrap = {"Bootstrap":True,
"BootstrapItrNr":100,
"BootstrapNoise":0.5,
"BootstrapNoise":0.05,
"Discrepancy":DiscrepancyOpts,
"emulator": True,
"PlotPostPred":True
"PlotPostPred":False
}
-metaModels = {"L2_model":L2_MetaModel,
-"NL4_model":NL4_MetaModel
-}
# Run model comparison
BayesOpts = BayesModelComparison()
Bayes_all = BayesOpts.create_ModelComparison(metaModels,
......