From 7c2ab4eb6091a14171e700ccc4daa04eba895894 Mon Sep 17 00:00:00 2001 From: farid <farid.mohammadi@iws.uni-stuttgart.de> Date: Tue, 18 Jan 2022 15:22:48 +0100 Subject: [PATCH] [PA-A] update scripts for format style. --- BayesValidRox/tests/PA-A/Benchmark_PAA.py | 488 ++++++++------- .../tests/PA-A/ffpm_validation_stokesdarcy.py | 570 ++++++++++-------- .../tests/PA-A/ffpm_validation_stokespnm.py | 466 +++++++------- BayesValidRox/tests/PA-A/util/EDY_plot.py | 218 ++++--- .../tests/PA-A/util/post_plot_PAPER.py | 257 ++++---- .../tests/PA-A/util/vel_diagnostics.py | 135 +++-- 6 files changed, 1210 insertions(+), 924 deletions(-) diff --git a/BayesValidRox/tests/PA-A/Benchmark_PAA.py b/BayesValidRox/tests/PA-A/Benchmark_PAA.py index 531eca92e..47837798e 100755 --- a/BayesValidRox/tests/PA-A/Benchmark_PAA.py +++ b/BayesValidRox/tests/PA-A/Benchmark_PAA.py @@ -1,12 +1,22 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ -Created on Thu Feb 6 14:59:50 2020 -@author: mohammadi +Author: Farid Mohammadi, M.Sc. +E-Mail: farid.mohammadi@iws.uni-stuttgart.de +Department of Hydromechanics and Modelling of Hydrosystems (LH2) +Institute for Modelling Hydraulic and Environmental Systems (IWS), University +of Stuttgart, www.iws.uni-stuttgart.de/lh2/ +Pfaffenwaldring 61 +70569 Stuttgart + +Created on Thu Feb 6 2020 + """ -import sys, os, joblib +import sys +import os +import joblib import numpy as np from scipy import stats import pandas as pd @@ -14,77 +24,75 @@ import seaborn as sns from matplotlib.patches import Patch import matplotlib.lines as mlines import matplotlib -matplotlib.use('agg') + +matplotlib.use("agg") import matplotlib.pylab as plt + SIZE = 40 -plt.rc('figure', figsize = (24, 16)) -plt.rc('font', family='serif', serif='Arial') -plt.rc('font', size=SIZE) -plt.rc('axes', grid = True) -plt.rc('text', usetex=True) -plt.rc('axes', linewidth=3) -plt.rc('axes', grid=True) -plt.rc('grid', linestyle="-") -plt.rc('axes', titlesize=SIZE) # fontsize of the axes title -plt.rc('axes', labelsize=SIZE) # fontsize of the x and y labels -plt.rc('xtick', labelsize=SIZE) # fontsize of the tick labels -plt.rc('ytick', labelsize=SIZE) # fontsize of the tick labels -plt.rc('legend', fontsize=SIZE) # legend fontsize -plt.rc('figure', titlesize=SIZE) # fontsize of the figure title - - -try: - import cPickle as pickle -except ModuleNotFoundError: - import _pickle as pickle +plt.rc("figure", figsize=(24, 16)) +plt.rc("font", family="serif", serif="Arial") +plt.rc("font", size=SIZE) +plt.rc("axes", grid=True) +plt.rc("text", usetex=True) +plt.rc("axes", linewidth=3) +plt.rc("axes", grid=True) +plt.rc("grid", linestyle="-") +plt.rc("axes", titlesize=SIZE) # fontsize of the axes title +plt.rc("axes", labelsize=SIZE) # fontsize of the x and y labels +plt.rc("xtick", labelsize=SIZE) # fontsize of the tick labels +plt.rc("ytick", labelsize=SIZE) # fontsize of the tick labels +plt.rc("legend", fontsize=SIZE) # legend fontsize +plt.rc("figure", titlesize=SIZE) # fontsize of the figure title # Add BayesValidRox path -sys.path.insert(0,'./../../../BayesValidRox/') - +sys.path.insert(0, "./../../../BayesValidRox/") # Import the scripts import ffpm_validation_stokesdarcy as stokesdarcy import ffpm_validation_stokespnm as stokespnm + def kdePlot_BME(dof, BME, ModelNames, plotName): # mkdir for plots - directory = 'logBME_vs_BME/' - if not os.path.exists(OutputDir+directory): os.makedirs(OutputDir+directory) + directory = "logBME_vs_BME/" + if not os.path.exists(OutputDir + directory): + os.makedirs(OutputDir + directory) # 
Plots
     fig, ax = plt.subplots()
-    legend_elements=[Patch(facecolor='k', edgecolor='k',
-                           label='TOM')]
+    legend_elements = [Patch(facecolor="k", edgecolor="k", label="TOM")]
 
-    Colors=["blue","green","gray"]
+    Colors = ["blue", "green", "gray"]
 
     # log-scale BME
     BME = np.log10(BME)
 
     # ------------------ TOM ------------------
     min_logBME = np.nanmin(BME[BME != -np.inf])
     max_logBME = np.nanmax(BME[BME != -np.inf])
-    #logBMEs = np.linspace(min_logBME, np.finfo(np.float16).eps, BME.shape[1])
+    # logBMEs = np.linspace(min_logBME, np.finfo(np.float16).eps, BME.shape[1])
    logBMEs = np.linspace(min_logBME, max_logBME, BME.shape[1])
-    #logBMEs = np.linspace(10**min_logBME, 10**max_logBME, BME.shape[1])
+    # logBMEs = np.linspace(10**min_logBME, 10**max_logBME, BME.shape[1])
 
-    #logTOMBME = stats.chi2(dof).pdf(-1*logBMEs)
-    #logTOMBME = np.log10(stats.chi2(dof).pdf(logBMEs))
+    # logTOMBME = stats.chi2(dof).pdf(-1*logBMEs)
+    # logTOMBME = np.log10(stats.chi2(dof).pdf(logBMEs))
     logTOMBME = stats.chi2.rvs(dof, size=BME.shape[1])
 
     # plot TOM
-    sns.kdeplot(logTOMBME, ax=ax, color='k', shade=True, lw=3.5, alpha=0.3)
+    sns.kdeplot(logTOMBME, ax=ax, color="k", shade=True, lw=3.5, alpha=0.3)
 
     # Plot BMEs for all models
     for idx in range(BME.shape[0]):
         sns.kdeplot(BME[idx], ax=ax, color=Colors[idx], shade=True, lw=4)
-        legend_elements.append(Patch(facecolor=Colors[idx], edgecolor=Colors[idx],
-                                 label=ModelNames[idx]))
+        legend_elements.append(
+            Patch(facecolor=Colors[idx], edgecolor=Colors[idx],
+                  label=ModelNames[idx])
+        )
 
-    ax.set_xlabel('log$_{10}$(BME)', fontsize = SIZE)
-    ax.set_ylabel('Probability density', fontsize = SIZE)
+    ax.set_xlabel("log$_{10}$(BME)", fontsize=SIZE)
+    ax.set_ylabel("Probability density", fontsize=SIZE)
 
-    ax.legend(handles=legend_elements,fontsize=SIZE)
+    ax.legend(handles=legend_elements, fontsize=SIZE)
 
     # Set size of the ticks
     for t in ax.get_xticklabels():
@@ -92,15 +100,22 @@ def kdePlot_BME(dof, BME, ModelNames, plotName):
     for t in ax.get_yticklabels():
         t.set_fontsize(SIZE)
 
-    plt.savefig('./'+OutputDir+directory+'BME_TOM_'+plotName+'.svg', bbox_inches = 'tight')
-    plt.savefig('./'+OutputDir+directory+'BME_TOM_'+plotName+'.pdf', bbox_inches = 'tight')
+    plt.savefig(
+        "./" + OutputDir + directory + "BME_TOM_" + plotName + ".svg",
+        bbox_inches="tight",
+    )
+    plt.savefig(
+        "./" + OutputDir + directory + "BME_TOM_" + plotName + ".pdf",
+        bbox_inches="tight",
+    )
 
     plt.close()
 
     return logTOMBME
 
+
 def cal_modelWeight(BME_Dict):
-    """ 
+    """
     Normalize the BME (Assumption: model prior weights are equal for models)
 
     Parameters
     ----------
     BME_Dict : dict
         Dictionary mapping each model name to its array of bootstrapped
         BME values.
 
     Returns
     -------
     all_ModelWeights : numpy.ndarray
         Normalized weights for all models.
     ModelWeights : numpy.ndarray
         Normalized weights for the first three models.
""" - allBME = np.empty((0,nBootstrapItr)) - for key in BME_Dict.keys(): - allBME = np.vstack((allBME,np.log(BME_Dict[key]))) - - all_ModelWeights = np.divide(np.exp(allBME), np.nansum(np.exp(allBME), axis=0)) - ModelWeights = np.divide(np.exp(allBME[:3]), np.nansum(np.exp(allBME[:3]), axis=0)) - - # Print statistics - print('-'*10+"Statistical summary of BME"+'-'*10) - print(np.quantile(allBME[:3], 0.25, axis=1)-np.quantile(allBME[:3], 0.5, axis=1)) - print(np.quantile(allBME[:3], 0.5, axis=1)) - print(np.quantile(allBME[:3], 0.75, axis=1)-np.quantile(allBME[:3], 0.5, axis=1)) - - print('\n'+'-'*10+"Statistical summary of Model Weights"+'-'*10) - print(np.quantile(ModelWeights, 0.25, axis=1)-np.quantile(ModelWeights, 0.5, axis=1)) - print(np.quantile(ModelWeights, 0.5, axis=1)) - print(np.quantile(ModelWeights, 0.75, axis=1)-np.quantile(ModelWeights, 0.5, axis=1)) - - return all_ModelWeights,ModelWeights + allBME = np.empty((0, nBootstrapItr)) + for key in BME_Dict.keys(): + allBME = np.vstack((allBME, np.log(BME_Dict[key]))) + + all_ModelWeights = np.divide(np.exp(allBME), np.nansum(np.exp(allBME), axis=0)) + ModelWeights = np.divide(np.exp(allBME[:3]), np.nansum(np.exp(allBME[:3]), axis=0)) + + # Print statistics + print("-" * 10 + "Statistical summary of BME" + "-" * 10) + print(np.quantile(allBME[:3], 0.25, axis=1) - np.quantile(allBME[:3], 0.5, axis=1)) + print(np.quantile(allBME[:3], 0.5, axis=1)) + print(np.quantile(allBME[:3], 0.75, axis=1) - np.quantile(allBME[:3], 0.5, axis=1)) + + print("\n" + "-" * 10 + "Statistical summary of Model Weights" + "-" * 10) + print( + np.quantile(ModelWeights, 0.25, axis=1) - np.quantile(ModelWeights, 0.5, axis=1) + ) + print(np.quantile(ModelWeights, 0.5, axis=1)) + print( + np.quantile(ModelWeights, 0.75, axis=1) - np.quantile(ModelWeights, 0.5, axis=1) + ) + + return all_ModelWeights, ModelWeights + def kdePlot_BayesFactor(BME_Dict, plotName): import matplotlib.patches as patches + # mkdir for plots - directory = 'BayesFactor/' - if not os.path.exists(OutputDir+directory): os.makedirs(OutputDir+directory) + directory = "BayesFactor/" + if not os.path.exists(OutputDir + directory): + os.makedirs(OutputDir + directory) - Colors=["blue","green","gray", "brown"] + Colors = ["blue", "green", "gray", "brown"] modelNames = list(BME_Dict.keys()) nModels = len(modelNames) @@ -153,7 +175,7 @@ def kdePlot_BayesFactor(BME_Dict, plotName): for i, key_i in enumerate(modelNames): for j, key_j in enumerate(modelNames): - ax = axes[i,j] + ax = axes[i, j] # Set size of the ticks for t in ax.get_xticklabels(): t.set_fontsize(SIZE) @@ -165,9 +187,9 @@ def kdePlot_BayesFactor(BME_Dict, plotName): # Null hypothesis: key_j is the better model BayesFactor = np.log10(np.divide(BME_Dict[key_i], BME_Dict[key_j])) - #sns.kdeplot(BayesFactor, ax=ax, color=Colors[i], shade=True) - #sns.histplot(BayesFactor, ax=ax, stat="probability", kde=True, element='step', - #color=Colors[j]) + # sns.kdeplot(BayesFactor, ax=ax, color=Colors[i], shade=True) + # sns.histplot(BayesFactor, ax=ax, stat="probability", kde=True, element='step', + # color=Colors[j]) # --https://stackoverflow.com/questions/55128462/how-to-normalize-seaborn-distplot --- # taken from seaborn's source code (utils.py and distributions.py) @@ -178,10 +200,10 @@ def kdePlot_BayesFactor(BME_Dict, plotName): support_max = min(data.max() + bw * cut, clip[1]) return np.linspace(support_min, support_max, gridsize) - kde_estim = stats.gaussian_kde(BayesFactor, bw_method='scott') + kde_estim = stats.gaussian_kde(BayesFactor, 
bw_method="scott") # manual linearization of data - #linearized = np.linspace(quotient.min(), quotient.max(), num=500) + # linearized = np.linspace(quotient.min(), quotient.max(), num=500) # or better: mimic seaborn's internal stuff bw = kde_estim.scotts_factor() * np.std(BayesFactor) @@ -196,19 +218,35 @@ def kdePlot_BayesFactor(BME_Dict, plotName): # normalize so it is between 0;1 Z2 = normalize(Z) - ax.plot(linearized, Z2, "-", color=Colors[i],linewidth=2) + ax.plot(linearized, Z2, "-", color=Colors[i], linewidth=2) ax.fill_between(linearized, 0, Z2, color=Colors[i], alpha=0.25) - #ax.set_ylim([0,0.75]) + # ax.set_ylim([0,0.75]) # Draw BF significant levels according to Jeffreys 1961 - ax.axvline(x=np.log10(3),ymin=0, linewidth=4, color='dimgrey') # Strong evidence for both models - ax.axvline(x=np.log10(10),ymin=0,linewidth=4, color='orange') # Strong evidence for one model - ax.axvline(x=np.log10(100),ymin=0,linewidth=4, color='r') # Decisive evidence for one model + ax.axvline( + x=np.log10(3), ymin=0, linewidth=4, color="dimgrey" + ) # Strong evidence for both models + ax.axvline( + x=np.log10(10), ymin=0, linewidth=4, color="orange" + ) # Strong evidence for one model + ax.axvline( + x=np.log10(100), ymin=0, linewidth=4, color="r" + ) # Decisive evidence for one model # legend - BF_label = key_i + '/' + key_j - legend_elements = [Patch(facecolor=Colors[i], edgecolor=Colors[i],label='BF('+BF_label+')')] - ax.legend(loc='upper left', handles=legend_elements,fontsize=SIZE-(nModels+1)*5) + BF_label = key_i + "/" + key_j + legend_elements = [ + Patch( + facecolor=Colors[i], + edgecolor=Colors[i], + label="BF(" + BF_label + ")", + ) + ] + ax.legend( + loc="upper left", + handles=legend_elements, + fontsize=SIZE - (nModels + 1) * 5, + ) elif j == i: # build a rectangle in axes coords @@ -217,18 +255,28 @@ def kdePlot_BayesFactor(BME_Dict, plotName): # axes coordinates are 0,0 is bottom left and 1,1 is upper right p = patches.Rectangle( - (left, bottom), width, height, color ='white' , - fill=True, transform=ax.transAxes, clip_on=False - ) + (left, bottom), + width, + height, + color="white", + fill=True, + transform=ax.transAxes, + clip_on=False, + ) ax.grid(False) ax.add_patch(p) # ax.text(0.5*(left+right), 0.5*(bottom+top), key_i, - fsize = SIZE+20 if nModels < 4 else SIZE - ax.text(0.5, 0.5, key_i, - horizontalalignment='center', - verticalalignment='center', - fontsize=fsize, color=Colors[i], - transform=ax.transAxes) + fsize = SIZE + 20 if nModels < 4 else SIZE + ax.text( + 0.5, + 0.5, + key_i, + horizontalalignment="center", + verticalalignment="center", + fontsize=fsize, + color=Colors[i], + transform=ax.transAxes, + ) # Defining custom 'ylim' values. 
custom_ylim = (0, 1.05) @@ -238,22 +286,30 @@ def kdePlot_BayesFactor(BME_Dict, plotName): # set labels for i in range(nModels): - axes[-1,i].set_xlabel('log$_{10}$(BF)', fontsize=SIZE) - axes[i, 0].set_ylabel('Probability', fontsize=SIZE) + axes[-1, i].set_xlabel("log$_{10}$(BF)", fontsize=SIZE) + axes[i, 0].set_ylabel("Probability", fontsize=SIZE) # Adjust subplots plt.subplots_adjust(wspace=0.2, hspace=0.1) - plt.savefig('./'+OutputDir+directory+'BayesFactor'+plotName+'.svg', bbox_inches = 'tight') - plt.savefig('./'+OutputDir+directory+'BayesFactor'+plotName+'.pdf', bbox_inches = 'tight') + plt.savefig( + "./" + OutputDir + directory + "BayesFactor" + plotName + ".svg", + bbox_inches="tight", + ) + plt.savefig( + "./" + OutputDir + directory + "BayesFactor" + plotName + ".pdf", + bbox_inches="tight", + ) plt.close() + def BoxPlot_ModelWeights(ModelWeights, plotName): # mkdir for plots - directory = 'ModelWeights/' - if not os.path.exists(OutputDir+directory): os.makedirs(OutputDir+directory) + directory = "ModelWeights/" + if not os.path.exists(OutputDir + directory): + os.makedirs(OutputDir + directory) # Create figure fig, ax = plt.subplots() @@ -266,38 +322,38 @@ def BoxPlot_ModelWeights(ModelWeights, plotName): bp = ax.boxplot(filtered_data, patch_artist=True, showfliers=False) ## change outline color, fill color and linewidth of the boxes - for box in bp['boxes']: + for box in bp["boxes"]: # change outline color - box.set( color='#7570b3', linewidth=4) + box.set(color="#7570b3", linewidth=4) # change fill color - box.set( facecolor = '#1b9e77' ) + box.set(facecolor="#1b9e77") ## change color and linewidth of the whiskers - for whisker in bp['whiskers']: - whisker.set(color='#7570b3', linewidth=2) + for whisker in bp["whiskers"]: + whisker.set(color="#7570b3", linewidth=2) ## change color and linewidth of the caps - for cap in bp['caps']: - cap.set(color='#7570b3', linewidth=2) + for cap in bp["caps"]: + cap.set(color="#7570b3", linewidth=2) ## change color and linewidth of the medians - for median in bp['medians']: - median.set(color='#b2df8a', linewidth=2) + for median in bp["medians"]: + median.set(color="#b2df8a", linewidth=2) ## change the style of fliers and their fill # for flier in bp['fliers']: # flier.set(marker='o', color='#e7298a', alpha=0.75) ## Custom x-axis labels - ax.set_xticklabels(list(BME_Dict.keys())[:len(ModelWeights)]) + ax.set_xticklabels(list(BME_Dict.keys())[: len(ModelWeights)]) - ax.set_ylabel('Weight', fontsize = SIZE) + ax.set_ylabel("Weight", fontsize=SIZE) # Title - plt.title('Posterior Model Weights') + plt.title("Posterior Model Weights") # Set y lim - ax.set_ylim((-0.05,1.05)) + ax.set_ylim((-0.05, 1.05)) # Set size of the ticks for t in ax.get_xticklabels(): @@ -306,26 +362,33 @@ def BoxPlot_ModelWeights(ModelWeights, plotName): t.set_fontsize(SIZE) # Save the figure - #fig.savefig('./'+OutputDir+directory+'ModelWeights'+plotName+'.svg', bbox_inches='tight') - fig.savefig('./'+OutputDir+directory+'ModelWeights'+plotName+'.pdf', bbox_inches='tight') + # fig.savefig('./'+OutputDir+directory+'ModelWeights'+plotName+'.svg', bbox_inches='tight') + fig.savefig( + "./" + OutputDir + directory + "ModelWeights" + plotName + ".pdf", + bbox_inches="tight", + ) plt.close() -def perturbData(data,OutputNames,noItr,noiseLevel): + +def perturbData(data, OutputNames, noItr, noiseLevel): ObservationData = data[OutputNames].to_numpy() nMeasurement, nOutputs = ObservationData.shape ntotalMeasurement = ObservationData[~np.isnan(ObservationData)].shape[0] - Data = 
np.zeros((noItr,ntotalMeasurement)) + Data = np.zeros((noItr, ntotalMeasurement)) Data[0] = ObservationData.T[~np.isnan(ObservationData.T)] - for itrIdx in range(1,noItr): + for itrIdx in range(1, noItr): data = np.zeros((nMeasurement, nOutputs)) for idx in range(len(OutputNames)): - std = np.nanstd(ObservationData[:,idx]) - if std == 0: std = 0.001 + std = np.nanstd(ObservationData[:, idx]) + if std == 0: + std = 0.001 noise = std * noiseLevel - data[:,idx] = np.add(ObservationData[:,idx] , np.random.normal(0, 1, ObservationData.shape[0]) * noise) - + data[:, idx] = np.add( + ObservationData[:, idx], + np.random.normal(0, 1, ObservationData.shape[0]) * noise, + ) Data[itrIdx] = data.T[~np.isnan(data.T)] return Data @@ -333,120 +396,135 @@ def perturbData(data,OutputNames,noItr,noiseLevel): if __name__ == "__main__": - #========================================================================== - #======================== Set Parameters ============================== - #========================================================================== + # ========================================================================== + # ======================== Set Parameters ============================== + # ========================================================================== # ---------- set scenario params ---------- - inclusion = 'squared' # squared or circular - inletLoc = 'top' # left or top - outputs = ['velocity [m/s]', 'p'] - + inclusion = "squared" # squared or circular + inletLoc = "left" # left or top + outputs = ["velocity [m/s]", "p"] + # ---------- set BayesValidRox params ---------- - PCEExpDesignMethod = 'normal' # 'normal' or 'sequential' - nInitSamples = 500 #50 Initial No. of orig. Model runs for surrogate training - nTotalSamples = 300 #100 Total No. of orig. Model runs for surrogate training - nBootstrapItr = 1000 # No. of bootstraping iterations for Bayesian analysis - BootstrapNoise_Avg = 0.0005 # Noise amount for bootstraping in Bayesian analysis - BootstrapNoise = 0.00005 # Noise amount for bootstraping in Bayesian analysis + PCEExpDesignMethod = "normal" # 'normal' or 'sequential' + nInitSamples = 500 # 50 Initial No. of orig. Model runs for surrogate training + nTotalSamples = 300 # 100 Total No. of orig. Model runs for surrogate training + nBootstrapItr = 1000 # No. 
of bootstrapping iterations for Bayesian analysis
+    BootstrapNoise_Avg = 0.0005  # Noise amount for bootstrapping in Bayesian analysis
+    BootstrapNoise = 0.00005  # Noise amount for bootstrapping in Bayesian analysis
 
     # Perturb data
     # data = pd.read_csv('data/stokesDataValid_{}_inclusion_{}Inflow.csv'.format(inclusion,inletLoc))
     # perturbedDataAvg = perturbData(data,outputs,nBootstrapItr,BootstrapNoise_Avg)
     # np.savetxt('./data/perturbedValidDataAvg_{}_inclusion_{}Inflow.csv'.format(inclusion,inletLoc),perturbedDataAvg, delimiter=',')
-    perturbedDataAvg = np.loadtxt('./data/perturbedValidDataAvg_{}_inclusion_{}Inflow.csv'.format(inclusion,inletLoc),delimiter=',')
-    paramsAvg = (nInitSamples,nTotalSamples,nBootstrapItr,perturbedDataAvg)
-
+    perturbedDataAvg = np.loadtxt(
+        f"./data/perturbedValidDataAvg_{inclusion}_inclusion_{inletLoc}Inflow.csv",
+        delimiter=",")
+    paramsAvg = (nInitSamples, nTotalSamples, nBootstrapItr, perturbedDataAvg)
+
     # Perturb non-averaged data for PNM
     # data = pd.read_csv('data/stokesDataValid_squared_inclusion_{}Inflow_without_averaging.csv'.format(inletLoc))
     # perturbedData = perturbData(data,outputs,nBootstrapItr,BootstrapNoise)
     # np.savetxt('./data/perturbedValidData_squared_inclusion_{}Inflow.csv'.format(inletLoc),perturbedData, delimiter=',')
-    perturbedData = np.loadtxt('./data/perturbedValidData_squared_inclusion_{}Inflow.csv'.format(inletLoc),delimiter=',')
-    params = (nInitSamples,nTotalSamples,nBootstrapItr,perturbedData)
-
-    #==========================================================================
-    #==================== Run main scripts for PA-B =======================
-    #==========================================================================
+    perturbedData = np.loadtxt(
+        "./data/perturbedValidData_squared_inclusion_{}Inflow.csv".format(inletLoc),
+        delimiter=",",
+    )
+    params = (nInitSamples, nTotalSamples, nBootstrapItr, perturbedData)
+
+    # ==========================================================================
+    # ==================== Run main scripts for PA-A =======================
+    # ==========================================================================
     # Set dir name for plots
-    OutputDir = (r'Outputs_BayesAnalysis_{}_inclusion_{}Inflow/'.format(inclusion,inletLoc))
-    result_folder = './Results_15_12_2021_topInflow' # './' or './Results_05_10_2021'
+    OutputDir = f"Outputs_BayesAnalysis_{inclusion}_inclusion_{inletLoc}Inflow/"
+    result_folder = "./Results_09_01_2022_leftInflow"  # './' or './Results_05_10_2021'
     # result_folder = './Results_07_12_2021_leftInflow'
     # result_folder = 'Results_07_12_2021_Circular'
 
     # ------------- Run scripts ---------
-    if inclusion == 'squared':
-        # Stokes-PN model with the averaged data
-        _, _ , BayesValid_PNM = stokespnm.run(paramsAvg, inletLoc=inletLoc,errorPerc=0.2,
-                                              PCEEDMethod=PCEExpDesignMethod)
-
-        # Stokes-PN model without the averaged data
-        _, _, BayesValid_PNM_NA = stokespnm.run(params, averaging=False,inletLoc=inletLoc,
-                                                PCEEDMethod=PCEExpDesignMethod)
-
-
-        # StokesDarcy with Classical IC (Beaver-Joseph)
-        _, _, BayesValid_BJ = stokesdarcy.run(paramsAvg, couplingcond='BJ',inletLoc=inletLoc,
-                                              inclusion=inclusion,
-                                              errorPerc=0.2,PCEEDMethod=PCEExpDesignMethod)
+    # if inclusion == 'squared':
+    # Stokes-PN model with the averaged data
+    # _, _ , BayesValid_PNM = stokespnm.run(paramsAvg, inletLoc=inletLoc,errorPerc=0.2,
+    #                                       PCEEDMethod=PCEExpDesignMethod)
+
+    # # Stokes-PN model without the averaged data
+    # _, _, BayesValid_PNM_NA = stokespnm.run(params, averaging=False,inletLoc=inletLoc,
+    #                                         PCEEDMethod=PCEExpDesignMethod)
+
+    # # StokesDarcy with Classical IC (Beavers-Joseph)
+    # _, _, BayesValid_BJ = stokesdarcy.run(paramsAvg, couplingcond='BJ',inletLoc=inletLoc,
+    #                                       inclusion=inclusion,
+    #                                       errorPerc=0.2,PCEEDMethod=PCEExpDesignMethod)
 
     # # StokesDarcy with Generalized (ER) IC
-    _, _, BayesValid_ER = stokesdarcy.run(paramsAvg, couplingcond='ER', inletLoc=inletLoc,
-                                          inclusion=inclusion,errorPerc=0.2,
-                                          PCEEDMethod=PCEExpDesignMethod)
+    # Note: the updated stokesdarcy.run() signature no longer accepts
+    # errorPerc or PCEEDMethod, so only the remaining keyword arguments
+    # are passed here.
+    _, _, BayesValid_ER = stokesdarcy.run(paramsAvg, couplingcond='ER',
+                                          inletLoc=inletLoc,
+                                          inclusion=inclusion)
     sys.exit()
-
+
     # ------------- Load the objects ---------
     # Stokes-PN model with the averaged data
-    BayesValid_PNM_logBME = np.loadtxt(result_folder+'/outputs_ffpm-stokespnm_{}_inclusion_{}Inflow/'.format(inclusion,inletLoc)+
-                                       'logBME_ffpm-stokespnm_{}_inclusion_{}Inflow-valid.csv'.format(inclusion,inletLoc))
-
+    BayesValid_PNM_logBME = np.loadtxt(
+        result_folder
+        + f"/outputs_ffpm-stokespnm_{inclusion}_inclusion_{inletLoc}Inflow/"
+        + f"logBME_ffpm-stokespnm_{inclusion}_inclusion_{inletLoc}Inflow-valid.csv"
+    )
+
     # Stokes-PN model without the averaged data
-    BayesValid_PNM_NA_logBME = np.loadtxt(result_folder+'/outputs_ffpm-stokespnmNA_{}_inclusion_{}Inflow/'.format(inclusion,inletLoc)+
-                                          'logBME_ffpm-stokespnmNA_{}_inclusion_{}Inflow-valid.csv'.format(inclusion,inletLoc))
-
-
+    BayesValid_PNM_NA_logBME = np.loadtxt(
+        result_folder
+        + f"/outputs_ffpm-stokespnmNA_{inclusion}_inclusion_{inletLoc}Inflow/"
+        + f"logBME_ffpm-stokespnmNA_{inclusion}_inclusion_{inletLoc}Inflow-valid.csv"
+    )
+
     # StokesDarcy with Classical IC (Beavers-Joseph)
-    BayesValid_BJ_logBME = np.loadtxt(result_folder+'/outputs_ffpm-stokesdarcyBJ_{}_inclusion_{}Inflow/'.format(inclusion,inletLoc)+
-                                      'logBME_ffpm-stokesdarcyBJ_{}_inclusion_{}Inflow-valid.csv'.format(inclusion,inletLoc))
+    BayesValid_BJ_logBME = np.loadtxt(
+        result_folder
+        + f"/outputs_ffpm-stokesdarcyBJ_{inclusion}_inclusion_{inletLoc}Inflow/"
+        + f"logBME_ffpm-stokesdarcyBJ_{inclusion}_inclusion_{inletLoc}Inflow-valid.csv"
+    )
 
     # StokesDarcy with Generalized (ER) IC
-    BayesValid_ER_logBME = np.loadtxt(result_folder+'/outputs_ffpm-stokesdarcyER_{}_inclusion_{}Inflow/'.format(inclusion,inletLoc)+
-                                      'logBME_ffpm-stokesdarcyER_{}_inclusion_{}Inflow-valid.csv'.format(inclusion,inletLoc))
-
-
-    #==========================================================================
-    #============= Bayes Factor Computation (only models) =================
-    #==========================================================================
+    BayesValid_ER_logBME = np.loadtxt(
+        result_folder
+        + f"/outputs_ffpm-stokesdarcyER_{inclusion}_inclusion_{inletLoc}Inflow/"
+        + f"logBME_ffpm-stokesdarcyER_{inclusion}_inclusion_{inletLoc}Inflow-valid.csv"
+    )
+
+    # ==========================================================================
+    # ============= Bayes Factor Computation (only models) =================
+    # ==========================================================================
     # mkdir for plots
-    if not os.path.exists(OutputDir): os.makedirs(OutputDir)
+    if not os.path.exists(OutputDir):
+        os.makedirs(OutputDir)
 
     # ---------------- Validation ----------------
     # Change ln scale to normal scale
-    BME_Dict= dict()
-    BME_Dict['Classical IC'] = np.exp(BayesValid_BJ_logBME)
-    BME_Dict['Generalized IC'] = np.exp(BayesValid_ER_logBME)
-    BME_Dict['Pore Network'] = np.exp(BayesValid_PNM_logBME)
-
-    #==========================================================================
-    #==================== Bayes Factor plot ================================
-    #==========================================================================
+    BME_Dict = dict()
+    BME_Dict["Classical IC"] = np.exp(BayesValid_BJ_logBME)
+    BME_Dict["Generalized IC"] = np.exp(BayesValid_ER_logBME)
+    BME_Dict["Pore Network"] = np.exp(BayesValid_PNM_logBME)
+
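+    # Note: the CSV files above store natural-log evidence values; np.exp()
+    # maps them back to the BME scale used by kdePlot_BayesFactor, which
+    # forms pairwise Bayes factors as the plain ratio BME_i/BME_j (equal
+    # prior model weights are assumed throughout this script).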
+    # ==========================================================================
+    # ==================== Bayes Factor plot ================================
+    # ==========================================================================
     # kdePlot only models
-    kdePlot_BayesFactor(BME_Dict, plotName='Valid')
-
-    ## Plot with all models
-    BME_Dict['Pore Network SA'] = np.exp(BayesValid_PNM_NA_logBME)
-    kdePlot_BayesFactor(BME_Dict, plotName='Valid_with_PNM_SA')
-
-    #==========================================================================
-    #==================== Model weight Computation ========================
-    #==========================================================================
-
-    all_ModelWeights,ModelWeights = cal_modelWeight(BME_Dict)
+    kdePlot_BayesFactor(BME_Dict, plotName="Valid")
+
+    # Plot with all models
+    BME_Dict["Pore Network SA"] = np.exp(BayesValid_PNM_NA_logBME)
+    kdePlot_BayesFactor(BME_Dict, plotName="Valid_with_PNM_SA")
+
+    # ==========================================================================
+    # ==================== Model weight Computation ========================
+    # ==========================================================================
+
+    all_ModelWeights, ModelWeights = cal_modelWeight(BME_Dict)
 
     # Box Plot
     # BoxPlot_ModelWeights(all_ModelWeights, plotName='BME_Valid_all')
-    BoxPlot_ModelWeights(ModelWeights, plotName='BME_Valid_BJ_ER_PNM')
-
-
+    BoxPlot_ModelWeights(ModelWeights, plotName="BME_Valid_BJ_ER_PNM")
+
     # Normalize the DKL (Assumption: model prior weights are equal for models)
     # ValidDKL = np.vstack((BayesValid_BJ.KLD, BayesValid_ER.KLD, BayesValid_PNM.KLD, BayesValid_PNM_NA.KLD))
     # ValidDKL = np.vstack((BayesValid_BJ.KLD, BayesValid_ER.KLD, BayesValid_PNM.KLD))
@@ -461,9 +539,9 @@ if __name__ == "__main__":
     # BoxPlot_ModelWeights(ValidModelWeightsDKL, plotName='DKL_Valid_BJ_ER_PNM')
     # BoxPlot_ModelWeights(ValidModelWeightsinfEntropy, plotName='infEntropy_Valid_BJ_ER_PNM')
 
-    #==========================================================================
-    #========= Model comparison with Theoretically Optimal Model (TOM)=========
-    #==========================================================================
+    # ==========================================================================
+    # ========= Model comparison with Theoretically Optimal Model (TOM)=========
+    # ==========================================================================
 
     # ---------------- Calibration ----------------
     # ModelNames = ['Classical IC','Generalized IC','Pore Network']
@@ -473,8 +551,8 @@ if __name__ == "__main__":
 
     # logTOMBME_valid = kdePlot_BME(dof, ValidBME, ModelNames, "Valid")
 
-    #==========================================================================
-    #============= Bayes Factor Computation (models vs TOM) ================
-    #==========================================================================
-    #BME_Dict['TOM'] = logTOMBME_valid
-    #kdePlot_BayesFactor(BME_Dict,BF_label='BJ/ER', plotName='Valid_with_TOM')
+    # ==========================================================================
+    # ============= Bayes Factor Computation (models vs TOM) ================
+    # ==========================================================================
+    # BME_Dict['TOM'] = logTOMBME_valid
+    # kdePlot_BayesFactor(BME_Dict,BF_label='BJ/ER', plotName='Valid_with_TOM')
diff --git a/BayesValidRox/tests/PA-A/ffpm_validation_stokesdarcy.py b/BayesValidRox/tests/PA-A/ffpm_validation_stokesdarcy.py
index 815a9e6d9..95f43496f 100755
--- a/BayesValidRox/tests/PA-A/ffpm_validation_stokesdarcy.py
+++ b/BayesValidRox/tests/PA-A/ffpm_validation_stokesdarcy.py
@@ -1,25 +1,30 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """
-Created on Thu Aug 13 09:53:11 2020
-
-@author: farid
+Author: Farid Mohammadi, M.Sc.
+E-Mail: farid.mohammadi@iws.uni-stuttgart.de
+Department of Hydromechanics and Modelling of Hydrosystems (LH2)
+Institute for Modelling Hydraulic and Environmental Systems (IWS), University
+of Stuttgart, www.iws.uni-stuttgart.de/lh2/
+Pfaffenwaldring 61
+70569 Stuttgart
+
+Created on Thu Aug 13 2020
 """
-import sys, os, joblib
+import sys
+import os
+import joblib
 import numpy as np
 import scipy.stats as stats
 import shutil
 import pandas as pd
-try:
-    import cPickle as pickle
-except ModuleNotFoundError:
-    import _pickle as pickle
-
 import matplotlib
+
-matplotlib.use('agg')
+matplotlib.use("agg")
 
 # Add BayesValidRox path
-sys.path.insert(0,'./../../../BayesValidRox/')
+sys.path.insert(0, "./../../../BayesValidRox/")
 
 from PyLink.PyLinkForwardModel import PyLinkForwardModel
 from surrogate_models.Input import Input
@@ -28,6 +33,7 @@ from surrogate_models.surrogate_models import Metamodel
 from PostProcessing.PostProcessing import PostProcessing
 from BayesInference.BayesInference import BayesInference, Discrepancy
 from util.post_plot_PAPER import postPredictiveplot
 
+
 def check_ranges(samples, BayesDF):
     """
     This function checks if theta lies in the given ranges
@@ -60,94 +66,115 @@ def check_ranges(samples, BayesDF):
         index.append(i)
     return index
 
-def run(params, errorPerc=0.05, couplingcond='BJ', inclusion='squared', inletLoc='top', PCEEDMethod='normal'):
-    print("\n" + "="*75)
-    print("Stochastic calibration and validation of {0}.".format('ffpm-stokesdarcy'+couplingcond))
-    print("="*75 + "\n")
+
+def run(params, couplingcond="BJ", inclusion="squared", inletLoc="top"):
+
+    print("\n" + "=" * 75)
+    print("Stochastic calibration and validation of "
+          f"ffpm-stokesdarcy{couplingcond}.")
+    print("=" * 75 + "\n")
+
+    nInitSamples, nTotalSamples, nBootstrapItr, perturbedData = params
 
-    nInitSamples,nTotalSamples,nBootstrapItr,perturbedData = params
-    
-    #=====================================================
-    #============ COMPUTATIONAL MODEL ================
-    #=====================================================
+    # =====================================================
+    # ============ COMPUTATIONAL MODEL ================
+    # =====================================================
     Model = PyLinkForwardModel()
-    Model.nrCPUs = 4 #8
-    Model.Type = 'PyLink'
-
-    Model.Name = 'ffpm-stokesdarcy{}_{}_inclusion_{}Inflow'.format(couplingcond,inclusion,inletLoc)
-
-    modelDir = './models/stokesdarcy/'
-    Model.InputFile = modelDir+'params_{}_{}_inclusion_{}Inflow.input'.format(couplingcond,inclusion,inletLoc)
-    Model.InputTemplate = modelDir+'params_{}_{}_inclusion_{}Inflow.tpl.input'.format(couplingcond,inclusion,inletLoc)
-
-
-
-    Model.Command = "model_exe.py stokesdarcy -o {} {}".format(inclusion,Model.InputFile)
+    Model.nrCPUs = 4  # 8
+    Model.Type = "PyLink"
+
+    Model.Name = f"ffpm-stokesdarcy{couplingcond}_{inclusion}_inclusion_"\
+        f"{inletLoc}Inflow"
+
+    modelDir = "./models/stokesdarcy/"
+    Model.InputFile = f"{modelDir}params_{couplingcond}_{inclusion}_"\
+        f"inclusion_{inletLoc}Inflow.input"
+    Model.InputTemplate = 
f"{modelDir}params_{couplingcond}_{inclusion}_"\ + f"inclusion_{inletLoc}Inflow.tpl.input" + Model.Command = f"model_exe.py stokesdarcy -o {inclusion} "\ + f"{Model.InputFile}" Model.ExecutionPath = os.getcwd() - Model.Output.Parser = 'read_ffpm' - Model.Output.Names = ['velocity [m/s]', 'p'] + Model.Output.Parser = "read_ffpm" + Model.Output.Names = ["velocity [m/s]", "p"] Model.Output.FileNames = ["ffpm_stokesdarcy_velocity_final.csv", "ffpm_stokesdarcy_p_final.csv"] # For Bayesian inversion - Model.MeasurementFile = 'data/stokesDataCalib_{}_inclusion_{}Inflow.csv'.format(inclusion,inletLoc) + Model.MeasurementFile = f"data/stokesDataCalib_{inclusion}_inclusion"\ + f"_{inletLoc}Inflow.csv" # Include the validation observation data - Model.MeasurementFileValid = 'data/stokesDataValid_{}_inclusion_{}Inflow.csv'.format(inclusion,inletLoc) + Model.MeasurementFileValid = f"data/stokesDataValid_{inclusion}_inclusion"\ + f"_{inletLoc}Inflow.csv" - #===================================================== - #========= PROBABILISTIC INPUT MODEL ============== - #===================================================== + # ===================================================== + # ========= PROBABILISTIC INPUT MODEL ============== + # ===================================================== Inputs = Input() - MCSize = 50000 - + MCSize = 100000 + # Assuming dependent input variables # Inputs.Rosenblatt = True - - Inputs.addMarginals() # VyMaxTop - Inputs.Marginals[0].Name = '$V^{top}$' # [5e-4, 1.5e-3] - Inputs.Marginals[0].InputValues = stats.uniform(loc=5.0e-04, scale=1.5e-3-5e-4).rvs(size=MCSize) - - Inputs.addMarginals() # CavityHeight - Inputs.Marginals[1].Name = '$\\Gamma$' #[0.0045, 0.0055] - # l = 0.5e-3 #Microscopic length scale >>>> l/4 nach oben und l/2 nach unten. 
- if couplingcond == 'BJ': - params = [4.9*1e-3, 5.1*1e-3] #[0.005-0.5*l, 0.005+0.25*l] + + Inputs.addMarginals() # VyMaxTop + Inputs.Marginals[0].Name = "$V^{top}$" # [5e-4, 1.5e-3] + Inputs.Marginals[0].InputValues = stats.uniform( + loc=5.0e-04, scale=1.5e-3 - 5e-4 + ).rvs(size=MCSize) + # Inputs.Marginals[0].DistType = 'unif' + # Inputs.Marginals[0].Parameters = [5e-4, 1.5e-3] + + Inputs.addMarginals() # CavityHeight + Inputs.Marginals[1].Name = "$\\Gamma$" # [0.0045, 0.0055] + if couplingcond == "BJ": + params = [4.9 * 1e-3, 5.1 * 1e-3] # [0.005-0.5*l, 0.005+0.25*l] else: - params = [5.0*1e-3, 5.1*1e-3] #[5.025*1e-3, 5.1*1e-3] [0.004975, 0.005+0.25*l] - Inputs.Marginals[1].InputValues = stats.uniform(loc=params[0], scale=params[1]-params[0]).rvs(size=MCSize) - - Inputs.addMarginals() # Permeability - Inputs.Marginals[2].Name = '$K$' # [1e-10, 1e-7] - Inputs.Marginals[2].InputValues = stats.uniform(loc=1e-10, scale=1e-8-1e-10).rvs(size=MCSize) - import chaospy - params = (1.5e-09, 1.0e-9) - Mu = np.log(params[0]**2 / np.sqrt(params[0]**2 + params[1]**2)) - Sigma = np.sqrt(np.log(1 + params[1]**2 / params[0]**2)) - # Inputs.Marginals[2].InputValues = chaospy.LogNormal(mu=Mu,sigma=Sigma).sample(MCSize) + params = [5.0 * 1e-3, 5.1 * 1e-3] + Inputs.Marginals[1].InputValues = stats.uniform( + loc=params[0], scale=params[1] - params[0] + ).rvs(size=MCSize) + # Inputs.Marginals[1].DistType = 'unif' + # Inputs.Marginals[1].Parameters = params + + Inputs.addMarginals() # Permeability + Inputs.Marginals[2].Name = "$K$" # [1e-10, 1e-8] + Inputs.Marginals[2].InputValues = stats.uniform(loc=1e-10, scale=1e-8-1e-10).rvs( + size=MCSize + ) + # Inputs.Marginals[2].DistType = 'unif' + # Inputs.Marginals[2].Parameters = [1e-10, 1e-8] + # import chaospy + + # params = (1.5e-09, 1.0e-9) + # Mu = np.log(params[0] ** 2 / np.sqrt(params[0] ** 2 + params[1] ** 2)) + # Sigma = np.sqrt(np.log(1 + params[1] ** 2 / params[0] ** 2)) + # Inputs.Marginals[2].InputValues = chaospy.LogNormal(mu=Mu,sigma=Sigma).sample(MCSize) # Inputs.Marginals[2].DistType = 'lognorm' # Inputs.Marginals[2].Parameters = params - if couplingcond == 'BJ': - Inputs.addMarginals() # AlphaBeaversJoseph - Inputs.Marginals[3].Name = '$\\alpha_{BJ}$' # [0.1, 4.0] - Inputs.Marginals[3].InputValues = stats.uniform(loc=0.1, scale=4.0-0.1).rvs(size=MCSize) - - #===================================================== - #========== DEFINITION OF THE METAMODEL ============ - #===================================================== + if couplingcond == "BJ": + Inputs.addMarginals() # AlphaBeaversJoseph + Inputs.Marginals[3].Name = "$\\alpha_{BJ}$" # [0.1, 4.0] + Inputs.Marginals[3].InputValues = stats.uniform(loc=0.1, scale=4.0-0.1).rvs( + size=MCSize + ) + # Inputs.Marginals[3].DistType = 'unif' + # Inputs.Marginals[3].Parameters = [0.1, 4.0] + + # ===================================================== + # ========== DEFINITION OF THE METAMODEL ============ + # ===================================================== MetaModelOpts = Metamodel(Inputs) - + # Select if you want to preserve the spatial/temporal depencencies - MetaModelOpts.DimRedMethod = 'PCA' + # MetaModelOpts.DimRedMethod = "PCA" # MetaModelOpts.varPCAThreshold = 99.99 # Select your metamodel method # 1) PCE (Polynomial Chaos Expansion) 2) GPE (Gaussian Process Emulator) # 3) PCEKriging (PCE + GPE) - MetaModelOpts.metaModel = 'PCE' + MetaModelOpts.metaModel = "PCE" # ------------------------------------------------ # ------------- PCE Specification ---------------- @@ -159,17 +186,17 @@ def 
run(params, errorPerc=0.05, couplingcond='BJ', inclusion='squared', inletLoc
     # 5)FastARD: Fast Bayesian ARD Regression
     # 6)VBL: Variational Bayesian Learning
     # 7)EBL: Empirical Bayesian Learning
-    MetaModelOpts.RegMethod = 'FastARD'
+    MetaModelOpts.RegMethod = "FastARD"
 
     # Specify the max degree to be compared by the adaptive algorithm:
     # The degree with the lowest Leave-One-Out cross-validation (LOO)
     # error (or the highest score=1-LOO) estimator is chosen as the final
     # metamodel.
-    MetaModelOpts.MinPceDegree = 6 # default = 1
-    MetaModelOpts.MaxPceDegree = 6 #9
+    MetaModelOpts.MinPceDegree = 8  # default = 1
+    MetaModelOpts.MaxPceDegree = 8  # 9
 
     # q-quasi-norm 0<q<1 (default=1)
-    # MetaModelOpts.q = np.linspace(0.3,0.6,3)
+    MetaModelOpts.q = 0.6
 
     # Print summary of the regression results
     # MetaModelOpts.DisplayFlag = True
@@ -177,128 +204,133 @@ def run(params, errorPerc=0.05, couplingcond='BJ', inclusion='squared', inletLoc
     # ------------------------------------------------
     # ------ Experimental Design Configuration -------
     # ------------------------------------------------
-    # Generate an experimental design of size NrExpDesign based on a latin
-    # hypercube sampling of the input model or user-defined values of X and/or Y:
+    # Generate an experimental design
     MetaModelOpts.addExpDesign()
 
     # One-shot (normal) or Sequential Adaptive (sequential) Design
-    MetaModelOpts.ExpDesign.Method = PCEEDMethod
+    MetaModelOpts.ExpDesign.Method = "normal"
     MetaModelOpts.ExpDesign.NrSamples = nInitSamples
 
     # Sampling methods
     # 1) random 2) latin_hypercube 3) sobol 4) halton 5) hammersley 6) chebyshev(FT)
     # 7) korobov 8) grid(FT) 9) nested_grid(FT) 10)user
-    MetaModelOpts.ExpDesign.SamplingMethod = 'user'
+    MetaModelOpts.ExpDesign.SamplingMethod = "user"
 
     # Provide the experimental design object with an hdf5 file
     MetaModelOpts.ExpDesign.hdf5File = "ExpDesign_{}.hdf5".format(Model.Name)
 
     # For calculation of validation error
     import h5py
+
     HDF5File = "ExpDesign_{}-testset_Calibration.hdf5".format(Model.Name)
-    ValidSets = h5py.File("./data/ValidationSets/"+HDF5File, 'r+')
+    ValidSets = h5py.File("./data/ValidationSets/" + HDF5File, "r+")
     MetaModelOpts.validSamples = np.array(ValidSets["EDX/init_"])
-    validModelRuns = dict()
+    validSets = dict()
 
     # Extract x values
-    validModelRuns["x_values"] = dict()
+    validSets["x_values"] = dict()
     for varIdx, var in enumerate(Model.Output.Names):
-        validModelRuns["x_values"][var] = np.array(ValidSets["x_values/"+var])
+        validSets["x_values"][var] = np.array(ValidSets["x_values/" + var])
 
     for varIdx, var in enumerate(Model.Output.Names):
-        validModelRuns[var] = np.array(ValidSets["EDY/"+var+"/init_"])
+        validSets[var] = np.array(ValidSets["EDY/" + var + "/init_"])
     ValidSets.close()
 
-    MetaModelOpts.validModelRuns = validModelRuns
+    MetaModelOpts.validModelRuns = validSets
 
     # >>>>>>>>>>>>>>>>>>>>>> Build Surrogate <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
     # Adaptive sparse arbitrary polynomial chaos expansion
-    print("\n" + "-"*40)
+    print("\n" + "-" * 40)
     print("PCE training for calibration.")
-    print("-"*40 + "\n")
+    print("-" * 40 + "\n")
     PCEModel = MetaModelOpts.create_metamodel(Model)
-
+
     # Remove zip file
-    if os.path.isfile(Model.Name + '.zip'): os.remove(Model.Name + '.zip')
+    if os.path.isfile(Model.Name + ".zip"):
+        os.remove(Model.Name + ".zip")
 
     # Save PCE models
-    with open('PCEModel_'+Model.Name+'.pkl', 'wb') as output:
+    with open("PCEModel_" + Model.Name + ".pkl", "wb") as output:
         joblib.dump(PCEModel, output, 2)
 
     # Load the objects
-    # with open('PCEModel_'+Model.Name+'.pkl', 
'rb') as input: + # with open("PCEModel_" + Model.Name + ".pkl", "rb") as input: # PCEModel = joblib.load(input) - #===================================================== - #========= POST PROCESSING OF METAMODELS =========== - #===================================================== + # ===================================================== + # ========= POST PROCESSING OF METAMODELS =========== + # ===================================================== PostPCE = PostProcessing(PCEModel) # Compute the moments and compare with the Monte-Carlo reference - PostPCE.plotMoments(xlabel='Point ID', plotType='bar') + PostPCE.plotMoments(xlabel="Point ID", plotType="bar") # Plot the sobol indices - sobol_cell, total_sobol = PostPCE.sobolIndicesPCE(xlabel='Point ID', plotType='bar') + _, _ = PostPCE.sobolIndicesPCE(xlabel="Point ID", plotType="bar") # Plot the evolution of the KLD,BME, and Modified LOOCV error - if MetaModelOpts.ExpDesign.Method == 'sequential': + if MetaModelOpts.ExpDesign.Method == "sequential": PostPCE.seqDesignDiagnosticPlots() # Compute and print RMSE error - PostPCE.accuracyCheckMetaModel(Samples=MetaModelOpts.validSamples, - validOutputsDict=validModelRuns) - - #===================================================== - #========= Bayesian inference (Calibration) ======== - #===================================================== + PostPCE.accuracyCheckMetaModel( + Samples=MetaModelOpts.validSamples, validOutputsDict=validSets + ) + # ===================================================== + # ========= Bayesian inference (Calibration) ======== + # ===================================================== BayesOptsCalib = BayesInference(PCEModel) - BayesOptsCalib.Name = 'Calib' + BayesOptsCalib.Name = "Calib" BayesOptsCalib.emulator = True - - # if couplingcond == 'ER': - # BayesOptsCalib.selectedIndices = {'velocity [m/s]':[0,1,2,3,4,5,6,7,8,9], - # 'p':[0,1,2]} - # BayesOptsCalib.ReqOutputType = ['velocity [m/s]'] + + # if couplingcond == "ER": + # BayesOptsCalib.selectedIndices = { + # "velocity [m/s]": [0, 1, 2, 3, 4, 5, 6, 7], + # "p": [0, 1, 2], + # } + # BayesOptsCalib.ReqOutputType = ["velocity [m/s]"] # Select the inference method - BayesOptsCalib.SamplingMethod = 'MCMC' + BayesOptsCalib.SamplingMethod = "MCMC" BayesOptsCalib.MCMCnwalkers = 30 - # BayesOptsCalib.MCMCnSteps = 10000 + BayesOptsCalib.MCMCnSteps = 20000 # BayesOptsCalib.MCMCverbose = True - BayesOptsCalib.MAP ='mean' + BayesOptsCalib.MAP = "mean" BayesOptsCalib.PlotPostDist = True - BayesOptsCalib.Corner_title_fmt = '.2e' + BayesOptsCalib.Corner_title_fmt = ".2e" BayesOptsCalib.PlotPostPred = False BayesOptsCalib.PlotMAPPred = False # ----- Define the discrepancy model ------- - calibMeasuError = (errorPerc * pd.read_csv(Model.MeasurementFile))**2 + # calibMeasuError = (errorPerc * pd.read_csv(Model.MeasurementFile)) ** 2 # calibNumErr = pd.read_csv('data/NumErrorCalib_stokesdarcy_{}_{}_inclusion_{}Inflow.csv'.format(couplingcond,inclusion,inletLoc))**2 - DiscrepancyOpts = Discrepancy('') - DiscrepancyOpts.Type = 'Gaussian' + DiscrepancyOpts = Discrepancy("") + DiscrepancyOpts.Type = "Gaussian" # DiscrepancyOpts.Parameters = calibNumErr#.add(calibNumErr,fill_value=0) # BayesOptsCalib.Discrepancy = DiscrepancyOpts delta_dict = {} Data = pd.read_csv(Model.MeasurementFile) - ED_Y= PCEModel.ExpDesign.Y + ED_Y = PCEModel.ExpDesign.Y for out in list(Data.keys())[1:]: try: data = Data[out].to_numpy()[~np.isnan(Data[out])] except: data = Data[out][~np.isnan(Data[out])] - + # Mean absolute error # delta_dict[out] = 
np.mean(abs(data - ED_Y[out]),axis=0) - # _,delta_dict[out] = np.linalg.eig(np.cov(((data-ED_Y[out])**2).T)) + # _,delta_dict[out] = np.linalg.eig(np.cov(((data-ED_Y[out]).T))) + # Mean squared error - delta_dict[out] = np.mean(((data - ED_Y[out]))**2,axis=0) - - errormodels = pd.DataFrame.from_dict(delta_dict,orient='index').T - DiscrepancyOpts.Parameters = errormodels/errormodels.sum() + delta_dict[out] = np.ones_like(data) + # delta_dict[out] = np.mean(((data - ED_Y[out])) ** 2, axis=0) + + errormodels = pd.DataFrame.from_dict(delta_dict, orient="index").T + DiscrepancyOpts.Parameters = errormodels #/ errormodels.sum() BayesOptsCalib.Discrepancy = DiscrepancyOpts print(DiscrepancyOpts.Parameters) @@ -306,19 +338,21 @@ def run(params, errorPerc=0.05, couplingcond='BJ', inclusion='squared', inletLoc DiscOutputOpts = Input() # OutputName = 'velocity' DiscOutputOpts.addMarginals() - DiscOutputOpts.Marginals[0].Name = '$\sigma^2_{vel}$' - DiscOutputOpts.Marginals[0].DistType = 'unif' - DiscOutputOpts.Marginals[0].Parameters = [0.0,1e-7] - + DiscOutputOpts.Marginals[0].Name = "$\sigma^2_{vel}$" + DiscOutputOpts.Marginals[0].DistType = "unif" + DiscOutputOpts.Marginals[0].Parameters = [0.0, 1e-7] + # OutputName = 'pressure' DiscOutputOpts.addMarginals() - DiscOutputOpts.Marginals[1].Name = '$\sigma^2_{p}$' - DiscOutputOpts.Marginals[1].DistType = 'unif' - DiscOutputOpts.Marginals[1].Parameters = [0.0,1e-3] - - BayesOptsCalib.Discrepancy = {'known': DiscrepancyOpts, - 'infer': Discrepancy(DiscOutputOpts)} - + DiscOutputOpts.Marginals[1].Name = "$\sigma^2_{p}$" + DiscOutputOpts.Marginals[1].DistType = "unif" + DiscOutputOpts.Marginals[1].Parameters = [0.0, 1e-2] + + BayesOptsCalib.Discrepancy = { + "known": DiscrepancyOpts, + "infer": Discrepancy(DiscOutputOpts), + } + # -- (Option B) -- # BayesOptsCalib.errorModel = True # coord_vel = pd.read_csv('models/velocity_points.csv') @@ -334,37 +368,37 @@ def run(params, errorPerc=0.05, couplingcond='BJ', inclusion='squared', inletLoc # DiscOutputOpts.Marginals[0].Name = '$\lambda_{vel}$' # DiscOutputOpts.Marginals[0].DistType = 'unif' # DiscOutputOpts.Marginals[0].Parameters = [0, 10] - + # # OutputName = 'sigma2_vel' # DiscOutputOpts.addMarginals() # DiscOutputOpts.Marginals[1].Name = '$\sigma^2_{vel}$' # DiscOutputOpts.Marginals[1].DistType = 'uniform' # DiscOutputOpts.Marginals[1].Parameters = [0,1e-5] - + # # OutputName = 'sigma2_vel' # DiscOutputOpts.addMarginals() # DiscOutputOpts.Marginals[2].Name = '$\sigma^2_{vel}$' # DiscOutputOpts.Marginals[2].DistType = 'uniform' # DiscOutputOpts.Marginals[2].Parameters = [0,1e-5] - + # # OutputName = 'lambda_p' # DiscOutputOpts.addMarginals() # DiscOutputOpts.Marginals[3].Name = '$\lambda_{p}$' # DiscOutputOpts.Marginals[3].DistType = 'unif' # DiscOutputOpts.Marginals[3].Parameters = [0, 10] - + # # OutputName = 'sigma2_p' # DiscOutputOpts.addMarginals() # DiscOutputOpts.Marginals[4].Name = '$\sigma^2_{p}$' # DiscOutputOpts.Marginals[4].DistType = 'unif' # DiscOutputOpts.Marginals[4].Parameters = [0,1e-3] - + # # OutputName = 'sigma2_p' # DiscOutputOpts.addMarginals() # DiscOutputOpts.Marginals[5].Name = '$\sigma^2_{p}$' # DiscOutputOpts.Marginals[5].DistType = 'unif' # DiscOutputOpts.Marginals[5].Parameters = [0,1e-3] - + # BayesOptsCalib.Discrepancy = Discrepancy(DiscOutputOpts) # BayesOptsCalib.Discrepancy = {'known': DiscrepancyOpts, # 'infer': Discrepancy(DiscOutputOpts)} @@ -373,119 +407,126 @@ def run(params, errorPerc=0.05, couplingcond='BJ', inclusion='squared', inletLoc BayesCalib = 
BayesOptsCalib.create_Inference() # Save class objects - with open('PA_A_Bayes'+Model.Name+'.pkl', 'wb') as output: + with open("PA_A_Bayes" + Model.Name + ".pkl", "wb") as output: joblib.dump(BayesCalib, output, 2) # Load the objects - # with open('PA_A_Bayes'+Model.Name+'.pkl', 'rb') as input: + # with open("PA_A_Bayes" + Model.Name + ".pkl", "rb") as input: # BayesCalib = joblib.load(input) - #===================================================== - #================= Visualization =================== - #===================================================== + # ===================================================== + # ================= Visualization =================== + # ===================================================== # Plot posterior predictive - postPredictiveplot(PCEModel.ModelObj.Name, errorPerc, case='Calib', - inletLoc=inletLoc, bins=20) + postPredictiveplot(PCEModel.ModelObj.Name, case="Calib", inletLoc=inletLoc, + bins=20) - #===================================================== - #================== VALIDATION ===================== - #===================================================== + # ===================================================== + # ================== VALIDATION ===================== + # ===================================================== ValidInputs = Input() - ValidInputs.addMarginals() # VyMaxTop - ValidInputs.Marginals[0].Name = '$V^{top}$' - ValidInputs.Marginals[0].InputValues = BayesCalib.Posterior_df['$V^{top}$'] + ValidInputs.addMarginals() # VyMaxTop + ValidInputs.Marginals[0].Name = "$V^{top}$" + ValidInputs.Marginals[0].InputValues = BayesCalib.Posterior_df["$V^{top}$"] - ValidInputs.addMarginals() # CavityHeight - ValidInputs.Marginals[1].Name = '$\\Gamma$' - ValidInputs.Marginals[1].InputValues = BayesCalib.Posterior_df['$\\Gamma$'] + ValidInputs.addMarginals() # CavityHeight + ValidInputs.Marginals[1].Name = "$\\Gamma$" + ValidInputs.Marginals[1].InputValues = BayesCalib.Posterior_df["$\\Gamma$"] - ValidInputs.addMarginals() # Permeability - ValidInputs.Marginals[2].Name = '$K$' - ValidInputs.Marginals[2].InputValues = BayesCalib.Posterior_df['$K$'] + ValidInputs.addMarginals() # Permeability + ValidInputs.Marginals[2].Name = "$K$" + ValidInputs.Marginals[2].InputValues = BayesCalib.Posterior_df["$K$"] - if couplingcond == 'BJ': - ValidInputs.addMarginals() # AlphaBeaversJoseph - ValidInputs.Marginals[3].Name = '$\\alpha_{BJ}$' - ValidInputs.Marginals[3].InputValues = BayesCalib.Posterior_df['$\\alpha_{BJ}$'] + if couplingcond == "BJ": + ValidInputs.addMarginals() # AlphaBeaversJoseph + ValidInputs.Marginals[3].Name = "$\\alpha_{BJ}$" + ValidInputs.Marginals[3].InputValues = BayesCalib.Posterior_df["$\\alpha_{BJ}$"] # ----------------------------------------------------------------- - print("\n" + "-"*40) + print("\n" + "-" * 40) print("PCE training for validation.") - print("-"*40 + "\n") + print("-" * 40 + "\n") import copy + ValidModel = copy.deepcopy(Model) - ValidModel.Name = '{}-valid'.format(Model.Name) + ValidModel.Name = "{}-valid".format(Model.Name) ValidMetaModelOpts = copy.deepcopy(MetaModelOpts) ValidMetaModelOpts.Inputs = ValidInputs - ValidMetaModelOpts.ExpDesign.Method = 'normal' + ValidMetaModelOpts.ExpDesign.Method = "normal" ValidMetaModelOpts.ExpDesign.InputObj = ValidInputs ValidMetaModelOpts.ExpDesign.Y = None - ValidMetaModelOpts.ExpDesign.SamplingMethod = 'random' #'random' 'user' - ValidMetaModelOpts.ExpDesign.hdf5File = None#"ExpDesign_{}-valid.hdf5".format(Model.Name)#None - + 
ValidMetaModelOpts.ExpDesign.SamplingMethod = "random" # 'random' 'user' + ValidMetaModelOpts.ExpDesign.hdf5File = None + # ValidMetaModelOpts.ExpDesign.hdf5File = f"ExpDesign_{Model.Name}-valid.hdf5" + import h5py + HDF5File = "ExpDesign_{}-testset_Validation.hdf5".format(Model.Name) - ValidSets = h5py.File("./data/ValidationSets/"+HDF5File, 'r+') - parNames = list(BayesCalib.Posterior_df.keys())[:PCEModel.NofPa] - indices = check_ranges(np.array(ValidSets["EDX/init_"]), BayesCalib.Posterior_df[parNames]) + ValidSets = h5py.File("./data/ValidationSets/" + HDF5File, "r+") + parNames = list(BayesCalib.Posterior_df.keys())[: PCEModel.NofPa] + indices = check_ranges( + np.array(ValidSets["EDX/init_"]), BayesCalib.Posterior_df[parNames] + ) ValidMetaModelOpts.validSamples = np.array(ValidSets["EDX/init_"])[indices] - validModelRuns = dict() - + validSets = dict() + # Extract x values - validModelRuns["x_values"] = dict() + validSets["x_values"] = dict() for varIdx, var in enumerate(Model.Output.Names): - validModelRuns["x_values"][var] = np.array(ValidSets["x_values/"+var]) + validSets["x_values"][var] = np.array(ValidSets["x_values/" + var]) for varIdx, var in enumerate(Model.Output.Names): - validModelRuns[var] = np.array(ValidSets["EDY/"+var+"/init_"])[indices] + validSets[var] = np.array(ValidSets["EDY/" + var + "/init_"])[indices] ValidSets.close() - ValidMetaModelOpts.validModelRuns = validModelRuns - + ValidMetaModelOpts.validModelRuns = validSets + # >>>>>>>>>>>>>>>>>>>>>> Build Surrogate <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # Train the meta model ValidPCEModel = ValidMetaModelOpts.create_metamodel(ValidModel) # Remove zip file - if os.path.isfile(ValidModel.Name + '.zip'): os.remove(ValidModel.Name + '.zip') + if os.path.isfile(ValidModel.Name + ".zip"): + os.remove(ValidModel.Name + ".zip") # Save PCE models - with open('PCEModel_'+ValidModel.Name+'.pkl', 'wb') as output: + with open("PCEModel_" + ValidModel.Name + ".pkl", "wb") as output: joblib.dump(ValidPCEModel, output, 2) # Load the objects - # with open('PCEModel_'+ValidModel.Name+'.pkl', 'rb') as input: + # with open("PCEModel_" + ValidModel.Name + ".pkl", "rb") as input: # ValidPCEModel = joblib.load(input) - #===================================================== - #========= POST PROCESSING OF METAMODELS =========== - #===================================================== - ValidPostPCE = PostProcessing(ValidPCEModel, Name='valid') + # ===================================================== + # ========= POST PROCESSING OF METAMODELS =========== + # ===================================================== + ValidPostPCE = PostProcessing(ValidPCEModel, Name="valid") # Compute the moments and compare with the Monte-Carlo reference - ValidPostPCE.plotMoments(xlabel='Point ID', plotType='bar') + ValidPostPCE.plotMoments(xlabel="Point ID", plotType="bar") # Plot the sobol indices - sobol_cell, total_sobol = ValidPostPCE.sobolIndicesPCE(xlabel='Point ID', plotType='bar') + _, _ = ValidPostPCE.sobolIndicesPCE(xlabel="Point ID", plotType="bar") # Plot the evolution of the KLD,BME, and Modified LOOCV error - if ValidPCEModel.ExpDesign.Method == 'sequential': + if ValidPCEModel.ExpDesign.Method == "sequential": ValidPostPCE.seqDesignDiagnosticPlots() # Compute and print RMSE error try: - ValidPostPCE.accuracyCheckMetaModel(Samples=ValidMetaModelOpts.validSamples, - validOutputsDict=validModelRuns) + ValidPostPCE.accuracyCheckMetaModel( + Samples=ValidMetaModelOpts.validSamples, validOutputsDict=validSets + ) except: # Include the prediction 
errors in the likelihood
         ValidPCEModel.RMSE = None
 
-    #=====================================================
-    #========= Bayesian inference (Validation) =========
-    #=====================================================
+    # =====================================================
+    # ========= Bayesian inference (Validation) =========
+    # =====================================================
     BayesOptsValid = BayesInference(ValidPCEModel)
 
-    BayesOptsValid.Name = 'Valid'
+    BayesOptsValid.Name = "Valid"
     BayesOptsValid.emulator = True
 
     # Bootstrap for BME calculations
@@ -493,24 +534,23 @@ def run(params, errorPerc=0.05, couplingcond='BJ', inclusion='squared', inletLoc
     BayesOptsValid.BootstrapItrNr = nBootstrapItr
     BayesOptsValid.Samples = None
     BayesOptsValid.perturbedData = perturbedData
-    
+
     # Bayesian cross validation
     # BayesOptsValid.BayesLOOCV = True
 
-    BayesOptsValid.MAP = 'mean'
+    BayesOptsValid.MAP = "mean"
     BayesOptsValid.PlotPostDist = True
-    BayesOptsValid.Corner_title_fmt = '.2e'
+    BayesOptsValid.Corner_title_fmt = ".2e"
     BayesOptsValid.PlotPostPred = False
 
     # # ----- Define the discrepancy model -------
-    validMeasuError = (errorPerc * pd.read_csv(Model.MeasurementFileValid ))**2
+    # validMeasuError = (errorPerc * pd.read_csv(Model.MeasurementFileValid)) ** 2
     # validNumErr = pd.read_csv('data/NumErrorValid_stokesdarcy_{}_{}_inclusion_{}Inflow.csv'.format(couplingcond,inclusion,inletLoc))**2
 
-    DiscrepancyOpts = Discrepancy('')
-    DiscrepancyOpts.Type = 'Gaussian'
+    DiscrepancyOpts = Discrepancy("")
+    DiscrepancyOpts.Type = "Gaussian"
     # DiscrepancyOpts.Parameters = validMeasuError#.add(validNumErr,fill_value=0)
-    
+
     # Pass expected inferred sigma2s
     # data = pd.read_csv(Model.MeasurementFileValid)
     # factors = ((data**2)/((data**2).sum()))
@@ -522,22 +562,27 @@ def run(params, errorPerc=0.05, couplingcond='BJ', inclusion='squared', inletLoc
             data = Data[out].to_numpy()[~np.isnan(Data[out])]
         except:
             data = Data[out][~np.isnan(Data[out])]
-        
+
         # Mean absolute error
         # delta_dict[out] = np.mean(abs(data - ED_Y[out]),axis=0)
 
         # Mean squared error
-        delta_dict[out] = np.mean((data - ED_Y[out])**2,axis=0)
-
-    errormodels = pd.DataFrame.from_dict(delta_dict,orient='index').T
-    factors = errormodels/errormodels.sum()
-
-    DiscrepancyOpts.Parameters = {'velocity [m/s]':np.mean(BayesCalib.Posterior_df['$\sigma^2_{vel}$'].values)*factors['velocity [m/s]'].values,
-                                  'p':np.mean(BayesCalib.Posterior_df['$\sigma^2_{p}$'].values)*factors['p'].values}
-
+        delta_dict[out] = np.mean((data - ED_Y[out]) ** 2, axis=0)
+        delta_dict[out] = np.ones_like(data)
+
+    errormodels = pd.DataFrame.from_dict(delta_dict, orient="index").T
+    factors = errormodels #/ errormodels.sum()
+
+    DiscrepancyOpts.Parameters = {
+        "velocity [m/s]": np.mean(BayesCalib.Posterior_df["$\sigma^2_{vel}$"].values)
+        * factors["velocity [m/s]"].values,
+        "p": np.mean(BayesCalib.Posterior_df["$\sigma^2_{p}$"].values)
+        * factors["p"].values,
+    }
+
     BayesOptsValid.Discrepancy = DiscrepancyOpts
     # BayesOptsValid.selectedIndices = {'velocity [m/s]':[2,3,4,5,6,7],
     #                                   'p':[0,1]}
-    
+
     # Construct error model based on discrepancy between ED and Data
     # BayesOptsValid.errorModel = True
     # coord_vel = pd.read_csv('models/velocity_points.csv')
@@ -547,75 +592,80 @@ def run(params, errorPerc=0.05, couplingcond='BJ', inclusion='squared', inletLoc
     #     BayesOptsValid.BiasInputs = {'velocity [m/s]':coord_vel,
     #                                  'p':coord_p}
     # BayesOptsValid.errorMetaModel = BayesCalib.errorMetaModel
-    
+
     # ----- Start Bayesian inference -------
     BayesValid = BayesOptsValid.create_Inference()
 
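+    # Note: BayesValid.logBME collects one log(BME) value per bootstrap
+    # iteration (BootstrapItrNr); the CSV written below is what
+    # Benchmark_PAA.py loads for the Bayes-factor and model-weight plots.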
#===================================================== - #============== Save class objects ================= - #===================================================== - with open('PA_A_Bayes'+ValidModel.Name+'.pkl', 'wb') as output: + # ===================================================== + # ============== Save class objects ================= + # ===================================================== + with open("PA_A_Bayes" + ValidModel.Name + ".pkl", "wb") as output: joblib.dump(BayesValid, output, 2) - np.savetxt('logBME_'+ValidModel.Name+'.csv',BayesValid.logBME) - #===================================================== - #================= Visualization =================== - #===================================================== + + np.savetxt("logBME_" + ValidModel.Name + ".csv", BayesValid.logBME) + # ===================================================== + # ================= Visualization =================== + # ===================================================== # Plot posterior predictive - postPredictiveplot(ValidPCEModel.ModelObj.Name, errorPerc, case='Valid', - inletLoc=inletLoc, bins=20) + postPredictiveplot( + ValidPCEModel.ModelObj.Name, case="Valid", inletLoc=inletLoc, bins=20 + ) - #===================================================== - #======== Moving folders to a new folder =========== - #===================================================== + # ===================================================== + # ======== Moving folders to a new folder =========== + # ===================================================== # Create an output directory - outdir = 'outputs_' + Model.Name + outdir = "outputs_" + Model.Name os.makedirs(outdir, exist_ok=True) # Move files to a main folder - for case in ['Calib', 'Valid']: - metaModel = PCEModel if case == 'Calib' else ValidPCEModel + for case in ["Calib", "Valid"]: + metaModel = PCEModel if case == "Calib" else ValidPCEModel ModelName = metaModel.ModelObj.Name - files = ['ExpDesign_'+ModelName+'.hdf5', - 'PCEModel_'+ModelName+'.pkl', - 'PA_A_Bayes'+ModelName+'.pkl'] - - if case == 'Valid': - files.append('logBME_'+ValidModel.Name+'.csv') + files = [ + "ExpDesign_" + ModelName + ".hdf5", + "PCEModel_" + ModelName + ".pkl", + "PA_A_Bayes" + ModelName + ".pkl", + ] - for file in files: - shutil.move("./"+file, outdir+'/'+file) + if case == "Valid": + files.append("logBME_" + ValidModel.Name + ".csv") + for file in files: + shutil.move("./" + file, outdir + "/" + file) # Move folders to a main folder - folders = ['Outputs_PostProcessing_'+case.lower(), - 'Outputs_Bayes_'+ModelName+'_'+case, - 'postPred_'+ModelName+'_'+case] + folders = [ + "Outputs_PostProcessing_" + case.lower(), + "Outputs_Bayes_" + ModelName + "_" + case, + "postPred_" + ModelName + "_" + case, + ] for folder in folders: try: - shutil.move("./"+folder, outdir) + shutil.move("./" + folder, outdir) except: pass return PCEModel, BayesCalib, BayesValid -#========================================================================== -#======================== Set Parameters ============================== -#========================================================================== + +# ========================================================================== +# ======================== Set Parameters ============================== +# ========================================================================== # ---------- set BayesValidRox params ---------- -# PCEExpDesignMethod = 'normal' # 'normal' or 'sequential' -# nInitSamples = 200 #50 Initial No. of orig.
Model runs for surrogate training -# nTotalSamples = 75 #100 Total No. of orig. Model runs for surrogate training -# nBootstrapItr = 1000 # No. of bootstraping iterations for Bayesian analysis -# BootstrapNoise = 0.0025 # Noise amount for bootstraping in Bayesian analysis +# PCEExpDesignMethod = "normal" # 'normal' or 'sequential' +# nInitSamples = 200 # 50 Initial No. of orig. Model runs for surrogate training +# nTotalSamples = 75 # 100 Total No. of orig. Model runs for surrogate training +# nBootstrapItr = 1000 # No. of bootstrapping iterations for Bayesian analysis +# BootstrapNoise = 0.0025 # Noise amount for bootstrapping in Bayesian analysis # # perturbedData = np.loadtxt('./data/perturbedValidDataAvg_squared_inclusion_leftInflow.csv',delimiter=',') -# perturbedData = np.loadtxt('./data/perturbedValidDataAvg_squared_inclusion_topInflow.csv',delimiter=',') +# perturbedData = np.loadtxt("./data/perturbedValidDataAvg_squared_inclusion_topInflow.csv", delimiter=",") -# params = (nInitSamples,nTotalSamples,nBootstrapItr,perturbedData) +# params = (nInitSamples, nTotalSamples, nBootstrapItr, perturbedData) -#========================================================================== -#==================== Run main scripts for PA-B ======================= -#========================================================================== -# PCEModel, BayesCalib, BayesValid = run(params,couplingcond='BJ', PCEEDMethod=PCEExpDesignMethod) -# PCEModel = run(params,couplingcond='ER',errorPerc=0.05,inletLoc='top', -# inclusion='squared', PCEEDMethod=PCEExpDesignMethod) +# ========================================================================== +# ==================== Run main scripts for PA-B ======================= +# ========================================================================== +# PCEModel, BayesCalib, BayesValid = run(params, couplingcond="BJ") +# PCEModel = run(params, couplingcond="BJ", inletLoc="top", inclusion="squared") diff --git a/BayesValidRox/tests/PA-A/ffpm_validation_stokespnm.py b/BayesValidRox/tests/PA-A/ffpm_validation_stokespnm.py index b6720694b..0dcfb0d7e 100755 --- a/BayesValidRox/tests/PA-A/ffpm_validation_stokespnm.py +++ b/BayesValidRox/tests/PA-A/ffpm_validation_stokespnm.py @@ -1,24 +1,30 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ -Created on Thu Aug 13 09:53:11 2020 -@author: farid +Author: Farid Mohammadi, M.Sc.
+E-Mail: farid.mohammadi@iws.uni-stuttgart.de +Department of Hydromechanics and Modelling of Hydrosystems (LH2) +Institute for Modelling Hydraulic and Environmental Systems (IWS), University +of Stuttgart, www.iws.uni-stuttgart.de/lh2/ +Pfaffenwaldring 61 +70569 Stuttgart + +Created on Thu Aug 13 2020 """ -import sys, os, joblib +import sys +import os +import joblib import numpy as np import scipy.stats as stats import pandas as pd import shutil -try: - import cPickle as pickle -except ModuleNotFoundError: - import _pickle as pickle - +import h5py import matplotlib -matplotlib.use('agg') + +matplotlib.use("agg") # Add BayesValidRox path -sys.path.insert(0,'./../../../BayesValidRox/') +sys.path.insert(0, "./../../../BayesValidRox/") from PyLink.PyLinkForwardModel import PyLinkForwardModel from surrogate_models.Input import Input @@ -26,8 +32,10 @@ from surrogate_models.surrogate_models import Metamodel from PostProcessing.PostProcessing import PostProcessing from BayesInference.BayesInference import BayesInference, Discrepancy from util.post_plot_PAPER import postPredictiveplot + # from post_plot_PAPER import postPredictiveplot + def check_ranges(samples, BayesDF): """ This function checks if theta lies in the given ranges @@ -59,92 +67,120 @@ def check_ranges(samples, BayesDF): index.append(i) return index -def run(params, averaging=True,errorPerc=0.05, inletLoc='top', PCEEDMethod='normal'): - print("\n" + "="*75) - print("Stochastic calibration and validation of {0}.".format('ffpm-stokespnm')) - print("="*75 + "\n") - - nInitSamples,nTotalSamples,nBootstrapItr,perturbedData = params +def run(params, averaging=True, inletLoc="top"): if averaging: - calib_data_csv_path = 'data/stokesDataCalib_squared_inclusion_{}Inflow.csv'.format(inletLoc) - valid_data_csv_path = 'data/stokesDataValid_squared_inclusion_{}Inflow.csv'.format(inletLoc) + modelName = f"ffpm-stokespnm_squared_inclusion_{inletLoc}Inflow" else: - calib_data_csv_path = 'data/stokesDataCalib_squared_inclusion_{}Inflow_without_averaging.csv'.format(inletLoc) - valid_data_csv_path = 'data/stokesDataValid_squared_inclusion_{}Inflow_without_averaging.csv'.format(inletLoc) + modelName = f"ffpm-stokespnmNA_squared_inclusion_{inletLoc}Inflow" + + print("\n" + "=" * 75) + print(f"Stochastic calibration and validation of {modelName}.") + print("=" * 75 + "\n") - #===================================================== - #============ COMPUTATIONAL MODEL ================ - #===================================================== + nInitSamples, nTotalSamples, nBootstrapItr, perturbedData = params + + if averaging: + calib_data_path = "data/stokesDataCalib_squared_inclusion_"\ + f"{inletLoc}Inflow.csv" + valid_data_path = f"data/stokesDataValid_squared_inclusion_"\ + f"{inletLoc}Inflow.csv" + else: + calib_data_path = "data/stokesDataCalib_squared_inclusion_"\ + f"{inletLoc}Inflow_without_averaging.csv" + valid_data_path = f"data/stokesDataValid_squared_inclusion_"\ + f"{inletLoc}Inflow_without_averaging.csv" + + # ===================================================== + # ============ COMPUTATIONAL MODEL ================ + # ===================================================== Model = PyLinkForwardModel() # Model.nrCPUs = 15 - Model.Type = 'PyLink' + Model.Type = "PyLink" + Model.Name = modelName if averaging: - Model.Name = 'ffpm-stokespnm_squared_inclusion_{}Inflow'.format(inletLoc) + Model.Name = f"ffpm-stokespnm_squared_inclusion_{inletLoc}Inflow" else: - Model.Name = 'ffpm-stokespnmNA_squared_inclusion_{}Inflow'.format(inletLoc) - - 
modelDir = './models/stokespnm/' - Model.InputFile = modelDir+'params_squared_inclusion_{}Inflow.input'.format(inletLoc) - Model.InputTemplate = modelDir+'params_squared_inclusion_{}Inflow.tpl.input'.format(inletLoc) + modelDir = "./models/stokespnm/" + Model.InputFile = f"{modelDir}params_squared_inclusion_{inletLoc}"\ "Inflow.input" + Model.InputTemplate = f"{modelDir}params_squared_inclusion_{inletLoc}"\ "Inflow.tpl.input" - Model.Command = "model_exe.py stokespnm {} --averaging {}".format(Model.InputFile,averaging) + Model.Command = f"model_exe.py stokespnm {Model.InputFile} "\ f"--averaging {averaging}" Model.ExecutionPath = os.getcwd() - Model.Output.Parser = 'read_ffpm' - Model.Output.Names = ['velocity [m/s]', 'p'] - Model.Output.FileNames = ["ffpm_stokespnm_velocity_final.csv","ffpm_stokespnm_p_final.csv"] + Model.Output.Parser = "read_ffpm" + Model.Output.Names = ["velocity [m/s]", "p"] + Model.Output.FileNames = ["ffpm_stokespnm_velocity_final.csv", + "ffpm_stokespnm_p_final.csv"] # For Bayesian inversion - Model.MeasurementFile = calib_data_csv_path + Model.MeasurementFile = calib_data_path # Include the validation observation data - Model.MeasurementFileValid = valid_data_csv_path + Model.MeasurementFileValid = valid_data_path - #===================================================== - #========= PROBABILISTIC INPUT MODEL ============== - #===================================================== + # ===================================================== + # ========= PROBABILISTIC INPUT MODEL ============== + # ===================================================== Inputs = Input() MCSize = 50000 # Assuming dependent input variables # Inputs.Rosenblatt = True - Inputs.addMarginals() #VyMaxTop - Inputs.Marginals[0].Name = '$V^{top}$' # [5e-4, 1.5e-3] - Inputs.Marginals[0].InputValues = stats.uniform(loc=5.0e-04, scale=1.5e-03-5.0e-04).rvs(size=MCSize) - - Inputs.addMarginals() #TransmissibilityTotal '$g_{t,}$' - Inputs.Marginals[1].Name = '$g_{ij}$' # [1.0e-7, 1.0e-05] - Inputs.Marginals[1].InputValues = stats.uniform(loc=1e-7, scale=1e-5-1e-7).rvs(size=MCSize) - + # VyMaxTop + Inputs.addMarginals() + Inputs.Marginals[0].Name = "$V^{top}$" # [5e-4, 1.5e-3] + Inputs.Marginals[0].InputValues = stats.uniform( + loc=5.0e-04, scale=1.5e-03 - 5.0e-04 + ).rvs(size=MCSize) + # Inputs.Marginals[0].DistType = 'unif' + # Inputs.Marginals[0].Parameters = [5e-4, 1.5e-3] + + # TransmissibilityTotal '$g_{ij}$' + Inputs.addMarginals() + Inputs.Marginals[1].Name = "$g_{ij}$" # [1.0e-7, 1.0e-05] + Inputs.Marginals[1].InputValues = stats.uniform(loc=1e-7, scale=1e-5-1e-7).rvs( + size=MCSize + ) + # Inputs.Marginals[1].DistType = 'unif' + # Inputs.Marginals[1].Parameters = [1e-7, 1e-05] # Inputs.addMarginals() #TransmissibilityThroat '$g_{t,ij}$' # Inputs.Marginals[1].Name = '$g_{t,ij}$' # [1.0e-7, 1.0e-04] # Inputs.Marginals[1].InputValues = stats.uniform(loc=1e-7, scale=1e-4-1e-7).rvs(size=MCSize) - + # Inputs.addMarginals() #TransmissibilityHalfPore '$g_{p,i}$' # Inputs.Marginals[2].Name = '$g_{p,i}$' # [1.0e-7, 1.0e-04] # Inputs.Marginals[2].InputValues = stats.uniform(loc=1e-7, scale=1e-4-1e-7).rvs(size=MCSize) - Inputs.addMarginals() #'$\\beta_{pore}$' - Inputs.Marginals[2].Name = '$\\beta_{pore}$' # [1000.0, 100000.0] - Inputs.Marginals[2].InputValues = stats.uniform(loc=1e3, scale=1e5-1e3).rvs(size=MCSize) - - #===================================================== - #========== DEFINITION OF THE METAMODEL ============ - 
#===================================================== + # '$\\beta_{pore}$' + Inputs.addMarginals() + Inputs.Marginals[2].Name = "$\\beta_{pore}$" # [1000.0, 100000.0] + Inputs.Marginals[2].InputValues = stats.uniform(loc=1e3, scale=1e5-1e3).rvs( + size=MCSize + ) + # Inputs.Marginals[2].DistType = 'unif' + # Inputs.Marginals[2].Parameters = [1e3, 1e5] + + # ===================================================== + # ========== DEFINITION OF THE METAMODEL ============ + # ===================================================== MetaModelOpts = Metamodel(Inputs) - + # Select if you want to preserve the spatial/temporal dependencies - MetaModelOpts.DimRedMethod = 'PCA' + # MetaModelOpts.DimRedMethod = "PCA" # MetaModelOpts.varPCAThreshold = 99.99 # Select your metamodel method # 1) PCE (Polynomial Chaos Expansion) 2) GPE (Gaussian Process Emulator) # 3) PCEKriging (PCE + GPE) - MetaModelOpts.metaModel = 'PCE' + MetaModelOpts.metaModel = "PCE" # ------------------------------------------------ # ------------- PCE Specification ---------------- @@ -156,14 +192,14 @@ def run(params, averaging=True,errorPerc=0.05, inletLoc='top', PCEEDMethod='norm # 5)FastARD: Fast Bayesian ARD Regression # 6)VBL: Variational Bayesian Learning # 7)EBL: Empirical Bayesian Learning - MetaModelOpts.RegMethod = 'FastARD' + MetaModelOpts.RegMethod = "FastARD" # Specify the max degree to be compared by the adaptive algorithm: # The degree with the lowest Leave-One-Out cross-validation (LOO) # error (or the highest score=1-LOO) estimator is chosen as the final # metamodel. - MetaModelOpts.MinPceDegree = 6 # default = 1 9 - MetaModelOpts.MaxPceDegree = 6 #9 + MetaModelOpts.MinPceDegree = 9 # default = 1 + MetaModelOpts.MaxPceDegree = 9 # q-quasi-norm 0<q<1 (default=1) # MetaModelOpts.q = np.linspace(0.3,0.6,3) @@ -174,88 +210,88 @@ def run(params, averaging=True,errorPerc=0.05, inletLoc='top', PCEEDMethod='norm # ------------------------------------------------ # ------ Experimental Design Configuration ------- # ------------------------------------------------ - # Generate an experimental design of size NrExpDesign based on a latin - # hypercube sampling of the input model or user-defined values of X and/or Y: + # Generate an experimental design MetaModelOpts.addExpDesign() # One-shot (normal) or Sequential Adaptive (sequential) Design - MetaModelOpts.ExpDesign.Method = PCEEDMethod + MetaModelOpts.ExpDesign.Method = "normal" MetaModelOpts.ExpDesign.NrSamples = nInitSamples # Sampling methods - # 1) random 2) latin_hypercube 3) sobol 4) halton 5) hammersley 6) chebyshev(FT) - # 7) korobov 8) grid(FT) 9) nested_grid(FT) 10)user + # 1) random 2) latin_hypercube 3) sobol 4) halton 5) hammersley 6) user - MetaModelOpts.ExpDesign.SamplingMethod = 'latin_hypercube' + MetaModelOpts.ExpDesign.SamplingMethod = "user" # Provide the experimental design object with a hdf5 file - # MetaModelOpts.ExpDesign.hdf5File = "ExpDesign_{}.hdf5".format(Model.Name) + MetaModelOpts.ExpDesign.hdf5File = f"ExpDesign_{Model.Name}.hdf5" # For calculation of validation error for SeqDesign import h5py - HDF5File = "ExpDesign_{}-testset_Calibration.hdf5".format(Model.Name) - ValidSets = h5py.File("./data/ValidationSets/"+HDF5File, 'r+') + HDF5File = f"ExpDesign_{Model.Name}-testset_Calibration.hdf5" + ValidSets = h5py.File(f"./data/ValidationSets/{HDF5File}", "r+") MetaModelOpts.validSamples = np.array(ValidSets["EDX/init_"]) - validModelRuns = dict() + validSets = dict() # Extract x values - validModelRuns["x_values"] = dict() + validSets["x_values"]
= dict() for varIdx, var in enumerate(Model.Output.Names): - validModelRuns["x_values"][var] = np.array(ValidSets["x_values/"+var]) + validSets["x_values"][var] = np.array(ValidSets[f"x_values/{var}"]) for varIdx, var in enumerate(Model.Output.Names): - validModelRuns[var] = np.array(ValidSets["EDY/"+var+"/init_"]) + validSets[var] = np.array(ValidSets[f"EDY/{var}/init_"]) ValidSets.close() - MetaModelOpts.validModelRuns = validModelRuns + MetaModelOpts.validModelRuns = validSets # >>>>>>>>>>>>>>>>>>>>>> Build Surrogate <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # Adaptive sparse arbitrary polynomial chaos expansion - print("\n" + "-"*40) + print("\n" + "-" * 40) print("PCE training for calibration.") - print("-"*40 + "\n") + print("-" * 40 + "\n") PCEModel = MetaModelOpts.create_metamodel(Model) # Remove zip file - if os.path.isfile(Model.Name + '.zip'): os.remove(Model.Name + '.zip') + if os.path.isfile(Model.Name + ".zip"): + os.remove(Model.Name + ".zip") # Save PCE models - with open('PCEModel_'+Model.Name+'.pkl', 'wb') as output: + with open(f"PCEModel_{Model.Name}.pkl", "wb") as output: joblib.dump(PCEModel, output, 2) # Load the objects - # with open('PCEModel_'+Model.Name+'.pkl', 'rb') as input: + # with open(f'PCEModel_{Model.Name}.pkl', 'rb') as input: # PCEModel = joblib.load(input) - #===================================================== - #========= POST PROCESSING OF METAMODELS =========== - #===================================================== + # ===================================================== + # ========= POST PROCESSING OF METAMODELS =========== + # ===================================================== PostPCE = PostProcessing(PCEModel) # Compute the moments and compare with the Monte-Carlo reference - PostPCE.plotMoments(xlabel='Point ID', plotType='bar') + PostPCE.plotMoments(xlabel="Point ID", plotType="bar") # Plot the sobol indices - sobol_cell, total_sobol = PostPCE.sobolIndicesPCE(xlabel='Point ID', plotType='bar') + _, _ = PostPCE.sobolIndicesPCE(xlabel="Point ID", plotType="bar") # Plot the evolution of the KLD,BME, and Modified LOOCV error - if MetaModelOpts.ExpDesign.Method == 'sequential': + if MetaModelOpts.ExpDesign.Method == "sequential": PostPCE.seqDesignDiagnosticPlots() # Compute and print RMSE error - PostPCE.accuracyCheckMetaModel(Samples=MetaModelOpts.validSamples, - validOutputsDict=validModelRuns) + PostPCE.accuracyCheckMetaModel( + Samples=MetaModelOpts.validSamples, validOutputsDict=validSets + ) - #===================================================== - #========= Bayesian inference (Calibration) ======== - #===================================================== + # ===================================================== + # ========= Bayesian inference (Calibration) ======== + # ===================================================== BayesOptsCalib = BayesInference(PCEModel) - BayesOptsCalib.Name = 'Calib' + BayesOptsCalib.Name = "Calib" BayesOptsCalib.emulator = True # Select the inference method - BayesOptsCalib.SamplingMethod = 'MCMC' + BayesOptsCalib.SamplingMethod = "MCMC" BayesOptsCalib.MCMCnwalkers = 30 # BayesOptsCalib.selectedIndices = {'velocity [m/s]':[0,2,3,4,6,7,8,9], # 'p':[0,1,2]} @@ -267,19 +303,20 @@ def run(params, averaging=True,errorPerc=0.05, inletLoc='top', PCEEDMethod='norm # 35237.668793254459160]) # BayesOptsCalib.MCMCinitSamples = theta_MAP - BayesOptsCalib.MAP ='mean' + BayesOptsCalib.MAP = "mean" BayesOptsCalib.PlotPostDist = True - BayesOptsCalib.Corner_title_fmt = '.2e' + BayesOptsCalib.Corner_title_fmt = ".2e" 
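For reference, the test-set files read above are plain HDF5 containers. A minimal sketch of reading one back, assuming the `EDX/init_`, `x_values/<output>` and `EDY/<output>/init_` dataset layout used in these scripts (the file name below is a placeholder):

import h5py
import numpy as np

output_names = ["velocity [m/s]", "p"]  # output names used in these scripts
valid_sets = {"x_values": {}}
# Read-only mode suffices for inspection; the scripts above open with "r+".
with h5py.File("ExpDesign_placeholder-testset_Calibration.hdf5", "r") as hdf:
    samples = np.array(hdf["EDX/init_"])  # (nSamples, nParams) parameter sets
    for var in output_names:
        valid_sets["x_values"][var] = np.array(hdf[f"x_values/{var}"])
        valid_sets[var] = np.array(hdf[f"EDY/{var}/init_"])  # model responses

Opening read-only also avoids accidentally mutating the shared test sets.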
BayesOptsCalib.PlotPostPred = False BayesOptsCalib.PlotMAPPred = False # ----- Define the discrepancy model ------- - calibMeasuError = (errorPerc * pd.read_csv(Model.MeasurementFile))**2 - # calibNumErr = pd.read_csv('data/NumErrorCalib_stokespnm_squared_inclusion_{}Inflow.csv'.format(inletLoc))**2 + # calibMeasuError = (errorPerc * pd.read_csv(Model.MeasurementFile)) ** 2 + # calibNumErr = pd.read_csv('data/NumErrorCalib_stokespnm_\ + # squared_inclusion_{}Inflow.csv'.format(inletLoc))**2 - DiscrepancyOpts = Discrepancy('') - DiscrepancyOpts.Type = 'Gaussian' - DiscrepancyOpts.Parameters = calibMeasuError#.add(calibNumErr,fill_value=0) + DiscrepancyOpts = Discrepancy("") + DiscrepancyOpts.Type = "Gaussian" + # DiscrepancyOpts.Parameters = calibMeasuError.add(calibNumErr,fill_value=0) # data = pd.read_csv(Model.MeasurementFile) # DiscrepancyOpts.Parameters = ((data**2)/((data**2).sum())) # BayesOptsCalib.Discrepancy = DiscrepancyOpts @@ -298,27 +335,28 @@ def run(params, averaging=True,errorPerc=0.05, inletLoc='top', PCEEDMethod='norm # delta_dict[out] = np.mean((data - ED_Y[out])**2,axis=0) delta_dict[out] = np.ones_like(data) - errormodels = pd.DataFrame.from_dict(delta_dict,orient='index').T - DiscrepancyOpts.Parameters = errormodels#/errormodels.sum() + errormodels = pd.DataFrame.from_dict(delta_dict, orient="index").T + DiscrepancyOpts.Parameters = errormodels # /errormodels.sum() BayesOptsCalib.Discrepancy = DiscrepancyOpts - # # -- (Option A) -- DiscOutputOpts = Input() # # OutputName = 'velocity' DiscOutputOpts.addMarginals() - DiscOutputOpts.Marginals[0].Name = '$\sigma^2_{vel}$' - DiscOutputOpts.Marginals[0].DistType = 'unif' - DiscOutputOpts.Marginals[0].Parameters = [0,1e-5] + DiscOutputOpts.Marginals[0].Name = "$\sigma^2_{vel}$" + DiscOutputOpts.Marginals[0].DistType = "unif" + DiscOutputOpts.Marginals[0].Parameters = [0, 1e-5] # # OutputName = 'pressure' DiscOutputOpts.addMarginals() - DiscOutputOpts.Marginals[1].Name = '$\sigma^2_{p}$' - DiscOutputOpts.Marginals[1].DistType = 'unif' - DiscOutputOpts.Marginals[1].Parameters = [0,1e-3] + DiscOutputOpts.Marginals[1].Name = "$\sigma^2_{p}$" + DiscOutputOpts.Marginals[1].DistType = "unif" + DiscOutputOpts.Marginals[1].Parameters = [0, 1e-3] # BayesOptsCalib.Discrepancy = Discrepancy(DiscOutputOpts) - BayesOptsCalib.Discrepancy = {'known': DiscrepancyOpts, - 'infer': Discrepancy(DiscOutputOpts)} + BayesOptsCalib.Discrepancy = { + "known": DiscrepancyOpts, + "infer": Discrepancy(DiscOutputOpts), + } # -- (Option B) -- # Model bias plus the unknown errors @@ -364,33 +402,33 @@ def run(params, averaging=True,errorPerc=0.05, inletLoc='top', PCEEDMethod='norm BayesCalib = BayesOptsCalib.create_Inference() # Save class objects - with open('PA_A_Bayes'+Model.Name+'.pkl', 'wb') as output: + with open("PA_A_Bayes" + Model.Name + ".pkl", "wb") as output: joblib.dump(BayesCalib, output, 2) # Load the objects # with open('PA_A_Bayes'+Model.Name+'.pkl', 'rb') as input: # BayesCalib = joblib.load(input) - #===================================================== - #================= Visualization =================== - #===================================================== + # ===================================================== + # ================= Visualization =================== + # ===================================================== # Plot posterior predictive - postPredictiveplot(PCEModel.ModelObj.Name, errorPerc, averaging, - inletLoc=inletLoc, case='Calib', bins=20) + postPredictiveplot(PCEModel.ModelObj.Name, averaging, 
inletLoc=inletLoc, + case="Calib", bins=20) - #===================================================== - #================== VALIDATION ===================== - #===================================================== + # ===================================================== + # ================== VALIDATION ===================== + # ===================================================== ValidInputs = Input() - ValidInputs.addMarginals() #VyMaxTop - ValidInputs.Marginals[0].Name = '$V^{top}$' - ValidInputs.Marginals[0].InputValues = BayesCalib.Posterior_df['$V^{top}$'] - - ValidInputs.addMarginals() #TransmissibilityTotal - ValidInputs.Marginals[1].Name = '$g_{ij}$' - ValidInputs.Marginals[1].InputValues = BayesCalib.Posterior_df['$g_{ij}$'] - + ValidInputs.addMarginals() # VyMaxTop + ValidInputs.Marginals[0].Name = "$V^{top}$" + ValidInputs.Marginals[0].InputValues = BayesCalib.Posterior_df["$V^{top}$"] + + ValidInputs.addMarginals() # TransmissibilityTotal + ValidInputs.Marginals[1].Name = "$g_{ij}$" + ValidInputs.Marginals[1].InputValues = BayesCalib.Posterior_df["$g_{ij}$"] + # ValidInputs.addMarginals() #TransmissibilityThroat # ValidInputs.Marginals[1].Name = '$g_{t,ij}$' # ValidInputs.Marginals[1].InputValues = BayesCalib.Posterior_df['$g_{t,ij}$'] @@ -400,88 +438,94 @@ def run(params, averaging=True,errorPerc=0.05, inletLoc='top', PCEEDMethod='norm # ValidInputs.Marginals[2].InputValues = BayesCalib.Posterior_df['$g_{p,i}$'] ValidInputs.addMarginals() - ValidInputs.Marginals[3].Name = '$\\beta_{pore}$' - ValidInputs.Marginals[3].InputValues = BayesCalib.Posterior_df['$\\beta_{pore}$'] + ValidInputs.Marginals[2].Name = "$\\beta_{pore}$" + ValidInputs.Marginals[2].InputValues = BayesCalib.Posterior_df["$\\beta_{pore}$"] # ----------------------------------------------------------------- - print("\n" + "-"*40) + print("\n" + "-" * 40) print("PCE training for validation.") - print("-"*40 + "\n") + print("-" * 40 + "\n") import copy + ValidModel = copy.deepcopy(Model) - ValidModel.Name = '{}-valid'.format(Model.Name) + ValidModel.Name = "{}-valid".format(Model.Name) ValidMetaModelOpts = copy.deepcopy(MetaModelOpts) ValidMetaModelOpts.Inputs = ValidInputs - ValidMetaModelOpts.ExpDesign.Method = 'normal' + ValidMetaModelOpts.ExpDesign.Method = "normal" ValidMetaModelOpts.ExpDesign.InputObj = ValidInputs ValidMetaModelOpts.ExpDesign.Y = None - ValidMetaModelOpts.ExpDesign.SamplingMethod = 'random' #'random' user - ValidMetaModelOpts.ExpDesign.hdf5File = None#"ExpDesign_{}-valid.hdf5".format(Model.Name)#None - + ValidMetaModelOpts.ExpDesign.SamplingMethod = "random" # 'random' user + ValidMetaModelOpts.ExpDesign.hdf5File = None + # "ExpDesign_{}-valid.hdf5".format(Model.Name)#None + import h5py - HDF5File = "ExpDesign_{}-testset_Validation.hdf5".format(Model.Name) - ValidSets = h5py.File("./data/ValidationSets/"+HDF5File, 'r+') - parNames = list(BayesCalib.Posterior_df.keys())[:PCEModel.NofPa] - indices = check_ranges(np.array(ValidSets["EDX/init_"]), BayesCalib.Posterior_df[parNames]) + HDF5File = f"ExpDesign_{Model.Name}-testset_Validation.hdf5" + ValidSets = h5py.File(f"./data/ValidationSets/{HDF5File}", "r+") + parNames = list(BayesCalib.Posterior_df.keys())[: PCEModel.NofPa] + indices = check_ranges( + np.array(ValidSets["EDX/init_"]), BayesCalib.Posterior_df[parNames] + ) ValidMetaModelOpts.validSamples = np.array(ValidSets["EDX/init_"])[indices] - validModelRuns = dict() - OutputNames = ['velocity [m/s]', 'p'] + validSets = dict() + OutputNames = ["velocity [m/s]", "p"] # Extract x values 
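On the sample filtering used here: `check_ranges` keeps only those test samples whose parameters fall inside the per-parameter [min, max] box of the calibration posterior, so the validation surrogate is only tested where it was trained. A vectorized sketch of the same idea with made-up numbers (illustration only, not part of the patch):

import numpy as np

posterior = np.array([[0.2, 10.0], [0.4, 12.0], [0.3, 11.0]])  # hypothetical
samples = np.array([[0.25, 11.0], [0.9, 11.0], [0.35, 20.0]])  # hypothetical

low, high = posterior.min(axis=0), posterior.max(axis=0)
inside = np.all((samples >= low) & (samples <= high), axis=1)
indices = np.where(inside)[0]  # array([0]): rows 1 and 2 fall outside the box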
- validModelRuns["x_values"] = dict() + validSets["x_values"] = dict() for varIdx, var in enumerate(OutputNames): - validModelRuns["x_values"][var] = np.array(ValidSets["x_values/"+var]) + validSets["x_values"][var] = np.array(ValidSets[f"x_values/{var}"]) for varIdx, var in enumerate(OutputNames): - validModelRuns[var] = np.array(ValidSets["EDY/"+var+"/init_"])[indices] + validSets[var] = np.array(ValidSets[f"EDY/{var}/init_"])[indices] ValidSets.close() - ValidMetaModelOpts.validModelRuns = validModelRuns + ValidMetaModelOpts.validModelRuns = validSets # >>>>>>>>>>>>>>>>>>>>>> Build Surrogate <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # Train the meta model ValidPCEModel = ValidMetaModelOpts.create_metamodel(ValidModel) # Remove zip file - if os.path.isfile(ValidModel.Name + '.zip'): os.remove(ValidModel.Name + '.zip') + if os.path.isfile(ValidModel.Name + ".zip"): + os.remove(ValidModel.Name + ".zip") # Save PCE models - with open('PCEModel_'+ValidModel.Name+'.pkl', 'wb') as output: + with open("PCEModel_" + ValidModel.Name + ".pkl", "wb") as output: joblib.dump(ValidPCEModel, output, 2) # Load the objects # with open('PCEModel_'+ValidModel.Name+'.pkl', 'rb') as input: # ValidPCEModel = joblib.load(input) - #===================================================== - #========= POST PROCESSING OF METAMODELS =========== - #===================================================== - ValidPostPCE = PostProcessing(ValidPCEModel, Name='valid') + # ===================================================== + # ========= POST PROCESSING OF METAMODELS =========== + # ===================================================== + ValidPostPCE = PostProcessing(ValidPCEModel, Name="valid") # Compute the moments and compare with the Monte-Carlo reference - ValidPostPCE.plotMoments(xlabel='Point ID', plotType='bar') + ValidPostPCE.plotMoments(xlabel="Point ID", plotType="bar") # Plot the sobol indices - sobol_cell, total_sobol = ValidPostPCE.sobolIndicesPCE(xlabel='Point ID', plotType='bar') + _, _ = ValidPostPCE.sobolIndicesPCE(xlabel="Point ID", plotType="bar") # Plot the evolution of the KLD,BME, and Modified LOOCV error - if ValidPCEModel.ExpDesign.Method == 'sequential': + if ValidPCEModel.ExpDesign.Method == "sequential": ValidPostPCE.seqDesignDiagnosticPlots() # Compute and print RMSE error try: - ValidPostPCE.accuracyCheckMetaModel(Samples=ValidMetaModelOpts.validSamples, - validOutputsDict=validModelRuns) + ValidPostPCE.accuracyCheckMetaModel( + Samples=ValidMetaModelOpts.validSamples, validOutputsDict=validSets + ) except: ValidPCEModel.RMSE = None # ValidPostPCE.accuracyCheckMetaModel(nSamples=5) - #===================================================== - #========= Bayesian inference (Validation) ========= - #===================================================== + # ===================================================== + # ========= Bayesian inference (Validation) ========= + # ===================================================== BayesOptsValid = BayesInference(ValidPCEModel) - BayesOptsValid.Name = 'Valid' + BayesOptsValid.Name = "Valid" BayesOptsValid.emulator = True # Bootstrap for BME calculations @@ -496,23 +540,22 @@ def run(params, averaging=True,errorPerc=0.05, inletLoc='top', PCEEDMethod='norm # Bayesian cross validation # BayesOptsValid.BayesLOOCV = True - BayesOptsValid.MAP = 'mean' + BayesOptsValid.MAP = "mean" BayesOptsValid.PlotPostDist = True - BayesOptsValid.Corner_title_fmt = '.2e' + BayesOptsValid.Corner_title_fmt = ".2e" BayesOptsValid.PlotPostPred = False - # # ----- Define the discrepancy model
------- - validMeasuError = (errorPerc * pd.read_csv(Model.MeasurementFileValid))**2 + # validMeasuError = (errorPerc * pd.read_csv(Model.MeasurementFileValid)) ** 2 # validNumErr = pd.read_csv('data/NumErrorValid_stokespnm_squared_inclusion_{}Inflow.csv'.format(inletLoc))**2 # DiscrepancyOpts = Discrepancy('') # DiscrepancyOpts.Type = 'Gaussian' - DiscrepancyOpts.Parameters = validMeasuError#.add(validNumErr,fill_value=0) + # DiscrepancyOpts.Parameters = validMeasuError # .add(validNumErr,fill_value=0) # Pass expected inferred sigma2s data = pd.read_csv(Model.MeasurementFileValid) - factors = ((data**2)/((data**2).sum())) + factors = (data ** 2) / ((data ** 2).sum()) delta_dict = {} Data = pd.read_csv(Model.MeasurementFileValid) ED_Y = ValidPCEModel.ExpDesign.Y @@ -525,15 +568,19 @@ def run(params, averaging=True,errorPerc=0.05, inletLoc='top', PCEEDMethod='norm # Mean absolute error # delta_dict[out] = np.mean(abs(data - ED_Y[out]),axis=0) # Mean squared error - delta_dict[out] = np.mean((data - ED_Y[out])**2,axis=0) - # delta_dict[out] = np.ones_like(data) + # delta_dict[out] = np.mean((data - ED_Y[out]) ** 2, axis=0) + delta_dict[out] = np.ones_like(data) - errormodels = pd.DataFrame.from_dict(delta_dict,orient='index').T - factors = errormodels/errormodels.sum() - DiscrepancyOpts.Parameters = {'velocity [m/s]':np.mean(BayesCalib.Posterior_df['$\sigma^2_{vel}$'].values)*factors['velocity [m/s]'].values, - 'p':np.mean(BayesCalib.Posterior_df['$\sigma^2_{p}$'].values)*factors['p'].values} + errormodels = pd.DataFrame.from_dict(delta_dict, orient="index").T + factors = errormodels #/ errormodels.sum() + DiscrepancyOpts.Parameters = { + "velocity [m/s]": np.mean(BayesCalib.Posterior_df["$\sigma^2_{vel}$"].values) + * factors["velocity [m/s]"].values, + "p": np.mean(BayesCalib.Posterior_df["$\sigma^2_{p}$"].values) + * factors["p"].values, + } BayesOptsValid.Discrepancy = DiscrepancyOpts - + # Construct error model based on discrepancy between ED and Data # BayesOptsValid.errorModel = True # coord_vel = pd.read_csv('models/velocity_points.csv') @@ -543,58 +590,61 @@ def run(params, averaging=True,errorPerc=0.05, inletLoc='top', PCEEDMethod='norm # BayesOptsValid.BiasInputs = {'velocity [m/s]':coord_vel, # 'p':coord_p} # BayesOptsValid.errorMetaModel = BayesCalib.errorMetaModel - + # ----- Start Bayesian inference ------- BayesValid = BayesOptsValid.create_Inference() # Save class objects - with open('PA_A_Bayes'+ValidModel.Name+'.pkl', 'wb') as output: + with open("PA_A_Bayes" + ValidModel.Name + ".pkl", "wb") as output: joblib.dump(BayesValid, output, 2) - np.savetxt('logBME_'+ValidModel.Name+'.csv',BayesValid.logBME) - #===================================================== - #================= Visualization =================== - #===================================================== + np.savetxt("logBME_" + ValidModel.Name + ".csv", BayesValid.logBME) + # ===================================================== + # ================= Visualization =================== + # ===================================================== # Plot posterior predictive - postPredictiveplot(ValidPCEModel.ModelObj.Name, errorPerc, averaging, - case='Valid',inletLoc=inletLoc, bins=20) + postPredictiveplot(ValidPCEModel.ModelObj.Name, averaging, case="Valid", + inletLoc=inletLoc, bins=20) - #===================================================== - #======== Moving folders to a new folder =========== - #===================================================== + # ===================================================== + 
# ======== Moving folders to a new folder =========== + # ===================================================== # Create an output directory - outdir = 'outputs_' + Model.Name + outdir = "outputs_" + Model.Name os.makedirs(outdir, exist_ok=True) # Move files to a main folder - for case in ['Calib', 'Valid']: - metaModel = PCEModel if case == 'Calib' else ValidPCEModel + for case in ["Calib", "Valid"]: + metaModel = PCEModel if case == "Calib" else ValidPCEModel ModelName = metaModel.ModelObj.Name - files = ['ExpDesign_'+ModelName+'.hdf5', - 'PCEModel_'+ModelName+'.pkl', - 'PA_A_Bayes'+ModelName+'.pkl'] + files = [ + "ExpDesign_" + ModelName + ".hdf5", + "PCEModel_" + ModelName + ".pkl", + "PA_A_Bayes" + ModelName + ".pkl", + ] - if case == 'Valid': - files.append('logBME_'+ValidModel.Name+'.csv') + if case == "Valid": + files.append("logBME_" + ValidModel.Name + ".csv") for file in files: - shutil.move("./"+file, outdir+'/'+file) - + shutil.move("./" + file, outdir + "/" + file) # Move folders to a main folder - folders = ['Outputs_PostProcessing_'+case.lower(), - 'Outputs_Bayes_'+ModelName+'_'+case, - 'postPred_'+ModelName+'_'+case] + folders = [ + "Outputs_PostProcessing_" + case.lower(), + "Outputs_Bayes_" + ModelName + "_" + case, + "postPred_" + ModelName + "_" + case, + ] for folder in folders: try: - shutil.move("./"+folder, outdir) + shutil.move("./" + folder, outdir) except: pass return PCEModel, BayesCalib, BayesValid -#========================================================================== -#======================== Set Parameters ============================== -#========================================================================== +# ========================================================================== +# ======================== Set Parameters ============================== +# ========================================================================== # ---------- set BayesValidRox params ---------- # PCEExpDesignMethod = 'normal' # 'normal' or 'sequential' # nInitSamples = 100 #50 Initial No. of orig.
Model runs for surrogate training @@ -608,8 +658,8 @@ def run(params, averaging=True,errorPerc=0.05, inletLoc='top', PCEEDMethod='norm # params = (nInitSamples,nTotalSamples,nBootstrapItr,perturbedData) -#========================================================================== -#==================== Run main scripts for PA-B ======================= -#========================================================================== -# PCEModel, BayesCalib, BayesValid = run(params,averaging=False, PCEEDMethod=PCEExpDesignMethod) -# PCEModel = run(params,averaging=True, inletLoc='top', errorPerc=0.05, PCEEDMethod=PCEExpDesignMethod) +# ========================================================================== +# ==================== Run main scripts for PA-B ======================= +# ========================================================================== +# PCEModel, BayesCalib, BayesValid = run(params,averaging=False) +# PCEModel = run(params,averaging=False, inletLoc='top') diff --git a/BayesValidRox/tests/PA-A/util/EDY_plot.py b/BayesValidRox/tests/PA-A/util/EDY_plot.py index 6686e264b..09e80abaa 100644 --- a/BayesValidRox/tests/PA-A/util/EDY_plot.py +++ b/BayesValidRox/tests/PA-A/util/EDY_plot.py @@ -6,147 +6,189 @@ Created on Tue May 18 16:19:43 2021 @author: farid """ -import sys, os, joblib +import sys +import os +import joblib import numpy as np -import scipy.stats as stats -import shutil import pandas as pd -import corner import seaborn as sns +import h5py import matplotlib -from sklearn.metrics import mean_squared_error from matplotlib.backends.backend_pdf import PdfPages +import matplotlib.pylab as plt + +matplotlib.use("agg") -matplotlib.use('agg') +inletLoc = "top" # top or left +inclusion = "squared" # squared or circular +case = "Calib" # Calib or Valid -inletLoc = 'top' # top or left -inclusion = 'squared' # squared or circular +Name = "stokesdarcyER" # stokesdarcyER stokesdarcyBJ stokespnm stokespnmNA +modelName = f"ffpm-{Name}_{inclusion}_inclusion_{inletLoc}Inflow" -Name = 'stokesdarcyER' #stokesdarcyER stokesdarcyBJ stokespnm stokespnmNA -modelName = 'ffpm-{}_{}_inclusion_{}Inflow'.format(Name,inclusion,inletLoc) -path = '../Results_07_12_2021_topInflow/outputs_{}/'.format(modelName) +path = f"../outputs_{modelName}/" # Load the objects -with open(path+'PCEModel_'+modelName+'.pkl', 'rb') as input: +if case.lower() == "calib": + pkl_name = f"{path}PCEModel_{modelName}" +else: + pkl_name = f"{path}PCEModel_{modelName}-valid" + +with open(pkl_name + ".pkl", "rb") as input: PCEModel = joblib.load(input) -import matplotlib.pylab as plt + SIZE = 30 -plt.rc('figure', figsize = (24, 16)) -plt.rc('font', family='serif', serif='Arial') -plt.rc('font', size=SIZE) -plt.rc('text', usetex=True) -plt.rc('axes', linewidth=3) -plt.rc('axes', grid=True) -plt.rc('grid', linestyle="-") -plt.rc('axes', titlesize=SIZE) # fontsize of the axes title -plt.rc('axes', labelsize=SIZE) # fontsize of the x and y labels -plt.rc('xtick', labelsize=SIZE) # fontsize of the tick labels -plt.rc('ytick', labelsize=SIZE) # fontsize of the tick labels -plt.rc('legend', fontsize=SIZE) # legend fontsize -plt.rc('figure', titlesize=SIZE) # fontsize of the figure title +plt.rc("figure", figsize=(24, 16)) +plt.rc("font", family="serif", serif="Arial") +plt.rc("font", size=SIZE) +plt.rc("text", usetex=True) +plt.rc("axes", linewidth=3) +plt.rc("axes", grid=True) +plt.rc("grid", linestyle="-") +plt.rc("axes", titlesize=SIZE) # fontsize of the axes title +plt.rc("axes", labelsize=SIZE) # fontsize of the x and y labels 
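Once the pickled surrogate is loaded as above, it can be queried directly. A minimal sketch of the evaluation pattern this utility relies on further down (the sample array is a stand-in and assumes inputs within the trained parameter ranges):

import numpy as np

n = 10  # hypothetical number of query points
samples = np.random.rand(n, PCEModel.NofPa)  # stand-in parameter sets
y_hat, y_std = PCEModel.eval_metamodel(samples=samples)
vel_mean = y_hat["velocity [m/s]"]  # (n, nPoints) predictive mean
vel_std = y_std["velocity [m/s]"]   # matching predictive standard deviation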
+plt.rc("xtick", labelsize=SIZE) # fontsize of the tick labels +plt.rc("ytick", labelsize=SIZE) # fontsize of the tick labels +plt.rc("legend", fontsize=SIZE) # legend fontsize +plt.rc("figure", titlesize=SIZE) # fontsize of the figure title # Add BayesValidRox path -sys.path.insert(0,'./../../../BayesValidRox/') -pdf = PdfPages('EDY.pdf') +sys.path.insert(0, "./../../../BayesValidRox/") +pdf = PdfPages("EDY.pdf") # Prescribe Output -output = 'velocity [m/s]' #'velocity [m/s]' 'p' +output = "velocity [m/s]" # 'velocity [m/s]' 'p' + # Select the ExpDesign -EDX = PCEModel.ExpDesign.X -EDY = PCEModel.ExpDesign.Y[output] +sim_tpye = "Calibration" if case.lower() == "calib" else "Validation" +HDF5File = f"ExpDesign_{modelName}-testset_{sim_tpye}.hdf5" +ValidSets = h5py.File(f"../data/ValidationSets/{HDF5File}", "r+") +validSamples = np.array(ValidSets["EDX/init_"]) +validSamplesY = np.array(ValidSets[f"EDY/{output}/init_"]) + +EDX = np.vstack((PCEModel.ExpDesign.X, validSamples)) +EDY = np.vstack((PCEModel.ExpDesign.Y[output], validSamplesY)) +ValidSets.close() # Run the surrogate model -# EDY, _ , EDX= PCEModel.eval_metamodel(nsamples=1000,return_samples=True) +# EDY, _, EDX = PCEModel.eval_metamodel(nsamples=100000, return_samples=True) # EDY = EDY[output] # Data -data = pd.read_csv('../'+PCEModel.ModelObj.MeasurementFile) +if case.lower() == "calib": + data = pd.read_csv("../" + PCEModel.ModelObj.MeasurementFile) +else: + data = pd.read_csv("../" + PCEModel.ModelObj.MeasurementFileValid) # Filter based on data range -# freezed_ID = [1,2] -freezed_ID = [7,8] +# freezed_ID = range(1, 11) +freezed_ID = [7, 8, 9, 10] # freezed_ID = [9,10] bound = 0.05 -l_limit,u_limit = {},{} -critIdx = np.array([True]*len(EDY)) +l_limit, u_limit = {}, {} +critIdx = np.array([True] * len(EDY)) for i, f_idx in enumerate(freezed_ID): - l_limit[f_idx] = data[output][f_idx-1]*(1-bound) - u_limit[f_idx] = data[output][f_idx-1]*(1+bound) - critIdx *= (EDY[:,int(f_idx)-1]>=l_limit[f_idx]) * \ - (EDY[:,int(f_idx)-1]<=u_limit[f_idx]) + l_limit[f_idx] = data[output][f_idx - 1] * (1 - bound) + u_limit[f_idx] = data[output][f_idx - 1] * (1 + bound) + critIdx *= (EDY[:, int(f_idx) - 1] >= l_limit[f_idx]) * ( + EDY[:, int(f_idx) - 1] <= u_limit[f_idx] + ) # Select the filtered sets -critEDX = EDX[critIdx] -critEDY = EDY[critIdx] +critEDX = EDX # [critIdx] +critEDY = EDY # [critIdx] -print("\nNumber of simulations found within the given range: {0} from {1}" \ - .format(critEDX.shape[0],EDX.shape[0])) +print( + f"\nNumber of simulations found within the given range: \n" + f"{critEDX.shape[0]} out of {EDX.shape[0]}" +) while critEDX.shape[0] == 0: raise Exception("No parameter set found!") - - + +# Run the surrogate model +y_hat, y_std = PCEModel.eval_metamodel(samples=critEDX) + # Plot for each Point ID -for idx in range(1,11): - +for idx in range(1, 11): + # Plot the predictions vs simulations fig, ax = plt.subplots() - plt.title("Point ID: {}".format(idx)) - + plt.title(f"Point ID: {idx}") + # Simulations - plt.plot(critEDY[:,idx-1], color='navy',marker='x', linewidth=2, - label='Simulation') - + plt.plot( + critEDY[:, idx - 1], color="navy", marker="x", linewidth=2, label="Simulation" + ) + + # Plot the predition and its uncertainty + plt.plot( + y_hat[output][:, idx - 1], + color="green", + ls="--", + marker="x", + linewidth=2, + label="Surrogate Approximation", + ) + + plt.fill_between( + np.arange(len(critEDY)), + y_hat[output][:, idx - 1] - 1 * y_std[output][:, idx - 1], + y_hat[output][:, idx - 1] + 1 * y_std[output][:, 
idx - 1], + color="green", + alpha=0.25, + ) + if idx in freezed_ID: - f_ID = np.where(np.array(freezed_ID)==idx)[0][0] + f_ID = np.where(np.array(freezed_ID) == idx)[0][0] try: - plt.fill_between(np.arange(len(critEDY)), l_limit[idx], u_limit[idx], - color='red',alpha=0.25) + plt.fill_between( + np.arange(len(critEDY)), + l_limit[idx], + u_limit[idx], + color="red", + alpha=0.25, + ) except: pass - - # RMSE = mean_squared_error(critEDY[:,idx-1], y_hat[output][:,idx-1], - # squared=False, multioutput='raw_values') - + # Plot data - plt.hlines(data[output][idx-1],0,len(critEDY[:,idx-1]), linewidth=3, - color='red',label='Ref. data') - - # Print a report table - # print("\n>>>>> Errors of {} <<<<<".format(output)) - # print("\nIndex | RMSE") - # print('-'*35) - # print('\n'.join('{0} | {1:.3e}'.format(i+1,k) for i,k \ - # in enumerate(RMSE))) - # plt.plot(np.arange(len(critEDY),len(EDX)), EDY[normIdx][:,idx-1], color='red', linewidth=2) - + plt.hlines( + data[output][idx - 1], + 0, + len(critEDY[:, idx - 1]), + linewidth=3, + color="red", + label="Ref. data", + ) + plt.ylabel(output) plt.xlabel("Run number") plt.legend(loc="best") # plt.show() - + # save the current figure - pdf.savefig(fig, bbox_inches='tight') + pdf.savefig(fig, bbox_inches="tight") # Destroy the current plot - plt.clf(); - -pdf.close(); + plt.clf() + +pdf.close() SIZE = 12 -plt.rc('font', size=SIZE) -plt.rc('axes', titlesize=SIZE) # fontsize of the axes title -plt.rc('axes', labelsize=SIZE) # fontsize of the x and y labels -plt.rc('xtick', labelsize=SIZE) # fontsize of the tick labels -plt.rc('ytick', labelsize=SIZE) # fontsize of the tick labels -plt.rc('legend', fontsize=SIZE) +plt.rc("font", size=SIZE) +plt.rc("axes", titlesize=SIZE) # fontsize of the axes title +plt.rc("axes", labelsize=SIZE) # fontsize of the x and y labels +plt.rc("xtick", labelsize=SIZE) # fontsize of the tick labels +plt.rc("ytick", labelsize=SIZE) # fontsize of the tick labels +plt.rc("legend", fontsize=SIZE) labels = [PCEModel.Inputs.Marginals[i].Name for i in range(PCEModel.NofPa)] -df1 = pd.DataFrame(EDX,columns=labels) -df2 = pd.DataFrame(critEDX,columns=labels) -df = pd.concat([df1.assign(dataset='EDX'), df2.assign(dataset='ValidSet')]) -g = sns.pairplot(df, hue='dataset', diag_kind = 'hist') -g.savefig('validSamples.svg',bbox_inches='tight') \ No newline at end of file +df1 = pd.DataFrame(EDX, columns=labels) +df2 = pd.DataFrame(critEDX, columns=labels) +df = pd.concat([df1.assign(dataset="EDX"), df2.assign(dataset="ValidSet")]) +g = sns.pairplot(df, hue="dataset", diag_kind="hist") +g.savefig("validSamples.svg", bbox_inches="tight") diff --git a/BayesValidRox/tests/PA-A/util/post_plot_PAPER.py b/BayesValidRox/tests/PA-A/util/post_plot_PAPER.py index 6cd466581..c76da1eee 100644 --- a/BayesValidRox/tests/PA-A/util/post_plot_PAPER.py +++ b/BayesValidRox/tests/PA-A/util/post_plot_PAPER.py @@ -20,169 +20,214 @@ from matplotlib.patches import Patch import matplotlib.lines as mlines from matplotlib import ticker import matplotlib.pylab as plt + # Add BayesValidRox path -sys.path.insert(0,'./../../../../BayesValidRox/') -plt.style.use('seaborn-deep') # talk, paper, poster -#fsize = 50 -#params = {'legend.fontsize': fsize, - #'axes.labelsize': fsize, - #'axes.titlesize': fsize, - #'xtick.labelsize' :fsize, - #'ytick.labelsize': fsize, - #'grid.color': 'k', - #'grid.linestyle': ':', - #'grid.linewidth': 0.5, +sys.path.insert(0, "./../../../../BayesValidRox/") +plt.style.use("seaborn-deep") # talk, paper, poster +# fsize = 50 +# params = 
{'legend.fontsize': fsize, +#'axes.labelsize': fsize, +#'axes.titlesize': fsize, +#'xtick.labelsize' :fsize, +#'ytick.labelsize': fsize, +#'grid.color': 'k', +#'grid.linestyle': ':', +#'grid.linewidth': 0.5, ## 'mathtext.fontset' : 'stix', ## 'mathtext.rm' : 'serif', - #'font.family' : 'serif', - #'font.serif' : "Times New Roman", # or "Times" - #'figure.figsize' : (32, 24), - ## 'savefig.dpi':1500, - #'text.usetex': True - #} -#plt.rcParams.update(params) - -def upper_rugplot(data, height=.05, ax=None, **kwargs): +#'font.family' : 'serif', +#'font.serif' : "Times New Roman", # or "Times" +#'figure.figsize' : (32, 24), +## 'savefig.dpi':1500, +#'text.usetex': True +# } +# plt.rcParams.update(params) + + +def upper_rugplot(data, height=0.05, ax=None, **kwargs): from matplotlib.collections import LineCollection + ax = ax or plt.gca() kwargs.setdefault("linewidth", 1) - segs = np.stack((np.c_[data, data], - np.c_[np.ones_like(data), np.ones_like(data)-height]), - axis=-1) + segs = np.stack( + (np.c_[data, data], np.c_[np.ones_like(data), np.ones_like(data) - height]), + axis=-1, + ) lc = LineCollection(segs, transform=ax.get_xaxis_transform(), **kwargs) ax.add_collection(lc) -def postPredictiveplot(modelName, errorPrec, averaging=True, case='Calib',inletLoc='top', bins='auto'): - #result_folder = '../Results_14_06_2021/outputs_{}/'.format(modelName.split('-v')[0]) - result_folder = './' - directory = result_folder+'Outputs_Bayes_'+modelName+'_'+case - OutputDir = ('postPred_'+modelName+'_'+case) - if not os.path.exists(OutputDir): os.makedirs(OutputDir) + +def postPredictiveplot( + modelName, averaging=True, case="Calib", inletLoc="top", bins="auto" +): + # result_folder = '../Results_14_06_2021/outputs_{}/'.format(modelName.split('-v')[0]) + result_folder = "./" + directory = result_folder + "Outputs_Bayes_" + modelName + "_" + case + OutputDir = "postPred_" + modelName + "_" + case + if not os.path.exists(OutputDir): + os.makedirs(OutputDir) # Load Post pred file - f = h5py.File(directory+'/'+"postPredictive.hdf5", 'r+') + f = h5py.File(directory + "/" + "postPredictive.hdf5", "r+") # Load PCEModel - with open(result_folder+'PCEModel_'+modelName+'.pkl', 'rb') as input: + with open(result_folder + "PCEModel_" + modelName + ".pkl", "rb") as input: PCEModel = joblib.load(input) - - if case == 'Calib': + + if case == "Calib": data = pd.read_csv(PCEModel.ModelObj.MeasurementFile) else: data = pd.read_csv(PCEModel.ModelObj.MeasurementFileValid) - + # Generate Prior Predictive - #priorPred, std = PCEModel.eval_metamodel(nsamples=10000) + # priorPred, std = PCEModel.eval_metamodel(nsamples=10000) my_cmap = plt.get_cmap("Dark2") - for OutputName in PCEModel.ModelObj.Output.Names: - + # ---- Bar chart ---- - width = 0.35 if OutputName != 'p' else 0.1 + width = 0.35 if OutputName != "p" else 0.1 fig = plt.figure() - x_coords = np.array(f["x_values/"+OutputName]) - + x_coords = np.array(f["x_values/" + OutputName]) + refdata = data[OutputName][~np.isnan(data[OutputName])].values - postPred = np.array(f["EDY/"+OutputName]) - meanPred = np.mean(postPred,axis=0) - stdPred = np.std(postPred,axis=0) - plt.bar(np.array(x_coords)-0.5*width, meanPred, yerr=stdPred, - width=width, color=my_cmap.colors[1], capsize=7) - plt.bar(np.array(x_coords)+0.5*width, refdata, - width,color=my_cmap.colors[0]) + postPred = np.array(f["EDY/" + OutputName]) + meanPred = np.mean(postPred, axis=0) + stdPred = np.std(postPred, axis=0) + plt.bar( + np.array(x_coords) - 0.5 * width, + meanPred, + yerr=stdPred, + width=width, + 
color=my_cmap.colors[1], + capsize=7, + ) + plt.bar( + np.array(x_coords) + 0.5 * width, refdata, width, color=my_cmap.colors[0] + ) plt.xlabel("Point ID") - plt.ylabel(OutputName) if OutputName != 'p' else plt.ylabel('pressure [bar]') - plt.legend(["Prediction","Ref. Data"]) + plt.ylabel(OutputName) if OutputName != "p" else plt.ylabel("pressure [bar]") + plt.legend(["Prediction", "Ref. Data"]) plt.xticks(x_coords) - + # save the current figure - plotname = OutputName if OutputName == 'p' else 'velocity' - fig.savefig('./'+OutputDir+'/'+plotname+'_barchart.svg', bbox_inches='tight') - fig.savefig('./'+OutputDir+'/'+plotname+'_barchart.pdf', bbox_inches='tight') + plotname = OutputName if OutputName == "p" else "velocity" + fig.savefig( + "./" + OutputDir + "/" + plotname + "_barchart.svg", bbox_inches="tight" + ) + fig.savefig( + "./" + OutputDir + "/" + plotname + "_barchart.pdf", bbox_inches="tight" + ) plt.close() - + # ---- Hist plot ---- # Find pointIDs - csv_file = 'pressure_points.csv' if OutputName == 'p' else 'velocity_points.csv' - if case == 'Calib': - pointIDs = pd.read_csv('./models/'+csv_file).query("__vtkIsSelected__ \ - == 'Calibration'")['vtkOriginalPointIds'].to_numpy() + csv_file = "pressure_points.csv" if OutputName == "p" else "velocity_points.csv" + if case == "Calib": + pointIDs = ( + pd.read_csv("./models/" + csv_file) + .query( + "__vtkIsSelected__ \ + == 'Calibration'" + )["vtkOriginalPointIds"] + .to_numpy() + ) else: - pointIDs = pd.read_csv('./models/'+csv_file).query("__vtkIsSelected__ \ - == 'Validation'")['vtkOriginalPointIds'].to_numpy() + pointIDs = ( + pd.read_csv("./models/" + csv_file) + .query( + "__vtkIsSelected__ \ + == 'Validation'" + )["vtkOriginalPointIds"] + .to_numpy() + ) import matplotlib + gs00 = matplotlib.gridspec.GridSpec(3, 4) fig = plt.figure() - cnt=0 + cnt = 0 for idx, x in enumerate(pointIDs): - if idx!=0 and idx%4==0: cnt += 1 + if idx != 0 and idx % 4 == 0: + cnt += 1 if idx > 7: - ax = fig.add_subplot(gs00[cnt,idx%4+1:idx%4+2]) + ax = fig.add_subplot(gs00[cnt, idx % 4 + 1 : idx % 4 + 2]) else: - ax = fig.add_subplot(gs00[cnt,idx%4]) + ax = fig.add_subplot(gs00[cnt, idx % 4]) # Prior predictive - #outputs = priorPred[OutputName][:,idx] - #sns.histplot(outputs[outputs>0], ax=ax,# bins=50, - #color='blue', alpha=0.4,stat="count") + # outputs = priorPred[OutputName][:,idx] + # sns.histplot(outputs[outputs>0], ax=ax,# bins=50, + # color='blue', alpha=0.4,stat="count") # Posterior predictive - postPred = np.array(f["EDY/"+OutputName])[:,idx] - sns.histplot(postPred[postPred>0], ax=ax, bins=bins, - color='orange',stat="count") #normalizes counts so that the sum of the bar heights is 1 + postPred = np.array(f["EDY/" + OutputName])[:, idx] + sns.histplot( + postPred[postPred > 0], ax=ax, bins=bins, color="orange", stat="count" + ) # raw counts per bin (no normalization) # Reference data from the pore-scale simulation - ax.axvline(x=data[OutputName][idx], linewidth=5, color='green') - #sns.histplot(np.random.normal(data[OutputName][idx], errorPrec*data[OutputName][idx], len(postPred[postPred>0])), ax=ax,bins=bins, - #color='green', alpha=0.5, stat="count") + ax.axvline(x=data[OutputName][idx], linewidth=5, color="green") + # sns.histplot(np.random.normal(data[OutputName][idx], errorPrec*data[OutputName][idx], len(postPred[postPred>0])), ax=ax,bins=bins, + # color='green', alpha=0.5, stat="count") # Print confidence interval of ExpDesign.Y (Trained area) - modelRuns = PCEModel.ExpDesign.Y[OutputName][:,idx] - 
upper_rugplot(modelRuns, ax=ax, alpha=0.75, color='grey') + modelRuns = PCEModel.ExpDesign.Y[OutputName][:, idx] + upper_rugplot(modelRuns, ax=ax, alpha=0.75, color="grey") # fmt = ticker.StrMethodFormatter("{x}") # ax.xaxis.set_major_formatter(fmt) # ax.yaxis.set_major_formatter(fmt) legend_elements = [ - Patch(facecolor='orange', edgecolor='orange',label='Posterior Pred.'), - Patch(facecolor='green', edgecolor='green',alpha=0.5, label='Ref. Data')] - if case == 'Calib': - legend_elements.append(mlines.Line2D([], [], marker='|', color='grey', alpha=0.75, - linestyle='None', markersize=15, markeredgewidth=1.5, label='Orig. Responses')) - - font = {'family': 'serif', - 'weight': 'normal', - 'size': 28, - } - - ax.legend(handles=legend_elements, fontsize=font['size']-12) - - - #x_label = 'Pressure [Pa]' if OutputName == 'p' else 'velocity [m/s]' - #ax.set_xlabel(x_label, fontdict=font) - ax.set_ylabel('Count', fontdict=font) + Patch(facecolor="orange", edgecolor="orange", label="Posterior Pred."), + Patch( + facecolor="green", edgecolor="green", alpha=0.5, label="Ref. Data" + ), + ] + if case == "Calib": + legend_elements.append( + mlines.Line2D( + [], + [], + marker="|", + color="grey", + alpha=0.75, + linestyle="None", + markersize=15, + markeredgewidth=1.5, + label="Orig. Responses", + ) + ) + + font = {"family": "serif", "weight": "normal", "size": 28} + + ax.legend(handles=legend_elements, fontsize=font["size"] - 12) + + # x_label = 'Pressure [Pa]' if OutputName == 'p' else 'velocity [m/s]' + # ax.set_xlabel(x_label, fontdict=font) + ax.set_ylabel("Count", fontdict=font) # ax.set_xscale('log') - ax.tick_params(axis='both', which='major', labelsize=font['size']) - plt.ticklabel_format(axis="x", style="sci", scilimits=(0,0)) - plt.ticklabel_format(axis="y", style="sci", scilimits=(0,0)) - ax.yaxis.get_offset_text().set_fontsize(font['size']) - ax.xaxis.get_offset_text().set_fontsize(font['size']) + ax.tick_params(axis="both", which="major", labelsize=font["size"]) + plt.ticklabel_format(axis="x", style="sci", scilimits=(0, 0)) + plt.ticklabel_format(axis="y", style="sci", scilimits=(0, 0)) + ax.yaxis.get_offset_text().set_fontsize(font["size"]) + ax.xaxis.get_offset_text().set_fontsize(font["size"]) plt.grid(True) - # if case == 'Calib': ax.set_xlim(np.min(modelRuns),np.max(modelRuns)) + # if case == "Calib": + # ax.set_xlim(np.min(modelRuns), np.max(modelRuns)) - title = 'Point ID: '+str(x) - plt.title(title,fontdict=font) + title = "Point ID: " + str(x) + plt.title(title, fontdict=font) plt.tight_layout(pad=0.95) - plotname = OutputName if OutputName == 'p' else 'velocity' - fig.savefig('./'+OutputDir+'/'+plotname+'.svg', bbox_inches='tight') - fig.savefig('./'+OutputDir+'/'+plotname+'.pdf', bbox_inches='tight') + plotname = OutputName if OutputName == "p" else "velocity" + fig.savefig("./" + OutputDir + "/" + plotname + ".svg", bbox_inches="tight") + fig.savefig("./" + OutputDir + "/" + plotname + ".pdf", bbox_inches="tight") plt.close() -#modelName = 'ffpm-stokespnm' #stokespnm stokesdarcyER stokesdarcyBJ -#postPredictiveplot(modelName, errorPrec=0.05,averaging=True, case='Calib', bins=20) -#postPredictiveplot(modelName+'-valid', errorPrec=0.05, case='Valid',bins=20) + +# modelName = 'ffpm-stokespnm' #stokespnm stokesdarcyER stokesdarcyBJ +# postPredictiveplot(modelName, averaging=True, case='Calib', bins=20) +# postPredictiveplot(modelName+'-valid', case='Valid', bins=20) diff --git a/BayesValidRox/tests/PA-A/util/vel_diagnostics.py 
index 5bcf5bbf8..dfb3b49ad 100644
--- a/BayesValidRox/tests/PA-A/util/vel_diagnostics.py
+++ b/BayesValidRox/tests/PA-A/util/vel_diagnostics.py
@@ -12,116 +12,137 @@ import scipy.stats as stats
 import shutil
 import pandas as pd
 import seaborn as sns
+import h5py
 import matplotlib
 from sklearn.metrics import mean_squared_error
 from matplotlib.backends.backend_pdf import PdfPages

-matplotlib.use('agg')
+matplotlib.use("agg")

-path = '../'
-inletLoc = 'top' # top or left
-inclusion = 'squared' # squared or circular
+path = "../"
+inletLoc = "top"  # top or left
+inclusion = "squared"  # squared or circular

-Name = 'stokespnmNA' #stokesdarcyER stokesdarcyBJ stokespnm stokespnmNA
-modelName = 'ffpm-{}_{}_inclusion_{}Inflow'.format(Name,inclusion,inletLoc)
+Name = "stokesdarcyBJ"  # stokesdarcyER stokesdarcyBJ stokespnm stokespnmNA
+modelName = "ffpm-{}_{}_inclusion_{}Inflow".format(Name, inclusion, inletLoc)

 # Load the objects
-with open(path+'PCEModel_'+modelName+'.pkl', 'rb') as input:
+with open(path + "PCEModel_" + modelName + ".pkl", "rb") as input:
     PCEModel = joblib.load(input)

 import matplotlib.pylab as plt
+
 SIZE = 30
-plt.rc('figure', figsize = (24, 16))
-plt.rc('font', family='serif', serif='Arial')
-plt.rc('font', size=SIZE)
-plt.rc('text', usetex=True)
-plt.rc('axes', linewidth=3)
-plt.rc('axes', grid=True)
-plt.rc('grid', linestyle="-")
-plt.rc('axes', titlesize=SIZE) # fontsize of the axes title
-plt.rc('axes', labelsize=SIZE) # fontsize of the x and y labels
-plt.rc('xtick', labelsize=SIZE) # fontsize of the tick labels
-plt.rc('ytick', labelsize=SIZE) # fontsize of the tick labels
-plt.rc('legend', fontsize=SIZE) # legend fontsize
-plt.rc('figure', titlesize=SIZE) # fontsize of the figure title
+plt.rc("figure", figsize=(24, 16))
+plt.rc("font", family="serif", serif="Arial")
+plt.rc("font", size=SIZE)
+plt.rc("text", usetex=True)
+plt.rc("axes", linewidth=3)
+plt.rc("axes", grid=True)
+plt.rc("grid", linestyle="-")
+plt.rc("axes", titlesize=SIZE)  # fontsize of the axes title
+plt.rc("axes", labelsize=SIZE)  # fontsize of the x and y labels
+plt.rc("xtick", labelsize=SIZE)  # fontsize of the tick labels
+plt.rc("ytick", labelsize=SIZE)  # fontsize of the tick labels
+plt.rc("legend", fontsize=SIZE)  # legend fontsize
+plt.rc("figure", titlesize=SIZE)  # fontsize of the figure title

 # Add BayesValidRox path
-sys.path.insert(0,'./../../../BayesValidRox/')
+sys.path.insert(0, "./../../../BayesValidRox/")

-output = 'velocity [m/s]' #'velocity [m/s]' 'p'
+output = "velocity [m/s]"  # "velocity [m/s]" or "p"
 EDX = PCEModel.ExpDesign.X
 EDY = PCEModel.ExpDesign.Y[output]

 # Data
-data = pd.read_csv('../'+PCEModel.ModelObj.MeasurementFile)
+data = pd.read_csv("../" + PCEModel.ModelObj.MeasurementFile)

-pdf = PdfPages('{}_Simulations_vs_PCE.pdf'.format(Name))
+pdf = PdfPages("{}_Simulations_vs_PCE.pdf".format(Name))

 # Load the validation sets; these overwrite the EDX/EDY taken from the
 # experimental design above
-import h5py
-HDF5File = "ExpDesign_"+modelName+"-testset_Calibration.hdf5"
-ValidSets = h5py.File("../data/ValidationSets/"+HDF5File, 'r+')
+HDF5File = "ExpDesign_" + modelName + "-testset_Calibration.hdf5"
+ValidSets = h5py.File("../data/ValidationSets/" + HDF5File, "r")  # read-only
 validSamples = np.array(ValidSets["EDX/init_"])
 EDX = validSamples
-EDY = np.array(ValidSets["EDY/"+output+"/init_"])
+EDY = np.array(ValidSets["EDY/" + output + "/init_"])
 ValidSets.close()

-critIdx = [EDX[:,1]>=0.005]
+critIdx = [EDX[:, 1] >= 0.005]
 # critIdx = [EDX[:,1]<0.005]

 # Critical Velocities
-critEDX = EDX#[critIdx]
-critEDY = EDY#[critIdx]
+critEDX = EDX  # [critIdx]
+critEDY = EDY  # [critIdx]

 # Plot for each Point ID
 print("\n>>>>> Errors of {} <<<<<".format(output))
 print("\nIndex | RMSE")
-print('-'*35)
-for idx in range(1,11):
+print("-" * 35)
+for idx in range(1, 11):

     # Run the surrogate model
     y_hat, y_std = PCEModel.eval_metamodel(samples=critEDX)
-
+
     # Plot the predictions vs simulations
     fig, ax = plt.subplots()
     plt.title("Point ID: {}".format(idx))
-
+
     # Simulations
-    plt.plot(critEDY[:,idx-1], color='navy',marker='x', linewidth=2,
-             label='Simulation')
-
+    plt.plot(
+        critEDY[:, idx - 1], color="navy", marker="x", linewidth=2, label="Simulation"
+    )
+
     # Plot the prediction and its uncertainty
-    plt.plot(y_hat[output][:,idx-1], color='green', ls='--',marker='x',
-             linewidth=2, label='Surrogate Approximation')
-
-    plt.fill_between(np.arange(len(critEDY)),y_hat[output][:,idx-1]-1*y_std[output][:,idx-1],
-                     y_hat[output][:,idx-1]+1*y_std[output][:,idx-1],
-                     color='green',alpha=0.25)
-
-    RMSE = mean_squared_error(critEDY[:,idx-1], y_hat[output][:,idx-1],
-                              squared=False, multioutput='raw_values')
-
+    plt.plot(
+        y_hat[output][:, idx - 1],
+        color="green",
+        ls="--",
+        marker="x",
+        linewidth=2,
+        label="Surrogate Approximation",
+    )
+
+    plt.fill_between(
+        np.arange(len(critEDY)),
+        y_hat[output][:, idx - 1] - 1 * y_std[output][:, idx - 1],
+        y_hat[output][:, idx - 1] + 1 * y_std[output][:, idx - 1],
+        color="green",
+        alpha=0.25,
+    )
+
+    RMSE = mean_squared_error(
+        critEDY[:, idx - 1],
+        y_hat[output][:, idx - 1],
+        squared=False,
+        multioutput="raw_values",
+    )
+
     # Plot data
-    plt.hlines(data[output][idx-1],0,len(critEDY[:,idx-1]), linewidth=3,
-               color='red',label='Ref. data')
-
+    plt.hlines(
+        data[output][idx - 1],
+        0,
+        len(critEDY[:, idx - 1]),
+        linewidth=3,
+        color="red",
+        label="Ref. data",
+    )
+
     # Print a report table
-    print('\n'.join('{0} | {1:.3e}'.format(idx,k) for i,k \
-                    in enumerate(RMSE)))
+    print("\n".join("{0} | {1:.3e}".format(idx, k) for i, k in enumerate(RMSE)))

     # plt.plot(np.arange(len(critEDY),len(EDX)), EDY[normIdx][:,idx-1], color='red', linewidth=2)
-
+
     plt.ylabel(output)
     plt.xlabel("Run number")
     plt.legend(loc="best")
     plt.show()
-
+
     # save the current figure
-    pdf.savefig(fig, bbox_inches='tight')
+    pdf.savefig(fig, bbox_inches="tight")

     # Destroy the current plot
     plt.clf()
-
+
 pdf.close()
--
GitLab
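Note on the upper_rugplot() helper called in the post_plot_PAPER.py hunk above: it marks the training outputs (ExpDesign.Y) as short tick marks hanging from the top axis spine of each posterior-predictive histogram. Its definition lives earlier in that file and is not part of this patch. For readers of the excerpt in isolation, a minimal sketch of such a helper could look like the code below; the signature is inferred from the call site, so treat it as an illustration rather than the project's exact implementation.

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection


def upper_rugplot(data, height=0.05, ax=None, **kwargs):
    """Draw one short tick per sample, hanging from the top axis spine."""
    ax = ax or plt.gca()
    kwargs.setdefault("linewidth", 1)
    # Each segment runs from y = 1.0 (the top spine, in axes coordinates)
    # down to y = 1.0 - height, at the sample's x position (data coordinates).
    segs = np.stack(
        (np.c_[data, data], np.c_[np.ones_like(data), np.ones_like(data) - height]),
        axis=-1,
    )
    lc = LineCollection(segs, transform=ax.get_xaxis_transform(), **kwargs)
    ax.add_collection(lc)

Using ax.get_xaxis_transform() blends data coordinates on x with axes coordinates on y, so the rug stays pinned to the top of the axes no matter how the y-limits of the histogram change.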