diff --git a/examples/OHagan-function/OHagan.py b/examples/OHagan-function/OHagan.py
new file mode 100644
index 0000000000000000000000000000000000000000..71a58e092600bcd5edd0aca320d0ba6b840e9152
--- /dev/null
+++ b/examples/OHagan-function/OHagan.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Nov 19 08:56:21 2018
+
+@author: farid
+"""
+import numpy as np
+
+
+def OHagan(xx, *args):
+    """
+    Oakley & O'Hagan (2004) function.
+
+    This function's a-coefficients are chosen so that 5 of the input variables
+    contribute significantly to the output variance, 5 have a much smaller
+    effect, and the remaining 5 have almost no effect on the output variance.
+
+    Oakley, J. E. and O'Hagan, A. (2004). Probabilistic sensitivity analysis
+    of complex models: a Bayesian approach. J. R. Statist. Soc. B, 66(3),
+    751-769.
+
+    Parameters
+    ----------
+    xx : array of shape (n_samples, n_params)
+        Input parameter sets.
+
+    Returns
+    -------
+    output : dict
+        Output dictionary with keys 'x_values' and 'Z' (model response).
+
+    """
+    n_samples, n_params = xx.shape
+
+    # Read the matrix M
+    M = np.loadtxt('data/M.csv', delimiter=',')
+
+    a_1 = np.array([0.0118, 0.0456, 0.2297, 0.0393, 0.1177, 0.3865, 0.3897,
+                    0.6061, 0.6159, 0.4005, 1.0741, 1.1474, 0.7880, 1.1242,
+                    1.1982])
+    a_2 = np.array([0.4341, 0.0887, 0.0512, 0.3233, 0.1489, 1.0360, 0.9892,
+                    0.9672, 0.8977, 0.8083, 1.8426, 2.4712, 2.3946, 2.0045,
+                    2.2621])
+    a_3 = np.array([0.1044, 0.2057, 0.0774, 0.2730, 0.1253, 0.7526, 0.8570,
+                    1.0331, 0.8388, 0.7970, 2.2145, 2.0382, 2.4004, 2.0541,
+                    1.9845])
+
+    # Compute response
+    xx_M = np.dot(xx, M)
+    last_term = np.sum(xx_M * xx, axis=1)
+
+    y = np.dot(xx, a_1) + np.dot(np.sin(xx), a_2) + np.dot(np.cos(xx), a_3)
+    y += last_term
+
+    # Prepare output dict using standard bayesvalidrox format
+    output = {'x_values': np.zeros(1), 'Z': y.T}
+
+    return output
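+
+
+if __name__ == "__main__":
+    # Minimal smoke test -- an editorial sketch, not part of the original
+    # example.  Run it from examples/OHagan-function/ so that 'data/M.csv'
+    # resolves; the samples follow the N(0, 1) marginals used in
+    # test_OHagan.py.
+    test_samples = np.random.normal(0, 1, (5, 15))
+    print(OHagan(test_samples)['Z'].shape)  # expected: (5,)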
diff --git a/examples/OHagan-function/data/M.csv b/examples/OHagan-function/data/M.csv
new file mode 100644
index 0000000000000000000000000000000000000000..98ec96817d7910bac21cd1ea54d54d7a5f432ad8
--- /dev/null
+++ b/examples/OHagan-function/data/M.csv
@@ -0,0 +1,15 @@
+-0.022482886,-0.18501666,0.13418263,0.36867264,0.17172785,0.13651143,-0.44034404,-0.081422854,0.71321025,-0.44361072,0.50383394,-0.024101458,-0.045939684,0.21666181,0.055887417
+0.2565963,0.053792287,0.25800381,0.23795905,-0.59125756,-0.081627077,-0.28749073,0.41581639,0.49752241,0.083893165,-0.11056683,0.033222351,-0.13979497,-0.031020556,-0.22318721
+-0.055999811,0.19542252,0.095529005,-0.2862653,-0.14441303,0.22369356,0.14527412,0.28998481,0.2310501,-0.31929879,-0.29039128,-0.20956898,0.43139047,0.024429152,0.044904409
+0.66448103,0.43069872,0.29924645,-0.16202441,-0.31479544,-0.39026802,0.17679822,0.057952663,0.17230342,0.13466011,-0.3527524,0.25146896,-0.018810529,0.36482392,-0.32504618
+-0.121278,0.12463327,0.10656519,0.046562296,-0.21678617,0.19492172,-0.065521126,0.024404669,-0.09682886,0.19366196,0.33354757,0.31295994,-0.083615456,-0.25342082,0.37325717
+-0.2837623,-0.32820154,-0.10496068,-0.22073452,-0.13708154,-0.14426375,-0.11503319,0.22424151,-0.030395022,-0.51505615,0.017254978,0.038957118,0.36069184,0.30902452,0.050030193
+-0.077875893,0.003745656,0.88685604,-0.26590028,-0.079325357,-0.042734919,-0.18653782,-0.35604718,-0.17497421,0.088699956,0.40025886,-0.055979693,0.13724479,0.21485613,-0.011265799
+-0.09229473,0.59209563,0.031338285,-0.033080861,-0.24308858,-0.099798547,0.034460195,0.095119813,-0.3380162,0.0063860024,-0.61207299,0.081325416,0.88683114,0.14254905,0.14776204
+-0.13189434,0.52878496,0.12652391,0.045113625,0.58373514,0.37291503,0.11395325,-0.29479222,-0.57014085,0.46291592,-0.094050179,0.13959097,-0.38607402,-0.4489706,-0.14602419
+0.058107658,-0.32289338,0.093139162,0.072427234,-0.56919401,0.52554237,0.23656926,-0.011782016,0.071820601,0.078277291,-0.13355752,0.22722721,0.14369455,-0.45198935,-0.55574794
+0.66145875,0.34633299,0.14098019,0.51882591,-0.28019898,-0.1603226,-0.068413337,-0.20428242,0.069672173,0.23112577,-0.044368579,-0.16455425,0.21620977,0.0042702105,-0.087399014
+0.31599556,-0.027551859,0.13434254,0.13497371,0.05400568,-0.17374789,0.17525393,0.060258929,-0.17914162,-0.31056619,-0.25358691,0.025847535,-0.43006001,-0.62266361,-0.033996882
+-0.29038151,0.03410127,0.034903413,-0.12121764,0.026030714,-0.33546274,-0.41424111,0.05324838,-0.27099455,-0.026251302,0.41024137,0.26636349,0.15582891,-0.18666254,0.019895831
+-0.24388652,-0.44098852,0.012618825,0.24945112,0.071101888,0.24623792,0.17484502,0.0085286769,0.2514707,-0.14659862,-0.08462515,0.36931333,-0.29955293,0.1104436,-0.75690139
+0.041494323,-0.25980564,0.46402128,-0.36112127,-0.94980789,-0.16504063,0.0030943325,0.052792942,0.22523648,0.38390366,0.45562427,-0.18631744,0.0082333995,0.16670803,0.16045688
diff --git a/examples/OHagan-function/data/sparse_solver_comparison.py b/examples/OHagan-function/data/sparse_solver_comparison.py
new file mode 100644
index 0000000000000000000000000000000000000000..5245bb89d56ecba9967e2c1f1ddb90438dc23ce9
--- /dev/null
+++ b/examples/OHagan-function/data/sparse_solver_comparison.py
@@ -0,0 +1,240 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Sat Sep 10 09:44:05 2022
+
+@author: farid
+"""
+
+import numpy as np
+import joblib
+import os
+import scipy.stats as st
+
+import sys
+sys.path.append("../../../src/bayesvalidrox/")
+
+from pylink.pylink import PyLinkForwardModel
+from surrogate_models.inputs import Input
+from surrogate_models.surrogate_models import MetaModel
+from post_processing.post_processing import PostProcessing
+from bayes_inference.bayes_inference import BayesInference
+from bayes_inference.discrepancy import Discrepancy
+import matplotlib
+matplotlib.use('agg')
+from matplotlib.backends.backend_pdf import PdfPages
+import matplotlib.ticker as ticker
+from matplotlib.offsetbox import AnchoredText
+from matplotlib.patches import Patch
+import matplotlib.pyplot as plt
+# Load the mplstyle
+plt.style.use(os.path.join(
+    os.path.split(__file__)[0],
+    '../../../src/bayesvalidrox/', 'bayesvalidrox.mplstyle'))
+
+
+def plot_seq_design_diagnostics(meta_model, util_funcs,
+                                ref_BME_KLD=None, save_fig=True):
+    """
+    Plots the sequential-design diagnostics (modified LOO error, validation
+    error, KLD, BME, RMSE of the mean and std, Hellinger distance) as box
+    plots over the design replications, one figure per criterion.
+
+    Parameters
+    ----------
+    meta_model : obj
+        Trained MetaModel object holding the sequential-design results.
+    util_funcs : str or list of str
+        Name(s) of the utility functions (here: the solver names) to compare.
+    ref_BME_KLD : array, optional
+        Reference BME and KLD values. The default is `None`.
+    save_fig : bool, optional
+        Whether to save the figures. The default is `True`.
+
+    Returns
+    -------
+    None.
+ + """ + n_init_samples = meta_model.ExpDesign.n_init_samples + n_total_samples = meta_model.ExpDesign.X.shape[0] + + if save_fig: + newpath = f'{path}/boxplot_{model_name}/' + if not os.path.exists(newpath): + os.makedirs(newpath) + + plotList = ['Modified LOO error', 'Validation error', 'KLD', 'BME', + 'RMSEMean', 'RMSEStd', 'Hellinger distance'] + seqList = [meta_model.SeqModifiedLOO, meta_model.seqValidError, + meta_model.SeqKLD, meta_model.SeqBME, meta_model.seqRMSEMean, + meta_model.seqRMSEStd, meta_model.SeqDistHellinger] + + markers = ('x', 'o', 'd', '*', '+') + colors = ('k', 'darkgreen', 'b', 'navy', 'darkred') + + # Plot the evolution of the diagnostic criteria of the + # Sequential Experimental Design. + for plotidx, plot in enumerate(plotList): + fig, ax = plt.subplots(figsize=(27, 15)) + seq_dict = seqList[plotidx] + name_util = list(seq_dict.keys()) + + if len(name_util) == 0: + continue + + # Box plot when Replications have been detected. + if any(int(name.split("rep_", 1)[1]) > 1 for name in name_util): + # Extract the values from dict + sorted_seq_opt = {} + # Number of replications + n_reps = meta_model.ExpDesign.n_replication + + # Get the list of utility function names + # Handle if only one UtilityFunction is provided + if not isinstance(util_funcs, list): + util_funcs = [util_funcs] + + for util in util_funcs: + sortedSeq = {} + # min number of runs available from reps + n_runs = min([seq_dict[f'{util}_rep_{i+1}'].shape[0] + for i in range(n_reps)]) + + for runIdx in range(n_runs): + values = [] + for key in seq_dict.keys(): + if util in key: + values.append(seq_dict[key][runIdx].mean()) + sortedSeq['SeqItr_'+str(runIdx)] = np.array(values) + sorted_seq_opt[util] = sortedSeq + + # BoxPlot + def draw_plot(data, labels, edge_color, fill_color, idx): + pos = labels - (4*idx-6) + bp = plt.boxplot(data, positions=pos, labels=labels, + patch_artist=True, sym='', widths=3) + + ax.plot(pos, np.median(data, axis=0), lw=4, color=fill_color[idx]) + + elements = ['boxes', 'whiskers', 'fliers', 'means', + 'medians', 'caps'] + for element in elements: + plt.setp(bp[element], color=edge_color[idx], alpha=0.6) + + for patch in bp['boxes']: + patch.set(facecolor=fill_color[idx], alpha=0.6) + + if meta_model.ExpDesign.n_new_samples != 1: + step1 = meta_model.ExpDesign.n_new_samples + step2 = 1 + else: + step1 = 10 + step2 = 10 + edge_color = ['red', 'blue', 'green', 'black'] + fill_color = ['tan', 'cyan', 'lightgreen', 'grey'] + plot_label = plot + # Plot for different Utility Functions + for idx, util in enumerate(util_funcs): + all_errors = np.empty((n_reps, 0)) + + for key in list(sorted_seq_opt[util].keys()): + errors = sorted_seq_opt.get(util, {}).get(key)[:, None] + all_errors = np.hstack((all_errors, errors)) + + # Special cases for BME and KLD + if plot == 'KLD' or plot == 'BME': + # BME convergence if refBME is provided + if ref_BME_KLD is not None: + if plot == 'BME': + refValue = ref_BME_KLD[0] + plot_label = r'BME/BME$^{Ref.}$' + if plot == 'KLD': + refValue = ref_BME_KLD[1] + plot_label = '$D_{KL}[p(\\theta|y_*),p(\\theta)]'\ + ' / D_{KL}^{Ref.}[p(\\theta|y_*), '\ + 'p(\\theta)]$' + + # Difference between BME/KLD and the ref. values + all_errors = np.divide(all_errors, + np.full((all_errors.shape), + refValue)) + + # Plot baseline for zero, i.e. 
no difference + plt.axhline(y=1.0, xmin=0, xmax=1, c='green', + ls='--', lw=2) + + # Plot each UtilFuncs + labels = np.arange(n_init_samples, n_total_samples+1, step1) + draw_plot(all_errors[:, ::step2], labels, edge_color, + fill_color, idx) + # labels = np.array([10, 30, 50, 70, 90, 120, 150, 200]) + # indices = [0, 20, 40, 60, 80, 110, 140, 190] + # draw_plot(all_errors[:, indices], labels, edge_color, + # fill_color, idx) + + plt.xticks(labels) + # Set the major and minor locators + # ax.xaxis.set_major_locator(ticker.AutoLocator()) + # ax.xaxis.set_minor_locator(ticker.AutoMinorLocator()) + # ax.xaxis.grid(True, which='major', linestyle='-') + # ax.xaxis.grid(True, which='minor', linestyle='--') + + # Shade + for center in labels[::2]: + ax.axvspan(center-8, center+8, alpha=0.1, color='grey') + + # Legend + legend_elements = [] + for idx, util in enumerate(util_funcs): + legend_elements.append(Patch(facecolor=fill_color[idx], + edgecolor=edge_color[idx], + label=util)) + plt.legend(handles=legend_elements[::-1], loc='best') + + if plot != 'BME' and plot != 'KLD': + plt.yscale('log') + plt.autoscale(True) + # ax.yaxis.set_minor_locator(ticker.LogLocator(numticks=999, subs="auto")) + ax.yaxis.grid(True, which='minor', linestyle='--') + plt.xlabel('\\# of training samples', fontsize=f_size) + plt.ylabel(plot_label, fontsize=f_size) + # plt.title(plot) + plt.xticks(fontsize=f_size) + plt.yticks(fontsize=f_size) + + if save_fig: + # save the current figure + plot_name = plot.replace(' ', '_') + fig.savefig( + f'{newpath}/boxplot_solver_ishigami_{plot_name}.pdf', + bbox_inches='tight' + ) + # Destroy the current plot + plt.clf() + return + + +if __name__ == "__main__": + # Set variables + model_name = 'borehole' + solvers = ['BCS', 'FastARD', 'OMP', 'OLS'] + path = f'/home/farid/bwSyncShare/Scientific_LH2/Promotion/dissertation/surrogate/data-borehole/' + f_size = 45 + + all_loo_errors = {} + all_valid_errors = {} + for solver in solvers: + # reading the data from the file + with open(f"{path}/{solver}/PCEModel_{model_name}.pkl", "rb") as input: + meta_model = joblib.load(input) + + # Update name and Concatenate + all_valid_errors.update({key.replace('ALM', solver): value + for key, value in + meta_model.seqValidError.items() + }) + all_loo_errors.update({key.replace('ALM', solver): value + for key, value in + meta_model.SeqModifiedLOO.items() + }) + meta_model.seqValidError = all_valid_errors + meta_model.SeqModifiedLOO = all_loo_errors + + # Plot box plot + plot_seq_design_diagnostics(meta_model, solvers) diff --git a/examples/OHagan-function/data/valid_outputs.npy b/examples/OHagan-function/data/valid_outputs.npy new file mode 100644 index 0000000000000000000000000000000000000000..6b0f28b72e05d71f6a0c7a677f416e9493301ac0 Binary files /dev/null and b/examples/OHagan-function/data/valid_outputs.npy differ diff --git a/examples/OHagan-function/data/valid_samples.npy b/examples/OHagan-function/data/valid_samples.npy new file mode 100644 index 0000000000000000000000000000000000000000..2a2a38e1a1718d94f39243fac3d0f047fdd870d1 Binary files /dev/null and b/examples/OHagan-function/data/valid_samples.npy differ diff --git a/examples/OHagan-function/test_OHagan.py b/examples/OHagan-function/test_OHagan.py new file mode 100644 index 0000000000000000000000000000000000000000..4715ecc525c2a016e078753b63016e8705a77888 --- /dev/null +++ b/examples/OHagan-function/test_OHagan.py @@ -0,0 +1,197 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +This test deals with the surrogate modeling of a 
15-dimensional Oakley-O'Hagan (2004) function.
+
+You will see how to:
+    Check the quality of your regression model
+    Perform sensitivity analysis via Sobol indices
+
+Author: Farid Mohammadi, M.Sc.
+E-Mail: farid.mohammadi@iws.uni-stuttgart.de
+Department of Hydromechanics and Modelling of Hydrosystems (LH2)
+Institute for Modelling Hydraulic and Environmental Systems (IWS), University
+of Stuttgart, www.iws.uni-stuttgart.de/lh2/
+Pfaffenwaldring 61
+70569 Stuttgart
+
+Created on Wed Jul 10 2019
+
+"""
+
+import numpy as np
+import joblib
+
+# import bayesvalidrox
+# Add BayesValidRox path
+import sys
+sys.path.append("../../src/bayesvalidrox/")
+
+from pylink.pylink import PyLinkForwardModel
+from surrogate_models.inputs import Input
+from surrogate_models.surrogate_models import MetaModel
+from post_processing.post_processing import PostProcessing
+from bayes_inference.bayes_inference import BayesInference
+from bayes_inference.discrepancy import Discrepancy
+import matplotlib
+matplotlib.use('agg')
+
+if __name__ == "__main__":
+
+    # =====================================================
+    # ============= COMPUTATIONAL MODEL ================
+    # =====================================================
+    Model = PyLinkForwardModel()
+
+    # Define model options
+    Model.link_type = 'Function'
+    Model.py_file = 'OHagan'
+    Model.name = 'OHagan'
+
+    Model.Output.names = ['Z']
+
+    # =====================================================
+    # ========= PROBABILISTIC INPUT MODEL ==============
+    # =====================================================
+    Inputs = Input()
+
+    n_dim = 15
+
+    for i in range(n_dim):
+        Inputs.add_marginals()
+        Inputs.Marginals[i].name = "$\\theta_{"+str(i+1)+"}$"
+        Inputs.Marginals[i].dist_type = 'normal'
+        Inputs.Marginals[i].parameters = [0, 1]
+
+    # =====================================================
+    # ====== POLYNOMIAL CHAOS EXPANSION METAMODELS ======
+    # =====================================================
+    MetaModelOpts = MetaModel(Inputs, Model)
+
+    # Select your metamodel method
+    # 1) PCE (Polynomial Chaos Expansion) 2) aPCE (arbitrary PCE)
+    # 3) GPE (Gaussian Process Emulator)
+    MetaModelOpts.meta_model_type = 'aPCE'
+
+    # ------------------------------------------------
+    # ------------- PCE Specification ----------------
+    # ------------------------------------------------
+    # Select the sparse least-square minimization method for
+    # the PCE coefficients calculation:
+    # 1)OLS: Ordinary Least Square 2)BRR: Bayesian Ridge Regression
+    # 3)LARS: Least angle regression 4)ARD: Bayesian ARD Regression
+    # 5)FastARD: Fast Bayesian ARD Regression
+    # 6)BCS: Bayesian Compressive Sensing
+    # 7)OMP: Orthogonal Matching Pursuit
+    # 8)VBL: Variational Bayesian Learning
+    # 9)EBL: Empirical Bayesian Learning
+    MetaModelOpts.pce_reg_method = 'FastARD'
+
+    # Bootstrapping
+    # 1) normal 2) fast
+    MetaModelOpts.bootstrap_method = 'fast'
+    MetaModelOpts.n_bootstrap_itrs = 1  # e.g. 100 for bootstrapping
+
+    # Specify the max degree to be compared by the adaptive algorithm:
+    # The degree with the lowest Leave-One-Out cross-validation (LOO)
+    # error (or the highest score = 1 - LOO) is chosen as the final
+    # metamodel. pce_deg accepts the degree as a scalar or a range.
+    MetaModelOpts.pce_deg = 7
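+    # Alternatively (editorial sketch, not part of the original example), a
+    # range lets the adaptive algorithm compare several degrees and keep the
+    # one with the best LOO score, e.g.:
+    # MetaModelOpts.pce_deg = np.arange(1, 8)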
+
+    # q-quasi-norm 0<q<1 (default=1)
+    MetaModelOpts.pce_q_norm = 0.5
+
+    # Print summary of the regression results
+    # MetaModelOpts.verbose = True
+
+    # ------------------------------------------------
+    # ------ Experimental Design Configuration -------
+    # ------------------------------------------------
+    MetaModelOpts.add_ExpDesign()
+
+    # One-shot (normal) or Sequential Adaptive (sequential) Design
+    MetaModelOpts.ExpDesign.method = 'sequential'
+    MetaModelOpts.ExpDesign.n_init_samples = 100
+
+    # Sampling methods
+    # 1) random 2) latin_hypercube 3) sobol 4) halton 5) hammersley 6) korobov
+    # 7) chebyshev(FT) 8) grid(FT) 9) nested_grid(FT) 10)user
+    MetaModelOpts.ExpDesign.sampling_method = 'user'
+
+    # Provide the experimental design object with an hdf5 file
+    MetaModelOpts.ExpDesign.hdf5_file = 'ExpDesign_OHagan_orig.hdf5'
+
+    # Sequential experimental design (needed only for sequential ExpDesign)
+    MetaModelOpts.ExpDesign.n_new_samples = 1
+    MetaModelOpts.ExpDesign.n_max_samples = 500  # 150
+    MetaModelOpts.ExpDesign.mod_LOO_threshold = 1e-16
+
+    # ------------------------------------------------
+    # ------- Sequential Design configuration --------
+    # ------------------------------------------------
+    # 1) None 2) 'equal' 3)'epsilon-decreasing' 4) 'adaptive'
+    MetaModelOpts.ExpDesign.tradeoff_scheme = None
+    # MetaModelOpts.ExpDesign.n_replication = 50
+    # -------- Exploration ------
+    # 1)'Voronoi' 2)'random' 3)'latin_hypercube' 4)'dual annealing'
+    MetaModelOpts.ExpDesign.explore_method = 'latin_hypercube'
+
+    # Use when 'dual annealing' chosen
+    MetaModelOpts.ExpDesign.max_func_itr = 200
+
+    # Use when 'Voronoi' or 'random' or 'latin_hypercube' chosen
+    MetaModelOpts.ExpDesign.n_canddidate = 10000
+    MetaModelOpts.ExpDesign.n_cand_groups = 4
+
+    # -------- Exploitation ------
+    # 1)'BayesOptDesign' 2)'VarOptDesign' 3)'alphabetic' 4)'Space-filling'
+    MetaModelOpts.ExpDesign.exploit_method = 'Space-filling'
+
+    # BayesOptDesign -> when data is available
+    # 1)DKL (Kullback-Leibler Divergence) 2)DPP (D-Posterior-precision)
+    # 3)APP (A-Posterior-precision)
+    # MetaModelOpts.ExpDesign.util_func = 'DKL'
+
+    # VarBasedOptDesign -> when data is not available
+    # Only with Voronoi >>> 1)Entropy 2)EIGF, 3)ALM, 4)LOOCV
+    MetaModelOpts.ExpDesign.util_func = 'ALM'
+
+    # alphabetic
+    # 1)D-Opt (D-Optimality) 2)A-Opt (A-Optimality)
+    # 3)K-Opt (K-Optimality)
+    # MetaModelOpts.ExpDesign.util_func = 'D-Opt'
+
+    MetaModelOpts.valid_samples = np.load("data/valid_samples.npy")
+    MetaModelOpts.valid_model_runs = {
+        'Z': np.load("data/valid_outputs.npy")
+    }
+    # >>>>>>>>>>>>>>>>>>>>>> Build Surrogate <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+    # Adaptive sparse arbitrary polynomial chaos expansion
+    # PCEModel = MetaModelOpts.create_metamodel()
+    from surrogate_models.meta_model_engine import MetaModelEngine
+    meta_model_engine = MetaModelEngine(MetaModelOpts)
+    meta_model_engine.run()
+    PCEModel = meta_model_engine.MetaModel
+
+    # Save PCE models
+    with open(f'PCEModel_{Model.name}.pkl', 'wb') as output:
+        joblib.dump(PCEModel, output, 2)
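+
+    # A later session can reload the saved surrogate without re-training --
+    # an editorial sketch mirroring the loading pattern used in
+    # data/sparse_solver_comparison.py:
+    # with open(f'PCEModel_{Model.name}.pkl', 'rb') as fin:
+    #     PCEModel = joblib.load(fin)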
+
+    # =====================================================
+    # ========= POST PROCESSING OF METAMODELS ===========
+    # =====================================================
+    PostPCE = PostProcessing(PCEModel)
+
+    # Plot to check validation visually.
+    PostPCE.valid_metamodel(n_samples=200)
+
+    # PostPCE.eval_PCEmodel_3D()
+    # Compute and print RMSE error
+    PostPCE.check_accuracy(n_samples=3000)
+
+    # Plot the evolution of the KLD, BME, and modified LOOCV error
+    if MetaModelOpts.ExpDesign.method == 'sequential':
+        PostPCE.plot_seq_design_diagnostics()
+
+    # Plot the Sobol indices
+    total_sobol = PostPCE.sobol_indices(plot_type='bar')
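+
+    # For reference, validation data of the kind loaded into
+    # MetaModelOpts.valid_samples / valid_model_runs above could be
+    # (re)generated with the model itself -- an editorial sketch, assuming
+    # the script is run from examples/OHagan-function/ so that OHagan.py and
+    # data/M.csv are found (the sample count is illustrative):
+    # from OHagan import OHagan
+    # valid_samples = np.random.normal(0, 1, (3000, n_dim))
+    # np.save('data/valid_samples.npy', valid_samples)
+    # np.save('data/valid_outputs.npy', OHagan(valid_samples)['Z'])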