diff --git a/examples/sinusoidal-function/data/sparse_solver_comparison.py b/examples/sinusoidal-function/data/sparse_solver_comparison.py new file mode 100644 index 0000000000000000000000000000000000000000..8bf8e00ca4b32c46ba7df8651397f969669c735b --- /dev/null +++ b/examples/sinusoidal-function/data/sparse_solver_comparison.py @@ -0,0 +1,240 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Sat Sep 10 09:44:05 2022 + +@author: farid +""" + +import numpy as np +import joblib +import os +import scipy.stats as st + +import sys +sys.path.append("../../../src/bayesvalidrox/") + +from pylink.pylink import PyLinkForwardModel +from surrogate_models.inputs import Input +from surrogate_models.surrogate_models import MetaModel +from post_processing.post_processing import PostProcessing +from bayes_inference.bayes_inference import BayesInference +from bayes_inference.discrepancy import Discrepancy +import matplotlib +matplotlib.use('agg') +from matplotlib.backends.backend_pdf import PdfPages +import matplotlib.ticker as ticker +from matplotlib.offsetbox import AnchoredText +from matplotlib.patches import Patch +import matplotlib.pyplot as plt +# Load the mplstyle +plt.style.use(os.path.join( + os.path.split(__file__)[0], + '../../../src/bayesvalidrox/', 'bayesvalidrox.mplstyle')) + + +def plot_seq_design_diagnostics(meta_model, util_funcs, + ref_BME_KLD=None, save_fig=True): + """ + Plots the Bayesian Model Evidence (BME) and Kullback-Leibler divergence + (KLD) for the sequential design. + + Parameters + ---------- + ref_BME_KLD : array, optional + Reference BME and KLD . The default is `None`. + save_fig : bool, optional + Whether to save the figures. The default is `True`. + + Returns + ------- + None. + + """ + n_init_samples = meta_model.ExpDesign.n_init_samples + n_total_samples = meta_model.ExpDesign.X.shape[0] + + if save_fig: + newpath = f'{path}/boxplot_{model_name}/' + if not os.path.exists(newpath): + os.makedirs(newpath) + + plotList = ['Modified LOO error', 'Validation error', 'KLD', 'BME', + 'RMSEMean', 'RMSEStd', 'Hellinger distance'] + seqList = [meta_model.SeqModifiedLOO, meta_model.seqValidError, + meta_model.SeqKLD, meta_model.SeqBME, meta_model.seqRMSEMean, + meta_model.seqRMSEStd, meta_model.SeqDistHellinger] + + markers = ('x', 'o', 'd', '*', '+') + colors = ('k', 'darkgreen', 'b', 'navy', 'darkred') + + # Plot the evolution of the diagnostic criteria of the + # Sequential Experimental Design. + for plotidx, plot in enumerate(plotList): + fig, ax = plt.subplots(figsize=(27, 15)) + seq_dict = seqList[plotidx] + name_util = list(seq_dict.keys()) + + if len(name_util) == 0: + continue + + # Box plot when Replications have been detected. + if any(int(name.split("rep_", 1)[1]) > 1 for name in name_util): + # Extract the values from dict + sorted_seq_opt = {} + # Number of replications + n_reps = meta_model.ExpDesign.n_replication + + # Get the list of utility function names + # Handle if only one UtilityFunction is provided + if not isinstance(util_funcs, list): + util_funcs = [util_funcs] + + for util in util_funcs: + sortedSeq = {} + # min number of runs available from reps + n_runs = min([seq_dict[f'{util}_rep_{i+1}'].shape[0] + for i in range(n_reps)]) + + for runIdx in range(n_runs): + values = [] + for key in seq_dict.keys(): + if util in key: + values.append(seq_dict[key][runIdx].mean()) + sortedSeq['SeqItr_'+str(runIdx)] = np.array(values) + sorted_seq_opt[util] = sortedSeq + + # BoxPlot + def draw_plot(data, labels, edge_color, fill_color, idx): + pos = labels - (4*idx-6) + bp = plt.boxplot(data, positions=pos, labels=labels, + patch_artist=True, sym='', widths=3) + + ax.plot(pos, np.median(data, axis=0), lw=4, color=fill_color[idx]) + + elements = ['boxes', 'whiskers', 'fliers', 'means', + 'medians', 'caps'] + for element in elements: + plt.setp(bp[element], color=edge_color[idx], alpha=0.6) + + for patch in bp['boxes']: + patch.set(facecolor=fill_color[idx], alpha=0.6) + + if meta_model.ExpDesign.n_new_samples != 1: + step1 = meta_model.ExpDesign.n_new_samples + step2 = 1 + else: + step1 = 10 + step2 = 10 + edge_color = ['red', 'blue', 'green', 'black'] + fill_color = ['tan', 'cyan', 'lightgreen', 'grey'] + plot_label = plot + # Plot for different Utility Functions + for idx, util in enumerate(util_funcs): + all_errors = np.empty((n_reps, 0)) + + for key in list(sorted_seq_opt[util].keys()): + errors = sorted_seq_opt.get(util, {}).get(key)[:, None] + all_errors = np.hstack((all_errors, errors)) + + # Special cases for BME and KLD + if plot == 'KLD' or plot == 'BME': + # BME convergence if refBME is provided + if ref_BME_KLD is not None: + if plot == 'BME': + refValue = ref_BME_KLD[0] + plot_label = r'BME/BME$^{Ref.}$' + if plot == 'KLD': + refValue = ref_BME_KLD[1] + plot_label = '$D_{KL}[p(\\theta|y_*),p(\\theta)]'\ + ' / D_{KL}^{Ref.}[p(\\theta|y_*), '\ + 'p(\\theta)]$' + + # Difference between BME/KLD and the ref. values + all_errors = np.divide(all_errors, + np.full((all_errors.shape), + refValue)) + + # Plot baseline for zero, i.e. no difference + plt.axhline(y=1.0, xmin=0, xmax=1, c='green', + ls='--', lw=2) + + # Plot each UtilFuncs + # labels = np.arange(n_init_samples, n_total_samples+1, step1) + labels = np.array([10, 30, 50, 70, 90, 120, 150, 200]) + indices = [0, 20, 40, 60, 80, 110, 140, 190] + draw_plot(all_errors[:, indices], labels, edge_color, + fill_color, idx) + # draw_plot(all_errors[:, ::step2], labels, edge_color, + # fill_color, idx) + + plt.xticks(labels) + # Set the major and minor locators + # ax.xaxis.set_major_locator(ticker.AutoLocator()) + # ax.xaxis.set_minor_locator(ticker.AutoMinorLocator()) + # ax.xaxis.grid(True, which='major', linestyle='-') + # ax.xaxis.grid(True, which='minor', linestyle='--') + + # Shade + for center in [30, 70, 120, 200]:#labels[::2]: + ax.axvspan(center-8, center+8, alpha=0.1, color='grey') + + # Legend + legend_elements = [] + for idx, util in enumerate(util_funcs): + legend_elements.append(Patch(facecolor=fill_color[idx], + edgecolor=edge_color[idx], + label=util)) + plt.legend(handles=legend_elements[::-1], loc='best') + + if plot != 'BME' and plot != 'KLD': + plt.yscale('log') + plt.autoscale(True) + # ax.yaxis.set_minor_locator(ticker.LogLocator(numticks=999, subs="auto")) + ax.yaxis.grid(True, which='minor', linestyle='--') + plt.xlabel('\\# of training samples', fontsize=f_size) + plt.ylabel(plot_label, fontsize=f_size) + # plt.title(plot) + plt.xticks(fontsize=f_size) + plt.yticks(fontsize=f_size) + + if save_fig: + # save the current figure + plot_name = plot.replace(' ', '_') + fig.savefig( + f'{newpath}/boxplot_solver_ishigami_{plot_name}.pdf', + bbox_inches='tight' + ) + # Destroy the current plot + plt.clf() + return + + +if __name__ == "__main__": + # Set variables + model_name = 'Ishigami' + solvers = ['FastLaplace', 'FastARD', 'OMP', 'OLS'] + path = f'/home/farid/bwSyncShare/Scientific_LH2/Promotion/dissertation/surrogate/data-ishigami/' + f_size = 45 + + all_loo_errors = {} + all_valid_errors = {} + for solver in solvers: + # reading the data from the file + with open(f"{path}/{solver}/PCEModel_{model_name}.pkl", "rb") as input: + meta_model = joblib.load(input) + + # Update name and Concatenate + all_valid_errors.update({key.replace('ALM', solver): value + for key, value in + meta_model.seqValidError.items() + }) + all_loo_errors.update({key.replace('ALM', solver): value + for key, value in + meta_model.SeqModifiedLOO.items() + }) + meta_model.seqValidError = all_valid_errors + meta_model.SeqModifiedLOO = all_loo_errors + + # Plot box plot + plot_seq_design_diagnostics(meta_model, solvers) diff --git a/examples/sinusoidal-function/data/valid_outputs.npy b/examples/sinusoidal-function/data/valid_outputs.npy new file mode 100644 index 0000000000000000000000000000000000000000..049f39311e41b8f0b00d443078068385470af45f Binary files /dev/null and b/examples/sinusoidal-function/data/valid_outputs.npy differ diff --git a/examples/sinusoidal-function/data/valid_samples.npy b/examples/sinusoidal-function/data/valid_samples.npy new file mode 100644 index 0000000000000000000000000000000000000000..882ca288df7951d0025dd36e2ed4823c3ae361f9 Binary files /dev/null and b/examples/sinusoidal-function/data/valid_samples.npy differ diff --git a/examples/sinusoidal-function/test.ipynb b/examples/sinusoidal-function/test.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..0ff49e1e71323ec44763301fed8e84353a84a173 --- /dev/null +++ b/examples/sinusoidal-function/test.ipynb @@ -0,0 +1,207 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "There are no estimations of surrogate uncertainty available for the chosen regression options. This might lead to issues in later steps.\n", + "\n", + " Now the forward model needs to be run!\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Computing orth. polynomial coeffs: 100%|##########| 1/1 [00:01<00:00, 1.61s/it]\n", + "Running forward model valid: 100%|██████████| 3/3 [00:00<00:00, 4.28it/s]\n", + "Running forward model valid: 100%|██████████| 1000/1000 [00:01<00:00, 775.69it/s]\n", + "Running forward model validSet: 100%|██████████| 3/3 [00:00<00:00, 4.29it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + ">>>>> Errors of Z <<<<<\n", + "\n", + "Index | RMSE | Validation Error\n", + "-----------------------------------\n", + "1 | 1.319e-07 | 4.873e-13\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import sys\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "matplotlib.use('agg')\n", + "\n", + "sys.path.append(\"../../src/\")\n", + "\n", + "from bayesvalidrox import PyLinkForwardModel, Input, ExpDesigns, PCE, PostProcessing, Discrepancy, Engine\n", + "\n", + "if __name__ == \"__main__\":\n", + " \n", + " # =====================================================\n", + " # ============= COMPUTATIONAL MODEL ================\n", + " # =====================================================\n", + " Model=PyLinkForwardModel()\n", + "\n", + " Model.link_type = 'Function'\n", + " Model.py_file = 'sinusoidal_function'\n", + " Model.name = 'SinusoidalFunc'\n", + "\n", + " Model.Output.names = ['Z']\n", + "\n", + " # =====================================================\n", + " # ========= PROBABILISTIC INPUT MODEL ==============\n", + " # =====================================================\n", + " # Define the uncertain parameters with their mean and\n", + " # standard deviation\n", + " Inputs = Input()\n", + "\n", + " Inputs.add_marginals()\n", + " Inputs.Marginals[0].name = 'X'\n", + " Inputs.Marginals[0].dist_type = 'uniform'\n", + " Inputs.Marginals[0].parameters = [0,10]\n", + "\n", + " # =====================================================\n", + " # ========== DEFINITION OF THE METAMODEL ============\n", + " # =====================================================\n", + " MetaModelOpts = PCE(Inputs)\n", + "\n", + " MetaModelOpts.meta_model_type = 'aPCE'\n", + "\n", + " # ------------------------------------------------\n", + " # ------------- PCE Specification ----------------\n", + " # ------------------------------------------------\n", + " MetaModelOpts._pce_reg_method = 'OLS'\n", + "\n", + " MetaModelOpts.bootstrap_method = 'fast'\n", + " MetaModelOpts.n_bootstrap_itrs = 1\n", + "\n", + " MetaModelOpts._pce_deg = 15\n", + "\n", + " MetaModelOpts._pce_q_norm = 1\n", + "\n", + " # ------------------------------------------------\n", + " # ------ Experimental Design Configuration -------\n", + " # ------------------------------------------------\n", + " ExpDesign = ExpDesigns(Inputs)\n", + "\n", + " #ExpDesign.method = 'sequential'\n", + " ExpDesign.n_init_samples = 100\n", + "\n", + " ExpDesign.sampling_method = 'latin_hypercube' #is this correct? \n", + "\n", + " ExpDesign.n_new_samples = 1\n", + " ExpDesign.n_max_samples = 3\n", + " ExpDesign.mod_LOO_threshold = 1e-16\n", + "\n", + " ExpDesign.tradeoff_scheme = None\n", + " ExpDesign.explore_method = 'latin_hypercube' #is this correct? How do I choose this?\n", + "\n", + " ExpDesign.n_canddidate = 1000\n", + " ExpDesign.n_cand_groups = 4\n", + "\n", + " # -------- Exploitation ------\n", + " ExpDesign.exploit_method = 'Space-filling'\n", + "\n", + " #BayesOptDesign -> when data is available \n", + "\n", + " # VarBasedOptDesign -> when data is not available\n", + " ExpDesign.util_func = 'ALM'\n", + "\n", + " ExpDesign.valid_samples = np.load(\"./data/valid_samples.npy\")\n", + " ExpDesign.valid_model_runs = {'Z': np.load(\"./data/valid_samples.npy\")}\n", + "\n", + " # >>>>>>>>>>>>>>>>>>>>>> Build Surrogate <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n", + " MetaModelOpts.ExpDesign = ExpDesign\n", + " egine = Engine(MetaModelOpts, Model, ExpDesign)\n", + " egine.start_engine()\n", + " egine.train_normal()\n", + "\n", + " # =====================================================\n", + " # ========= POST PROCESSING OF METAMODELS ===========\n", + " # =====================================================\n", + " PostPCE = PostProcessing(egine)\n", + "\n", + " #Plot to check validation visually\n", + " PostPCE.valid_metamodel(n_samples=3)\n", + "\n", + " #Check the quality of your regression model \n", + " PostPCE.check_reg_quality()\n", + "\n", + " #Compute and print RMSE error\n", + " PostPCE.check_accuracy(n_samples=3)\n", + "\n", + " PostPCE.plot_moments()\n", + "\n", + " # if MetaModelOpts.ExpDesign.method == 'sequential':\n", + " # PostPCE.plot_seq_design_diagnostics()\n", + "\n", + " total_sobol = PostPCE.sobol_indices()\n", + "\n", + " X = list(ExpDesign.X)\n", + " Y = list(ExpDesign.Y.values())\n", + "\n", + " X = [float(x[0]) for x in X]\n", + " Y = [float(y) for sublist in Y for y in sublist]\n", + "\n", + " #print(f\"x-values {len(X)} \\n y-values {len(Y)}\")\n", + "\n", + " plt.plot(X, Y, 'g', label='PCE predictor - LSTSQ')\n", + " plt.scatter(X, Y, label='training data')\n", + " #plt.plot(x_, f, 'm', label='function')\n", + " plt.title('PCE surrogate - prediction accuracy')\n", + " plt.legend()\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "plt.plot(X, Y, 'g', label='PCE predictor - LSTSQ')\n", + "plt.scatter(X, Y, label='training data')\n", + "#plt.plot(x_, f, 'm', label='function')\n", + "plt.title('PCE surrogate - prediction accuracy')\n", + "plt.legend()\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "meta_models", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}