From 4e4169625ad3937527d47937fed0ecfebf922f65 Mon Sep 17 00:00:00 2001
From: Farid Mohammadi <farid.mohammadi@iws.uni-stuttgart.de>
Date: Mon, 12 Sep 2022 11:14:12 +0200
Subject: [PATCH] [surrogate] add metamodel engine for better memory
 management.

---
 .../surrogate_models/meta_model_engine.py    | 2219 +++++++++++++++++
 .../surrogate_models/sequential_design.py    |  279 ++-
 2 files changed, 2358 insertions(+), 140 deletions(-)
 create mode 100644 src/bayesvalidrox/surrogate_models/meta_model_engine.py

diff --git a/src/bayesvalidrox/surrogate_models/meta_model_engine.py b/src/bayesvalidrox/surrogate_models/meta_model_engine.py
new file mode 100644
index 000000000..08d91ecf7
--- /dev/null
+++ b/src/bayesvalidrox/surrogate_models/meta_model_engine.py
@@ -0,0 +1,2219 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Jan 28 09:21:18 2022
+
+@author: farid
+"""
+import numpy as np
+from scipy import stats, signal, linalg, sparse
+from scipy.spatial import distance
+from copy import deepcopy, copy
+from tqdm import tqdm
+import scipy.optimize as opt
+from sklearn.metrics import mean_squared_error
+import multiprocessing
+import matplotlib.pyplot as plt
+import sys
+import os
+import gc
+import seaborn as sns
+from joblib import Parallel, delayed
+import resource
+from .exploration import Exploration
+
+
+class MetaModelEngine():
+    """ Sequential experimental design
+    This class provides methods for training the meta-model in an
+    iterative manner.
+    The main method to execute the task is `train_seq_design`, which
+    receives a model object and returns the trained metamodel.
+    """
+
+    def __init__(self, meta_model_opts):
+        self.MetaModel = meta_model_opts
+
+    # -------------------------------------------------------------------------
+    def run(self):
+
+        Model = self.MetaModel.ModelObj
+        self.MetaModel.n_params = len(self.MetaModel.input_obj.Marginals)
+        self.MetaModel.ExpDesignFlag = 'normal'
+        # --- Prepare pce degree ---
+        if self.MetaModel.meta_model_type.lower() == 'pce':
+            if type(self.MetaModel.pce_deg) is not np.ndarray:
+                self.MetaModel.pce_deg = np.array(self.MetaModel.pce_deg)
+
+        if self.MetaModel.ExpDesign.method == 'normal':
+            self.MetaModel.ExpDesignFlag = 'normal'
+            self.MetaModel.train_norm_design(verbose=True)
+
+        elif self.MetaModel.ExpDesign.method == 'sequential':
+            self.train_seq_design()
+        else:
+            raise Exception("The method for experimental design you requested"
+                            " has not been implemented yet.")
+
+        # Zip the model run directories
+        if self.MetaModel.ModelObj.link_type.lower() == 'pylink' and\
+           self.MetaModel.ExpDesign.sampling_method.lower() != 'user':
+            Model.zip_subdirs(Model.name, f'{Model.name}_')
+
+    # -------------------------------------------------------------------------
+    def train_seq_design(self):
+        """
+        Starts the adaptive sequential design for refining the surrogate model
+        by selecting training points in a sequential manner.
+
+        Returns
+        -------
+        MetaModel : object
+            Meta model object.
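+
+        Notes
+        -----
+        The sequential loop below is repeated for every requested
+        utility function and replication. Each iteration selects new
+        training points via `opt_SeqDesign`, evaluates the full model
+        there, retrains the surrogate and records the modified LOO
+        error, BME, KLD and, if a Monte Carlo reference is available,
+        the RMSE of mean and std. The loop stops once `n_max_samples`
+        is reached or the modified LOO error falls below
+        `mod_LOO_threshold`.
+
+        Examples
+        --------
+        A minimal sketch of the intended call pattern (assuming a fully
+        configured metamodel object ``meta_model_opts``; the name is
+        illustrative):
+
+        >>> engine = MetaModelEngine(meta_model_opts)
+        >>> engine.run()  # ends up here when ExpDesign.method == 'sequential'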
+ + """ + Model = self.MetaModel.ModelObj + # MetaModel = self.MetaModel + self.Model = Model + + # Initialization + self.MetaModel.SeqModifiedLOO = {} + self.MetaModel.seqValidError = {} + self.MetaModel.SeqBME = {} + self.MetaModel.SeqKLD = {} + self.MetaModel.SeqDistHellinger = {} + self.MetaModel.seqRMSEMean = {} + self.MetaModel.seqRMSEStd = {} + self.MetaModel.seqMinDist = [] + + # Determine the metamodel type + if self.MetaModel.meta_model_type.lower() != 'gpe': + pce = True + else: + pce = False + mc_ref = True if bool(Model.mc_reference) else False + if mc_ref: + Model.read_mc_reference() + + if not hasattr(self.MetaModel, 'valid_samples'): + self.MetaModel.valid_samples = [] + self.MetaModel.valid_model_runs = [] + self.MetaModel.valid_likelihoods = [] + + # Get the parameters + max_n_samples = self.MetaModel.ExpDesign.n_max_samples + mod_LOO_threshold = self.MetaModel.ExpDesign.mod_LOO_threshold + n_canddidate = self.MetaModel.ExpDesign.n_canddidate + post_snapshot = self.MetaModel.ExpDesign.post_snapshot + n_replication = self.MetaModel.ExpDesign.n_replication + util_func = self.MetaModel.ExpDesign.util_func + output_name = Model.Output.names + validError = None + # Handle if only one UtilityFunctions is provided + if not isinstance(util_func, list): + util_func = [self.MetaModel.ExpDesign.util_func] + + # Read observations or MCReference + if len(Model.observations) != 0 or Model.meas_file is not None: + self.observations = Model.read_observation() + obs_data = self.observations + else: + obs_data = [] + TotalSigma2 = {} + # TODO: ---------- Initial self.MetaModel ---------- + # self.MetaModel.train_norm_design(parallel=False, verbose=True) + self.MetaModel.train_norm_design(verbose=True) + initMetaModel = deepcopy(self.MetaModel) + + # Validation error if validation set is provided. 
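+        # Note: the RMSE computed here is later added in quadrature to the
+        # measurement variance inside `__normpdf`, so the surrogate error is
+        # accounted for in the likelihood used for the BME estimate.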
+ if self.MetaModel.valid_model_runs: + init_rmse, init_valid_error = self.__validError(initMetaModel) + init_valid_error = list(init_valid_error.values()) + else: + init_rmse = None + + # Check if discrepancy is provided + if len(obs_data) != 0 and hasattr(self.MetaModel, 'Discrepancy'): + TotalSigma2 = self.MetaModel.Discrepancy.parameters + + # Calculate the initial BME + out = self.__BME_Calculator( + initMetaModel, obs_data, TotalSigma2, init_rmse) + init_BME, init_KLD, init_post, init_likes, init_dist_hellinger = out + print(f"\nInitial BME: {init_BME:.2f}") + print(f"Initial KLD: {init_KLD:.2f}") + + # Posterior snapshot (initial) + if post_snapshot: + parNames = self.MetaModel.ExpDesign.par_names + print('Posterior snapshot (initial) is being plotted...') + self.__posteriorPlot(init_post, parNames, 'SeqPosterior_init') + + # Check the convergence of the Mean & Std + if mc_ref and pce: + init_rmse_mean, init_rmse_std = self.__error_Mean_Std() + print(f"Initial Mean and Std error: {init_rmse_mean:.2f}," + f" {init_rmse_std:.2f}") + + # Read the initial experimental design + Xinit = initMetaModel.ExpDesign.X + init_n_samples = len(initMetaModel.ExpDesign.X) + initYprev = initMetaModel.ModelOutputDict + initLCerror = initMetaModel.LCerror + n_itrs = max_n_samples - init_n_samples + + # Read the initial ModifiedLOO + if pce: + Scores_all, varExpDesignY = [], [] + for out_name in output_name: + y = self.MetaModel.ExpDesign.Y[out_name] + Scores_all.append(list( + self.MetaModel.score_dict['b_1'][out_name].values())) + if self.MetaModel.dim_red_method.lower() == 'pca': + pca = self.MetaModel.pca['b_1'][out_name] + components = pca.transform(y) + varExpDesignY.append(np.var(components, axis=0)) + else: + varExpDesignY.append(np.var(y, axis=0)) + + Scores = [item for sublist in Scores_all for item in sublist] + weights = [item for sublist in varExpDesignY for item in sublist] + init_mod_LOO = [np.average([1-score for score in Scores], + weights=weights)] + + prevMetaModel_dict = {} + # Replicate the sequential design + for repIdx in range(n_replication): + print(f'\n>>>> Replication: {repIdx+1}<<<<') + + # To avoid changes ub original aPCE object + self.MetaModel.ExpDesign.X = Xinit + self.MetaModel.ExpDesign.Y = initYprev + self.MetaModel.LCerror = initLCerror + + for util_f in util_func: + print(f'\n>>>> Utility Function: {util_f} <<<<') + # To avoid changes ub original aPCE object + self.MetaModel.ExpDesign.X = Xinit + self.MetaModel.ExpDesign.Y = initYprev + self.MetaModel.LCerror = initLCerror + + # Set the experimental design + Xprev = Xinit + total_n_samples = init_n_samples + Yprev = initYprev + + Xfull = [] + Yfull = [] + + # Store the initial ModifiedLOO + if pce: + print("\nInitial ModifiedLOO:", init_mod_LOO) + SeqModifiedLOO = np.array(init_mod_LOO) + + if len(self.MetaModel.valid_model_runs) != 0: + SeqValidError = np.array(init_valid_error) + + # Check if data is provided + if len(obs_data) != 0: + SeqBME = np.array([init_BME]) + SeqKLD = np.array([init_KLD]) + SeqDistHellinger = np.array([init_dist_hellinger]) + + if mc_ref and pce: + seqRMSEMean = np.array([init_rmse_mean]) + seqRMSEStd = np.array([init_rmse_std]) + + # ------- Start Sequential Experimental Design ------- + postcnt = 1 + for itr_no in range(1, n_itrs+1): + print(f'\n>>>> Iteration number {itr_no} <<<<') + + # Save the metamodel prediction before updating + m_1 = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/1024 + prevMetaModel_dict[itr_no] = deepcopy(self.MetaModel) + if itr_no > 1: + pc_model = 
prevMetaModel_dict[itr_no-1] + self._y_hat_prev, _ = pc_model.eval_metamodel( + samples=Xfull[-1].reshape(1, -1)) + del prevMetaModel_dict[itr_no-1] + + # Optimal Bayesian Design + self.MetaModel.ExpDesignFlag = 'sequential' + Xnew, updatedPrior = self.opt_SeqDesign(TotalSigma2, + n_canddidate, + util_f) + m_2 = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/1024 + S = np.min(distance.cdist(Xinit, Xnew, 'euclidean')) + self.MetaModel.seqMinDist.append(S) + print(f"\nmin Dist from OldExpDesign: {S:2f}") + print("\n") + + # Evaluate the full model response at the new sample + Ynew, _ = Model.run_model_parallel( + Xnew, prevRun_No=total_n_samples + ) + total_n_samples += Xnew.shape[0] + + # ------ Plot the surrogate model vs Origninal Model ------ + if hasattr(self.MetaModel, 'adapt_verbose') and \ + self.MetaModel.adapt_verbose: + from .adaptPlot import adaptPlot + y_hat, std_hat = self.MetaModel.eval_metamodel( + samples=Xnew + ) + adaptPlot( + self.MetaModel, Ynew, y_hat, std_hat, + plotED=False + ) + + # -------- Retrain the surrogate model ------- + # Extend new experimental design + Xfull = np.vstack((Xprev, Xnew)) + + # Updating experimental design Y + for out_name in output_name: + Yfull = np.vstack((Yprev[out_name], Ynew[out_name])) + self.MetaModel.ModelOutputDict[out_name] = Yfull + + # Pass new design to the metamodel object + self.MetaModel.ExpDesign.sampling_method = 'user' + self.MetaModel.ExpDesign.X = Xfull + self.MetaModel.ExpDesign.Y = self.MetaModel.ModelOutputDict + + # Save the Experimental Design for next iteration + Xprev = Xfull + Yprev = self.MetaModel.ModelOutputDict + + # Pass the new prior as the input + self.MetaModel.input_obj.poly_coeffs_flag = False + if updatedPrior is not None: + self.MetaModel.input_obj.poly_coeffs_flag = True + print("updatedPrior:", updatedPrior.shape) + # Arbitrary polynomial chaos + for i in range(updatedPrior.shape[1]): + self.MetaModel.input_obj.Marginals[i].dist_type = None + x = updatedPrior[:, i] + self.MetaModel.input_obj.Marginals[i].raw_data = x + + # Train the surrogate model for new ExpDesign + self.MetaModel.train_norm_design(parallel=False) + m_3 = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/1024 + + # -------- Evaluate the retrained surrogate model ------- + # Extract Modified LOO from Output + if pce: + Scores_all, varExpDesignY = [], [] + for out_name in output_name: + y = self.MetaModel.ExpDesign.Y[out_name] + Scores_all.append(list( + self.MetaModel.score_dict['b_1'][out_name].values())) + if self.MetaModel.dim_red_method.lower() == 'pca': + pca = self.MetaModel.pca['b_1'][out_name] + components = pca.transform(y) + varExpDesignY.append(np.var(components, + axis=0)) + else: + varExpDesignY.append(np.var(y, axis=0)) + Scores = [item for sublist in Scores_all for item + in sublist] + weights = [item for sublist in varExpDesignY for item + in sublist] + ModifiedLOO = [np.average( + [1-score for score in Scores], weights=weights)] + + print('\n') + print(f"Updated ModifiedLOO {util_f}:\n", ModifiedLOO) + print('\n') + + # Compute the validation error + if self.MetaModel.valid_model_runs: + rmse, validError = self.__validError(self.MetaModel) + ValidError = list(validError.values()) + else: + rmse = None + + # Store updated ModifiedLOO + if pce: + SeqModifiedLOO = np.vstack( + (SeqModifiedLOO, ModifiedLOO)) + if len(self.MetaModel.valid_model_runs) != 0: + SeqValidError = np.vstack( + (SeqValidError, ValidError)) + m_4 = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/1024 + # -------- Caclulation of BME as 
accuracy metric ------- + # Check if data is provided + if len(obs_data) != 0: + # Calculate the initial BME + out = self.__BME_Calculator(self.MetaModel, obs_data, + TotalSigma2, rmse) + BME, KLD, Posterior, likes, DistHellinger = out + print('\n') + print(f"Updated BME: {BME:.2f}") + print(f"Updated KLD: {KLD:.2f}") + print('\n') + + # Plot some snapshots of the posterior + step_snapshot = self.MetaModel.ExpDesign.step_snapshot + if post_snapshot and postcnt % step_snapshot == 0: + parNames = self.MetaModel.ExpDesign.par_names + print('Posterior snapshot is being plotted...') + self.__posteriorPlot(Posterior, parNames, + f'SeqPosterior_{postcnt}') + postcnt += 1 + m_5 = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/1024 + + # Check the convergence of the Mean&Std + if mc_ref and pce: + print('\n') + RMSE_Mean, RMSE_std = self.__error_Mean_Std() + print(f"Updated Mean and Std error: {RMSE_Mean:.2f}, " + f"{RMSE_std:.2f}") + print('\n') + + # Store the updated BME & KLD + # Check if data is provided + if len(obs_data) != 0: + SeqBME = np.vstack((SeqBME, BME)) + SeqKLD = np.vstack((SeqKLD, KLD)) + SeqDistHellinger = np.vstack((SeqDistHellinger, + DistHellinger)) + if mc_ref and pce: + seqRMSEMean = np.vstack((seqRMSEMean, RMSE_Mean)) + seqRMSEStd = np.vstack((seqRMSEStd, RMSE_std)) + + if pce and any(LOO < mod_LOO_threshold + for LOO in ModifiedLOO): + break + + print(f"Memory itr {itr_no}: I: {m_2-m_1:.2f} MB") + print(f"Memory itr {itr_no}: II: {m_3-m_2:.2f} MB") + print(f"Memory itr {itr_no}: III: {m_4-m_3:.2f} MB") + print(f"Memory itr {itr_no}: IV: {m_5-m_4:.2f} MB") + m_6 = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/1024 + print(f"Memory itr {itr_no}: total: {m_6:.2f} MB") + + # Clean up + if len(obs_data) != 0: + del out + gc.collect() + print() + print('-'*50) + print() + + # Store updated ModifiedLOO and BME in dictonary + strKey = f'{util_f}_rep_{repIdx+1}' + if pce: + self.MetaModel.SeqModifiedLOO[strKey] = SeqModifiedLOO + if len(self.MetaModel.valid_model_runs) != 0: + self.MetaModel.seqValidError[strKey] = SeqValidError + + # Check if data is provided + if len(obs_data) != 0: + self.MetaModel.SeqBME[strKey] = SeqBME + self.MetaModel.SeqKLD[strKey] = SeqKLD + if hasattr(self.MetaModel, 'valid_likelihoods') and \ + self.MetaModel.valid_likelihoods: + self.MetaModel.SeqDistHellinger[strKey] = SeqDistHellinger + if mc_ref and pce: + self.MetaModel.seqRMSEMean[strKey] = seqRMSEMean + self.MetaModel.seqRMSEStd[strKey] = seqRMSEStd + + # return self.MetaModel + + # ------------------------------------------------------------------------- + def util_VarBasedDesign(self, X_can, index, util_func='Entropy'): + """ + Computes the exploitation scores based on: + active learning MacKay(ALM) and active learning Cohn (ALC) + Paper: Sequential Design with Mutual Information for Computer + Experiments (MICE): Emulation of a Tsunami Model by Beck and Guillas + (2016) + + Parameters + ---------- + X_can : array of shape (n_samples, n_params) + Candidate samples. + index : int + Model output index. + UtilMethod : string, optional + Exploitation utility function. The default is 'Entropy'. + + Returns + ------- + float + Score. 
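+
+        Notes
+        -----
+        `ALM` scores a candidate by the maximum predictive variance of
+        the current surrogate. `EIGF` follows Eq. (5) in Liu et al.
+        (2018),
+
+            EIGF(x) = (y_hat(x) - y(x*))**2 + sigma_hat(x)**2,
+
+        where x* is the training point closest to the candidate x. Both
+        scores are negated, since the caller minimizes.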
+
+        """
+        MetaModel = self.MetaModel
+        ED_X = MetaModel.ExpDesign.X
+        out_dict_y = MetaModel.ExpDesign.Y
+        out_names = MetaModel.ModelObj.Output.names
+
+        # Run the Metamodel for the candidate
+        X_can = X_can.reshape(1, -1)
+        Y_PC_can, std_PC_can = MetaModel.eval_metamodel(samples=X_can)
+
+        if util_func.lower() == 'alm':
+            # ----- Entropy/MMSE/active learning MacKay(ALM) -----
+            # Compute prediction variance of the old model
+            canPredVar = {key: std_PC_can[key]**2 for key in out_names}
+
+            varPCE = np.zeros((len(out_names), X_can.shape[0]))
+            for KeyIdx, key in enumerate(out_names):
+                varPCE[KeyIdx] = np.max(canPredVar[key], axis=1)
+            score = np.max(varPCE, axis=0)
+
+        elif util_func.lower() == 'eigf':
+            # ----- Expected Improvement for Global fit -----
+            # Find closest EDX to the candidate
+            distances = distance.cdist(ED_X, X_can, 'euclidean')
+            index = np.argmin(distances)
+
+            # Compute prediction error and variance of the old model
+            predError = {key: Y_PC_can[key] for key in out_names}
+            canPredVar = {key: std_PC_can[key]**2 for key in out_names}
+
+            # Eq (5) from Liu et al. (2018)
+            EIGF_PCE = np.zeros((len(out_names), X_can.shape[0]))
+            for KeyIdx, key in enumerate(out_names):
+                residual = predError[key] - out_dict_y[key][int(index)]
+                var = canPredVar[key]
+                EIGF_PCE[KeyIdx] = np.max(residual**2 + var, axis=1)
+            score = np.max(EIGF_PCE, axis=0)
+
+        return -1 * score  # -1 is for minimization instead of maximization
+
+    # -------------------------------------------------------------------------
+    def util_BayesianActiveDesign(self, X_can, sigma2Dict, var='DKL'):
+        """
+        Computes scores based on Bayesian active design criterion (var).
+
+        It is based on the following paper:
+        Oladyshkin, Sergey, Farid Mohammadi, Ilja Kroeker, and Wolfgang Nowak.
+        "Bayesian3 active learning for the gaussian process emulator using
+        information theory." Entropy 22, no. 8 (2020): 890.
+
+        Parameters
+        ----------
+        X_can : array of shape (n_samples, n_params)
+            Candidate samples.
+        sigma2Dict : dict
+            A dictionary containing the measurement errors (sigma^2).
+        var : string, optional
+            BAL design criterion. The default is 'DKL'.
+
+        Returns
+        -------
+        float
+            Score.
+
+        """
+
+        # Evaluate the PCE metamodel at the candidate location
+        Y_mean_can, Y_std_can = self.MetaModel.eval_metamodel(
+            samples=np.array([X_can])
+        )
+
+        # Get the data
+        obs_data = self.observations
+        n_obs = self.Model.n_obs
+        # TODO: Analytical DKL
+        # Sample a distribution for a normal dist
+        # with Y_mean_can as the mean and Y_std_can as std.
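+        # For reference, the commented block below sketches the analytical
+        # route: for a Gaussian prior and posterior, the Kullback-Leibler
+        # divergence has the closed form
+        #   DKL = 0.5 * (tr(inv(covPrior) @ covPost)
+        #                + dMu.T @ inv(covPrior) @ dMu
+        #                - n + ln(det(covPrior) / det(covPost)))
+        # with dMu = priorMean - meanPost, which is what term1 to term3
+        # below compute.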
+ + # priorMean, priorSigma2, Obs = np.empty((0)),np.empty((0)),np.empty((0)) + + # for key in list(Y_mean_can): + # # concatenate the measurement error + # Obs = np.hstack((Obs,ObservationData[key])) + + # # concatenate the mean and variance of prior predictive + # means, stds = Y_mean_can[key][0], Y_std_can[key][0] + # priorMean = np.hstack((priorSigma2,means)) + # priorSigma2 = np.hstack((priorSigma2,stds**2)) + + # # Covariance Matrix of prior + # covPrior = np.zeros((priorSigma2.shape[0], priorSigma2.shape[0]), float) + # np.fill_diagonal(covPrior, priorSigma2) + + # # Covariance Matrix of Likelihood + # covLikelihood = np.zeros((sigma2Dict.shape[0], sigma2Dict.shape[0]), float) + # np.fill_diagonal(covLikelihood, sigma2Dict) + + # # Calculate moments of the posterior (Analytical derivation) + # n = priorSigma2.shape[0] + # covPost = np.dot(np.dot(covPrior,np.linalg.inv(covPrior+(covLikelihood/n))),covLikelihood/n) + + # meanPost = np.dot(np.dot(covPrior,np.linalg.inv(covPrior+(covLikelihood/n))) , Obs) + \ + # np.dot(np.dot(covPrior,np.linalg.inv(covPrior+(covLikelihood/n))), + # priorMean/n) + # # Compute DKL from prior to posterior + # term1 = np.trace(np.dot(np.linalg.inv(covPrior),covPost)) + # deltaMean = priorMean-meanPost + # term2 = np.dot(np.dot(deltaMean,np.linalg.inv(covPrior)),deltaMean[:,None]) + # term3 = np.log(np.linalg.det(covPrior)/np.linalg.det(covPost)) + # DKL = 0.5 * (term1 + term2 - n + term3)[0] + + # ---------- Inner MC simulation for computing Utility Value ---------- + # Estimation of the integral via Monte Varlo integration + MCsize = 20000 + ESS = 0 + + while ((ESS > MCsize) or (ESS < 1)): + + # Sample a distribution for a normal dist + # with Y_mean_can as the mean and Y_std_can as std. + Y_MC, std_MC = {}, {} + logPriorLikelihoods = np.zeros((MCsize)) + for key in list(Y_mean_can): + means, stds = Y_mean_can[key][0], Y_std_can[key][0] + # cov = np.zeros((means.shape[0], means.shape[0]), float) + # np.fill_diagonal(cov, stds**2) + + Y_MC[key] = np.zeros((MCsize, n_obs)) + logsamples = np.zeros((MCsize, n_obs)) + for i in range(n_obs): + NormalDensity = stats.norm(means[i], stds[i]) + Y_MC[key][:, i] = NormalDensity.rvs(MCsize) + logsamples[:, i] = NormalDensity.logpdf(Y_MC[key][:, i]) + + logPriorLikelihoods = np.sum(logsamples, axis=1) + std_MC[key] = np.zeros((MCsize, means.shape[0])) + + # Likelihood computation (Comparison of data and simulation + # results via PCE with candidate design) + likelihoods = self.__normpdf(Y_MC, std_MC, obs_data, sigma2Dict) + + # Check the Effective Sample Size (1<ESS<MCsize) + ESS = 1 / np.sum(np.square(likelihoods/np.nansum(likelihoods))) + + # Enlarge sample size if it doesn't fulfill the criteria + if ((ESS > MCsize) or (ESS < 1)): + MCsize *= 10 + ESS = 0 + + # Rejection Step + # Random numbers between 0 and 1 + unif = np.random.rand(1, MCsize)[0] + + # Reject the poorly performed prior + accepted = (likelihoods/np.max(likelihoods)) >= unif + + # Prior-based estimation of BME + logBME = np.log(np.nanmean(likelihoods)) + + # Posterior-based expectation of likelihoods + postLikelihoods = likelihoods[accepted] + postExpLikelihoods = np.mean(np.log(postLikelihoods)) + + # Posterior-based expectation of prior densities + postExpPrior = np.mean(logPriorLikelihoods[accepted]) + + # Utility function Eq.2 in Ref. 
(2) + # Posterior covariance matrix after observing data y + # Kullback-Leibler Divergence (Sergey's paper) + if var == 'DKL': + + # TODO: Calculate the correction factor for BME + # BMECorrFactor = self.BME_Corr_Weight(PCE_SparseBayes_can, + # ObservationData, sigma2Dict) + # BME += BMECorrFactor + # Haun et al implementation + # U_J_d = np.mean(np.log(Likelihoods[Likelihoods!=0])- logBME) + U_J_d = postExpLikelihoods - logBME + + # Marginal log likelihood + elif var == 'BME': + U_J_d = logBME + + # Entropy-based information gain + elif var == 'infEntropy': + logBME = np.log(np.nanmean(likelihoods)) + infEntropy = logBME - postExpPrior - postExpLikelihoods + U_J_d = infEntropy * -1 # -1 for minimization + + # Bayesian information criterion + elif var == 'BIC': + coeffs = self.MetaModel.coeffs_dict.values() + nModelParams = max(len(v) for val in coeffs for v in val.values()) + maxL = np.nanmax(likelihoods) + U_J_d = -2 * np.log(maxL) + np.log(n_obs) * nModelParams + + # Akaike information criterion + elif var == 'AIC': + coeffs = self.MetaModel.coeffs_dict.values() + nModelParams = max(len(v) for val in coeffs for v in val.values()) + maxlogL = np.log(np.nanmax(likelihoods)) + AIC = -2 * maxlogL + 2 * nModelParams + # 2 * nModelParams * (nModelParams+1) / (n_obs-nModelParams-1) + penTerm = 0 + U_J_d = 1*(AIC + penTerm) + + # Deviance information criterion + elif var == 'DIC': + # D_theta_bar = np.mean(-2 * Likelihoods) + N_star_p = 0.5 * np.var(np.log(likelihoods[likelihoods != 0])) + Likelihoods_theta_mean = self.__normpdf( + Y_mean_can, Y_std_can, obs_data, sigma2Dict + ) + DIC = -2 * np.log(Likelihoods_theta_mean) + 2 * N_star_p + + U_J_d = DIC + + else: + print('The algorithm you requested has not been implemented yet!') + + # Handle inf and NaN (replace by zero) + if np.isnan(U_J_d) or U_J_d == -np.inf or U_J_d == np.inf: + U_J_d = 0.0 + + # Clear memory + del likelihoods + del Y_MC + del std_MC + gc.collect(generation=2) + + return -1 * U_J_d # -1 is for minimization instead of maximization + + # ------------------------------------------------------------------------- + def update_metamodel(self, MetaModel, output, y_hat_can, univ_p_val, index, + new_pca=False): + BasisIndices = MetaModel.basis_dict[output]["y_"+str(index+1)] + clf_poly = MetaModel.clf_poly[output]["y_"+str(index+1)] + Mn = clf_poly.coef_ + Sn = clf_poly.sigma_ + beta = clf_poly.alpha_ + active = clf_poly.active_ + Psi = self.MetaModel.create_psi(BasisIndices, univ_p_val) + + Sn_new_inv = np.linalg.inv(Sn) + Sn_new_inv += beta * np.dot(Psi[:, active].T, Psi[:, active]) + Sn_new = np.linalg.inv(Sn_new_inv) + + Mn_new = np.dot(Sn_new_inv, Mn[active]).reshape(-1, 1) + Mn_new += beta * np.dot(Psi[:, active].T, y_hat_can) + Mn_new = np.dot(Sn_new, Mn_new).flatten() + + # Compute the old and new moments of PCEs + mean_old = Mn[0] + mean_new = Mn_new[0] + std_old = np.sqrt(np.sum(np.square(Mn[1:]))) + std_new = np.sqrt(np.sum(np.square(Mn_new[1:]))) + + # Back transformation if PCA is selected. 
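+        # In that case the moments above live in the PCA component space;
+        # they are mapped back to the physical output space below using the
+        # PCA mean and loadings (e.g. the variance picks up a factor
+        # components_[:, index]**2).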
+ if MetaModel.dim_red_method.lower() == 'pca': + old_pca = MetaModel.pca[output] + mean_old = old_pca.mean_[index] + mean_old += np.sum(mean_old * old_pca.components_[:, index]) + std_old = np.sqrt(np.sum(std_old**2 * + old_pca.components_[:, index]**2)) + mean_new = new_pca.mean_[index] + mean_new += np.sum(mean_new * new_pca.components_[:, index]) + std_new = np.sqrt(np.sum(std_new**2 * + new_pca.components_[:, index]**2)) + # print(f"mean_old: {mean_old:.2f} mean_new: {mean_new:.2f}") + # print(f"std_old: {std_old:.2f} std_new: {std_new:.2f}") + # Store the old and new moments of PCEs + results = { + 'mean_old': mean_old, + 'mean_new': mean_new, + 'std_old': std_old, + 'std_new': std_new + } + return results + + # ------------------------------------------------------------------------- + def util_BayesianDesign_old(self, X_can, X_MC, sigma2Dict, var='DKL'): + """ + Computes scores based on Bayesian sequential design criterion (var). + + Parameters + ---------- + X_can : array of shape (n_samples, n_params) + Candidate samples. + sigma2Dict : dict + A dictionary containing the measurement errors (sigma^2). + var : string, optional + Bayesian design criterion. The default is 'DKL'. + + Returns + ------- + float + Score. + + """ + + # To avoid changes ub original aPCE object + Model = self.Model + MetaModel = deepcopy(self.MetaModel) + old_EDY = MetaModel.ExpDesign.Y + + # Evaluate the PCE metamodels using the candidate design + Y_PC_can, Y_std_can = self.MetaModel.eval_metamodel( + samples=np.array([X_can]) + ) + + # Generate y from posterior predictive + m_size = 100 + y_hat_samples = {} + for idx, key in enumerate(Model.Output.names): + means, stds = Y_PC_can[key][0], Y_std_can[key][0] + y_hat_samples[key] = np.random.multivariate_normal( + means, np.diag(stds), m_size) + + # Create the SparseBayes-based PCE metamodel: + MetaModel.input_obj.poly_coeffs_flag = False + univ_p_val = self.MetaModel.univ_basis_vals(X_can) + G_n_m_all = np.zeros((m_size, len(Model.Output.names), Model.n_obs)) + + for i in range(m_size): + for idx, key in enumerate(Model.Output.names): + if MetaModel.dim_red_method.lower() == 'pca': + # Equal number of components + new_outputs = np.vstack( + (old_EDY[key], y_hat_samples[key][i]) + ) + new_pca, _ = MetaModel.pca_transformation(new_outputs) + target = new_pca.transform( + y_hat_samples[key][i].reshape(1, -1) + )[0] + else: + new_pca, target = False, y_hat_samples[key][i] + + for j in range(len(target)): + + # Update surrogate + result = self.update_metamodel( + MetaModel, key, target[j], univ_p_val, j, new_pca) + + # Compute Expected Information Gain (Eq. 39) + G_n_m = np.log(result['std_old']/result['std_new']) - 1./2 + G_n_m += result['std_new']**2 / (2*result['std_old']**2) + G_n_m += (result['mean_new'] - result['mean_old'])**2 /\ + (2*result['std_old']**2) + + G_n_m_all[i, idx, j] = G_n_m + + U_J_d = G_n_m_all.mean(axis=(1, 2)).mean() + return -1 * U_J_d + + # ------------------------------------------------------------------------- + def util_BayesianDesign(self, X_can, X_MC, sigma2Dict, var='DKL'): + """ + Computes scores based on Bayesian sequential design criterion (var). + + Parameters + ---------- + X_can : array of shape (n_samples, n_params) + Candidate samples. + sigma2Dict : dict + A dictionary containing the measurement errors (sigma^2). + var : string, optional + Bayesian design criterion. The default is 'DKL'. + + Returns + ------- + float + Score. 
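+
+        Notes
+        -----
+        Besides `DKL`, this method handles `mi` (mutual information,
+        adapted from Beck & Guillas), `ALC` (active learning Cohn),
+        `BME`, `BayesRisk`, `infEntropy`, `DPP` and `APP`. All scores
+        are negated, since the caller minimizes.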
+ + """ + + # To avoid changes ub original aPCE object + Model = self.Model + MetaModel = deepcopy(self.MetaModel) + out_names = MetaModel.ModelObj.Output.names + if X_can.ndim == 1: + X_can = X_can.reshape(1, -1) + + # Compute the mean and std based on the MetaModel + # pce_means, pce_stds = self._compute_pce_moments(MetaModel) + if var == 'ALC': + Y_MC, Y_MC_std = MetaModel.eval_metamodel(samples=X_MC) + + # Old Experimental design + oldExpDesignX = MetaModel.ExpDesign.X + oldExpDesignY = MetaModel.ExpDesign.Y + + # Evaluate the PCE metamodels at that location ??? + Y_PC_can, Y_std_can = MetaModel.eval_metamodel(samples=X_can) + + # Add all suggestion as new ExpDesign + NewExpDesignX = np.vstack((oldExpDesignX, X_can)) + + NewExpDesignY = {} + for key in oldExpDesignY.keys(): + try: + NewExpDesignY[key] = np.vstack((oldExpDesignY[key], + Y_PC_can[key])) + except: + NewExpDesignY[key] = oldExpDesignY[key] + + MetaModel.ExpDesign.sampling_method = 'user' + MetaModel.ExpDesign.X = NewExpDesignX + MetaModel.ExpDesign.Y = NewExpDesignY + + # Train the model for the observed data using x_can + MetaModel.input_obj.poly_coeffs_flag = False + PCE_Model_can = MetaModel.train_norm_design(Model, parallel=False) + + if var.lower() == 'mi': + # Mutual information based on Krause et al + # Adapted from Beck & Guillas (MICE) paper + _, std_PC_can = PCE_Model_can.eval_metamodel(samples=X_can) + std_can = {key: std_PC_can[key] for key in out_names} + + std_old = {key: Y_std_can[key] for key in out_names} + + varPCE = np.zeros((len(out_names))) + for i, key in enumerate(out_names): + varPCE[i] = np.mean(std_old[key]**2/std_can[key]**2) + score = np.mean(varPCE) + + return -1 * score + + elif var.lower() == 'alc': + # Active learning based on Gramyc and Lee + # Adaptive design and analysis of supercomputer experiments Techno- + # metrics, 51 (2009), pp. 130–145. + + # Evaluate the MetaModel at the given samples + Y_MC_can, Y_MC_std_can = PCE_Model_can.eval_metamodel(samples=X_MC) + + # Compute the score + score = [] + for i, key in enumerate(out_names): + pce_var = Y_MC_std_can[key]**2 + pce_var_can = Y_MC_std[key]**2 + score.append(np.mean(pce_var-pce_var_can, axis=0)) + score = np.mean(score) + + return -1 * score + + # ---------- Inner MC simulation for computing Utility Value ---------- + # Estimation of the integral via Monte Varlo integration + MCsize = X_MC.shape[0] + ESS = 0 + + while ((ESS > MCsize) or (ESS < 1)): + + # Enriching Monte Carlo samples if need be + if ESS != 0: + X_MC = self.MetaModel.ExpDesign.generate_samples( + MCsize, 'random' + ) + + # Evaluate the MetaModel at the given samples + Y_MC, std_MC = PCE_Model_can.eval_metamodel(samples=X_MC) + + # Likelihood computation (Comparison of data and simulation + # results via PCE with candidate design) + likelihoods = self.__normpdf( + Y_MC, std_MC, self.observations, sigma2Dict + ) + + # Check the Effective Sample Size (1<ESS<MCsize) + ESS = 1 / np.sum(np.square(likelihoods/np.sum(likelihoods))) + + # Enlarge sample size if it doesn't fulfill the criteria + if ((ESS > MCsize) or (ESS < 1)): + print("--- increasing MC size---") + MCsize *= 10 + ESS = 0 + + # Rejection Step + # Random numbers between 0 and 1 + unif = np.random.rand(1, MCsize)[0] + + # Reject the poorly performed prior + accepted = (likelihoods/np.max(likelihoods)) >= unif + + # -------------------- Utility functions -------------------- + # Utility function Eq.2 in Ref. 
(2) + # Kullback-Leibler Divergence (Sergey's paper) + if var == 'DKL': + + # Prior-based estimation of BME + logBME = np.log(np.nanmean(likelihoods, dtype=np.float128)) + + # Posterior-based expectation of likelihoods + postLikelihoods = likelihoods[accepted] + postExpLikelihoods = np.mean(np.log(postLikelihoods)) + + # Haun et al implementation + U_J_d = np.mean(np.log(likelihoods[likelihoods != 0]) - logBME) + + # U_J_d = np.sum(G_n_m_all) + # Ryan et al (2014) implementation + # importanceWeights = Likelihoods[Likelihoods!=0]/np.sum(Likelihoods[Likelihoods!=0]) + # U_J_d = np.mean(importanceWeights*np.log(Likelihoods[Likelihoods!=0])) - logBME + + # U_J_d = postExpLikelihoods - logBME + + # Marginal likelihood + elif var == 'BME': + + # Prior-based estimation of BME + logBME = np.log(np.nanmean(likelihoods)) + U_J_d = logBME + + # Bayes risk likelihood + elif var == 'BayesRisk': + + U_J_d = -1 * np.var(likelihoods) + + # Entropy-based information gain + elif var == 'infEntropy': + # Prior-based estimation of BME + logBME = np.log(np.nanmean(likelihoods)) + + # Posterior-based expectation of likelihoods + postLikelihoods = likelihoods[accepted] / np.nansum(likelihoods[accepted]) + postExpLikelihoods = np.mean(np.log(postLikelihoods)) + + # Posterior-based expectation of prior densities + postExpPrior = np.mean(logPriorLikelihoods[accepted]) + + infEntropy = logBME - postExpPrior - postExpLikelihoods + + U_J_d = infEntropy * -1 # -1 for minimization + + # D-Posterior-precision + elif var == 'DPP': + X_Posterior = X_MC[accepted] + # covariance of the posterior parameters + U_J_d = -np.log(np.linalg.det(np.cov(X_Posterior))) + + # A-Posterior-precision + elif var == 'APP': + X_Posterior = X_MC[accepted] + # trace of the posterior parameters + U_J_d = -np.log(np.trace(np.cov(X_Posterior))) + + else: + print('The algorithm you requested has not been implemented yet!') + + # Clear memory + del likelihoods + del Y_MC + del std_MC + gc.collect(generation=2) + + return -1 * U_J_d # -1 is for minimization instead of maximization + + # ------------------------------------------------------------------------- + def subdomain(self, Bounds, n_new_samples): + """ + Divides a domain defined by Bounds into sub domains. + + Parameters + ---------- + Bounds : list of tuples + List of lower and upper bounds. + n_new_samples : TYPE + DESCRIPTION. + + Returns + ------- + Subdomains : TYPE + DESCRIPTION. + + """ + n_params = self.MetaModel.n_params + n_subdomains = n_new_samples + 1 + LinSpace = np.zeros((n_params, n_subdomains)) + + for i in range(n_params): + LinSpace[i] = np.linspace(start=Bounds[i][0], stop=Bounds[i][1], + num=n_subdomains) + Subdomains = [] + for k in range(n_subdomains-1): + mylist = [] + for i in range(n_params): + mylist.append((LinSpace[i, k+0], LinSpace[i, k+1])) + Subdomains.append(tuple(mylist)) + + return Subdomains + + # ------------------------------------------------------------------------- + def run_util_func(self, method, candidates, index, sigma2Dict=None, + var=None, X_MC=None): + """ + Runs the utility function based on the given method. + + Parameters + ---------- + method : string + Exploitation method: `VarOptDesign`, `BayesActDesign` and + `BayesOptDesign`. + candidates : array of shape (n_samples, n_params) + All candidate parameter sets. + index : int + ExpDesign index. + sigma2Dict : dict, optional + A dictionary containing the measurement errors (sigma^2). The + default is None. + var : string, optional + Utility function. The default is None. 
X_MC : array of shape (n_samples, n_params), optional
+            Monte Carlo samples passed on to `util_BayesianDesign`. The
+            default is None.
+
+        Returns
+        -------
+        index : int
+            ExpDesign index (returned unchanged, to identify the group).
+        array
+            Negative utility scores of the candidates.
+
+        """
+
+        if method.lower() == 'varoptdesign':
+            # U_J_d = self.util_VarBasedDesign(candidates, index, var)
+            U_J_d = np.zeros((candidates.shape[0]))
+            for idx, X_can in tqdm(enumerate(candidates), ascii=True,
+                                   desc="varoptdesign"):
+                U_J_d[idx] = self.util_VarBasedDesign(X_can, index, var)
+
+        elif method.lower() == 'bayesactdesign':
+            NCandidate = candidates.shape[0]
+            U_J_d = np.zeros((NCandidate))
+            for idx, X_can in tqdm(enumerate(candidates), ascii=True,
+                                   desc="OptBayesianDesign"):
+                U_J_d[idx] = self.util_BayesianActiveDesign(X_can, sigma2Dict,
+                                                            var)
+        elif method.lower() == 'bayesoptdesign':
+            NCandidate = candidates.shape[0]
+            U_J_d = np.zeros((NCandidate))
+            for idx, X_can in tqdm(enumerate(candidates), ascii=True,
+                                   desc="OptBayesianDesign"):
+                U_J_d[idx] = self.util_BayesianDesign(X_can, X_MC, sigma2Dict,
+                                                      var)
+        return (index, -1 * U_J_d)
+
+    # -------------------------------------------------------------------------
+    def dual_annealing(self, method, Bounds, sigma2Dict, var, Run_No,
+                       verbose=False):
+        """
+        Exploration algorithm to find the optimum in the parameter space.
+
+        Parameters
+        ----------
+        method : string
+            Exploitation method: `VarOptDesign`, `BayesActDesign` and
+            `BayesOptDesign`.
+        Bounds : list of tuples
+            List of lower and upper boundaries of parameters.
+        sigma2Dict : dict
+            A dictionary containing the measurement errors (sigma^2).
+        Run_No : int
+            Run number.
+        verbose : bool, optional
+            Print out a summary. The default is False.
+
+        Returns
+        -------
+        Run_No : int
+            Run number.
+        array
+            Optimal candidate.
+
+        """
+
+        Model = self.Model
+        max_func_itr = self.MetaModel.ExpDesign.max_func_itr
+
+        if method == 'VarOptDesign':
+            Res_Global = opt.dual_annealing(self.util_VarBasedDesign,
+                                            bounds=Bounds,
+                                            args=(Model, var),
+                                            maxfun=max_func_itr)
+
+        elif method == 'BayesOptDesign':
+            Res_Global = opt.dual_annealing(self.util_BayesianDesign,
+                                            bounds=Bounds,
+                                            args=(Model, sigma2Dict, var),
+                                            maxfun=max_func_itr)
+
+        if verbose:
+            print(f"global minimum: xmin = {Res_Global.x}, "
+                  f"f(xmin) = {Res_Global.fun:.6f}, nfev = {Res_Global.nfev}")
+
+        return (Run_No, Res_Global.x)
+
+    # -------------------------------------------------------------------------
+    def tradoff_weights(self, tradeoff_scheme, old_EDX, old_EDY):
+        """
+        Calculates weights for exploration scores based on the requested
+        scheme: `None`, `equal`, `epsilon-decreasing` and `adaptive`.
+
+        `None`: No exploration.
+        `equal`: Same weights for exploration and exploitation scores.
+        `epsilon-decreasing`: Start with more exploration and increase the
+        influence of exploitation along the way with an exponential decay
+        function.
+        `adaptive`: An adaptive method based on:
+        Liu, Haitao, Jianfei Cai, and Yew-Soon Ong. "An adaptive sampling
+        approach for Kriging metamodeling by maximizing expected prediction
+        error." Computers & Chemical Engineering 106 (2017): 171-182.
+
+        Parameters
+        ----------
+        tradeoff_scheme : string
+            Trade-off scheme for exploration and exploitation scores.
+        old_EDX : array (n_samples, n_params)
+            Old experimental design (training points).
+        old_EDY : dict
+            Old model responses (targets).
+
+        Returns
+        -------
+        exploration_weight : float
+            Exploration weight.
+        exploitation_weight: float
+            Exploitation weight.
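+
+        Notes
+        -----
+        For `epsilon-decreasing`, the exploration weight is read off an
+        exponential decay window of length
+        ``n_max_samples - n_init_samples`` with time constant
+        ``tau2 = -(n_max_samples - n_init_samples - 1) / ln(1e-8)``, so
+        it falls from 1 towards 1e-8 over the design iterations. For
+        `adaptive`, it is ``min(0.5 * mseError / mseCVError, 1)``
+        following Liu et al. (2017).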
+ + """ + if tradeoff_scheme is None: + exploration_weight = 0 + + elif tradeoff_scheme == 'equal': + exploration_weight = 0.5 + + elif tradeoff_scheme == 'epsilon-decreasing': + # epsilon-decreasing scheme + # Start with more exploration and increase the influence of + # exploitation along the way with a exponential decay function + initNSamples = self.MetaModel.ExpDesign.n_init_samples + n_max_samples = self.MetaModel.ExpDesign.n_max_samples + + itrNumber = (self.MetaModel.ExpDesign.X.shape[0] - initNSamples) + itrNumber //= self.MetaModel.ExpDesign.n_new_samples + + tau2 = -(n_max_samples-initNSamples-1) / np.log(1e-8) + exploration_weight = signal.exponential(n_max_samples-initNSamples, + 0, tau2, False)[itrNumber] + + elif tradeoff_scheme == 'adaptive': + + # Extract itrNumber + initNSamples = self.MetaModel.ExpDesign.n_init_samples + n_max_samples = self.MetaModel.ExpDesign.n_max_samples + itrNumber = (self.MetaModel.ExpDesign.X.shape[0] - initNSamples) + itrNumber //= self.MetaModel.ExpDesign.n_new_samples + + if itrNumber == 0: + exploration_weight = 0.5 + else: + # New adaptive trade-off according to Liu et al. (2017) + # Mean squared error for last design point + last_EDX = old_EDX[-1].reshape(1, -1) + lastPCEY, _ = self.MetaModel.eval_metamodel(samples=last_EDX) + pce_y = np.array(list(lastPCEY.values()))[:, 0] + y = np.array(list(old_EDY.values()))[:, -1, :] + mseError = mean_squared_error(pce_y, y) + + # Mean squared CV - error for last design point + pce_y_prev = np.array(list(self._y_hat_prev.values()))[:, 0] + mseCVError = mean_squared_error(pce_y_prev, y) + + exploration_weight = min([0.5*mseError/mseCVError, 1]) + + # Exploitation weight + exploitation_weight = 1 - exploration_weight + + return exploration_weight, exploitation_weight + + # ------------------------------------------------------------------------- + def opt_SeqDesign(self, sigma2, n_candidates=5, var='DKL'): + """ + Runs optimal sequential design. + + Parameters + ---------- + sigma2 : dict, optional + A dictionary containing the measurement errors (sigma^2). The + default is None. + n_candidates : int, optional + Number of candidate samples. The default is 5. + var : string, optional + Utility function. The default is None. + + Raises + ------ + NameError + Wrong utility function. + + Returns + ------- + Xnew : array (n_samples, n_params) + Selected new training point(s). 
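+
+        Notes
+        -----
+        Candidates are ranked by the trade-off
+        ``totalScore = exploit_w * exploitation_score +
+        explore_w * exploration_score`` and the `n_new_samples` best
+        candidates are returned (for Voronoi-based exploration, one
+        sample per selected cell).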
+ """ + + # Initialization + MetaModel = self.MetaModel + Bounds = MetaModel.bound_tuples + n_new_samples = MetaModel.ExpDesign.n_new_samples + explore_method = MetaModel.ExpDesign.explore_method + exploit_method = MetaModel.ExpDesign.exploit_method + n_cand_groups = MetaModel.ExpDesign.n_cand_groups + tradeoff_scheme = MetaModel.ExpDesign.tradeoff_scheme + + old_EDX = MetaModel.ExpDesign.X + old_EDY = MetaModel.ExpDesign.Y.copy() + ndim = MetaModel.ExpDesign.X.shape[1] + OutputNames = MetaModel.ModelObj.Output.names + + # ----------------------------------------- + # ----------- CUSTOMIZED METHODS ---------- + # ----------------------------------------- + # Utility function exploit_method provided by user + if exploit_method.lower() == 'user': + + Xnew, filteredSamples = MetaModel.ExpDesign.ExploitFunction(self) + + print("\n") + print("\nXnew:\n", Xnew) + + return Xnew, filteredSamples + + # ----------------------------------------- + # ---------- EXPLORATION METHODS ---------- + # ----------------------------------------- + if explore_method == 'dual annealing': + # ------- EXPLORATION: OPTIMIZATION ------- + import time + start_time = time.time() + + # Divide the domain to subdomains + args = [] + subdomains = self.subdomain(Bounds, n_new_samples) + for i in range(n_new_samples): + args.append((exploit_method, subdomains[i], sigma2, var, i)) + + # Multiprocessing + pool = multiprocessing.Pool(multiprocessing.cpu_count()) + + # With Pool.starmap_async() + results = pool.starmap_async(self.dual_annealing, args).get() + + # Close the pool + pool.close() + + Xnew = np.array([results[i][1] for i in range(n_new_samples)]) + + print("\nXnew:\n", Xnew) + + elapsed_time = time.time() - start_time + print("\n") + print(f"elapsed_time: {round(elapsed_time,2)} sec.") + print('-'*20) + + elif explore_method == 'LOOCV': + # ----------------------------------------------------------------- + # TODO: LOOCV model construnction based on Feng et al. 
(2020) + # 'LOOCV': + # Initilize the ExploitScore array + + # Generate random samples + allCandidates = MetaModel.ExpDesign.generate_samples(n_candidates, + 'random') + + # Construct error model based on LCerror + errorModel = MetaModel.create_ModelError(old_EDX, self.LCerror) + self.errorModel.append(copy(errorModel)) + + # Evaluate the error models for allCandidates + eLCAllCands, _ = errorModel.eval_errormodel(allCandidates) + # Select the maximum as the representative error + eLCAllCands = np.dstack(eLCAllCands.values()) + eLCAllCandidates = np.max(eLCAllCands, axis=1)[:, 0] + + # Normalize the error w.r.t the maximum error + scoreExploration = eLCAllCandidates / np.sum(eLCAllCandidates) + + else: + # ------- EXPLORATION: SPACE-FILLING DESIGN ------- + # Generate candidate samples from Exploration class + explore = Exploration(MetaModel, n_candidates) + explore.w = 100 # * ndim #500 + # Select criterion (mc-intersite-proj-th, mc-intersite-proj) + explore.mc_criterion = 'mc-intersite-proj' + allCandidates, scoreExploration = explore.get_exploration_samples() + + # Temp: ---- Plot all candidates ----- + if ndim == 2: + def plotter(points, allCandidates, Method, + scoreExploration=None): + if Method == 'Voronoi': + from scipy.spatial import Voronoi, voronoi_plot_2d + vor = Voronoi(points) + fig = voronoi_plot_2d(vor) + ax1 = fig.axes[0] + else: + fig = plt.figure() + ax1 = fig.add_subplot(111) + ax1.scatter(points[:, 0], points[:, 1], s=10, c='r', + marker="s", label='Old Design Points') + ax1.scatter(allCandidates[:, 0], allCandidates[:, 1], s=10, + c='b', marker="o", label='Design candidates') + for i in range(points.shape[0]): + txt = 'p'+str(i+1) + ax1.annotate(txt, (points[i, 0], points[i, 1])) + if scoreExploration is not None: + for i in range(allCandidates.shape[0]): + txt = str(round(scoreExploration[i], 5)) + ax1.annotate(txt, (allCandidates[i, 0], + allCandidates[i, 1])) + + plt.xlim(self.bound_tuples[0]) + plt.ylim(self.bound_tuples[1]) + # plt.show() + plt.legend(loc='upper left') + + # ----------------------------------------- + # --------- EXPLOITATION METHODS ---------- + # ----------------------------------------- + if exploit_method == 'BayesOptDesign' or\ + exploit_method == 'BayesActDesign': + + # ------- Calculate Exoploration weight ------- + # Compute exploration weight based on trade off scheme + explore_w, exploit_w = self.tradoff_weights(tradeoff_scheme, + old_EDX, + old_EDY) + print(f"\n Exploration weight={explore_w:0.3f} " + f"Exploitation weight={exploit_w:0.3f}\n") + + # ------- EXPLOITATION: BayesOptDesign & ActiveLearning ------- + if explore_w != 1.0: + + # Create a sample pool for rejection sampling + MCsize = 15000 + X_MC = MetaModel.ExpDesign.generate_samples(MCsize, 'random') + candidates = MetaModel.ExpDesign.generate_samples( + MetaModel.ExpDesign.max_func_itr, 'latin_hypercube') + + # Split the candidates in groups for multiprocessing + split_cand = np.array_split( + candidates, n_cand_groups, axis=0 + ) + + results = Parallel(n_jobs=-1, backend='threading')( + delayed(self.run_util_func)( + exploit_method, split_cand[i], i, sigma2, var, X_MC) + for i in range(n_cand_groups)) + # out = map(self.run_util_func, + # [exploit_method]*n_cand_groups, + # split_cand, + # range(n_cand_groups), + # [sigma2] * n_cand_groups, + # [var] * n_cand_groups, + # [X_MC] * n_cand_groups + # ) + # results = list(out) + + # Retrieve the results and append them + U_J_d = np.concatenate([results[NofE][1] for NofE in + range(n_cand_groups)]) + + # Check if all scores are 
inf + if np.isinf(U_J_d).all() or np.isnan(U_J_d).all(): + U_J_d = np.ones(len(U_J_d)) + + # Get the expected value (mean) of the Utility score + # for each cell + if explore_method == 'Voronoi': + U_J_d = np.mean(U_J_d.reshape(-1, n_candidates), axis=1) + + # create surrogate model for U_J_d + from sklearn.preprocessing import MinMaxScaler + # Take care of inf entries + good_indices = [i for i, arr in enumerate(U_J_d) + if np.isfinite(arr).all()] + scaler = MinMaxScaler() + X_S = scaler.fit_transform(candidates[good_indices]) + gp = MetaModel.gaussian_process_emulator( + X_S, U_J_d[good_indices], autoSelect=True + ) + U_J_d = gp.predict(scaler.transform(allCandidates)) + + # Normalize U_J_d + norm_U_J_d = U_J_d / np.sum(U_J_d) + print("norm_U_J_d:\n", norm_U_J_d) + else: + norm_U_J_d = np.zeros((len(scoreExploration))) + + # ------- Calculate Total score ------- + # ------- Trade off between EXPLORATION & EXPLOITATION ------- + # Total score + totalScore = exploit_w * norm_U_J_d + totalScore += explore_w * scoreExploration + + # temp: Plot + # dim = self.ExpDesign.X.shape[1] + # if dim == 2: + # plotter(self.ExpDesign.X, allCandidates, explore_method) + + # ------- Select the best candidate ------- + # find an optimal point subset to add to the initial design by + # maximization of the utility score and taking care of NaN values + temp = totalScore.copy() + temp[np.isnan(totalScore)] = -np.inf + sorted_idxtotalScore = np.argsort(temp)[::-1] + bestIdx = sorted_idxtotalScore[:n_new_samples] + + # select the requested number of samples + if explore_method == 'Voronoi': + Xnew = np.zeros((n_new_samples, ndim)) + for i, idx in enumerate(bestIdx): + X_can = explore.closestPoints[idx] + + # Calculate the maxmin score for the region of interest + newSamples, maxminScore = explore.get_mc_samples(X_can) + + # select the requested number of samples + Xnew[i] = newSamples[np.argmax(maxminScore)] + else: + Xnew = allCandidates[sorted_idxtotalScore[:n_new_samples]] + + elif exploit_method == 'VarOptDesign': + # ------- EXPLOITATION: VarOptDesign ------- + UtilMethod = var + + # ------- Calculate Exoploration weight ------- + # Compute exploration weight based on trade off scheme + explore_w, exploit_w = self.tradoff_weights(tradeoff_scheme, + old_EDX, + old_EDY) + print(f"\nweightExploration={explore_w:0.3f} " + f"weightExploitation={exploit_w:0.3f}") + + # Generate candidate samples from Exploration class + nMeasurement = old_EDY[OutputNames[0]].shape[1] + + # Find sensitive region + if UtilMethod == 'LOOCV': + LCerror = MetaModel.LCerror + allModifiedLOO = np.zeros((len(old_EDX), len(OutputNames), + nMeasurement)) + for y_idx, y_key in enumerate(OutputNames): + for idx, key in enumerate(LCerror[y_key].keys()): + allModifiedLOO[:, y_idx, idx] = abs( + LCerror[y_key][key]) + + ExploitScore = np.max(np.max(allModifiedLOO, axis=1), axis=1) + + elif UtilMethod in ['EIGF', 'ALM']: + # ----- All other in ['EIGF', 'ALM'] ----- + # Initilize the ExploitScore array + ExploitScore = np.zeros((len(old_EDX), len(OutputNames))) + + # Split the candidates in groups for multiprocessing + if explore_method != 'Voronoi': + split_cand = np.array_split(allCandidates, + n_cand_groups, + axis=0) + goodSampleIdx = range(n_cand_groups) + else: + # Find indices of the Vornoi cells with samples + goodSampleIdx = [] + for idx in range(len(explore.closest_points)): + if len(explore.closest_points[idx]) != 0: + goodSampleIdx.append(idx) + split_cand = explore.closest_points + + # Split the candidates in groups for 
multiprocessing + args = [] + for index in goodSampleIdx: + args.append((exploit_method, split_cand[index], index, + sigma2, var)) + + # Multiprocessing + pool = multiprocessing.Pool(multiprocessing.cpu_count()) + # With Pool.starmap_async() + results = pool.starmap_async(self.run_util_func, args).get() + + # Close the pool + pool.close() + # out = map(self.run_util_func, + # [exploit_method]*len(goodSampleIdx), + # split_cand, + # range(len(goodSampleIdx)), + # [sigma2] * len(goodSampleIdx), + # [var] * len(goodSampleIdx) + # ) + # results = list(out) + + # Retrieve the results and append them + if explore_method == 'Voronoi': + ExploitScore = [np.mean(results[k][1]) for k in + range(len(goodSampleIdx))] + else: + ExploitScore = np.concatenate( + [results[k][1] for k in range(len(goodSampleIdx))]) + + else: + raise NameError('The requested utility function is not ' + 'available.') + + # print("ExploitScore:\n", ExploitScore) + + # find an optimal point subset to add to the initial design by + # maximization of the utility score and taking care of NaN values + # Total score + # Normalize U_J_d + ExploitScore = ExploitScore / np.sum(ExploitScore) + totalScore = exploit_w * ExploitScore + totalScore += explore_w * scoreExploration + + temp = totalScore.copy() + sorted_idxtotalScore = np.argsort(temp, axis=0)[::-1] + bestIdx = sorted_idxtotalScore[:n_new_samples] + + Xnew = np.zeros((n_new_samples, ndim)) + if explore_method != 'Voronoi': + Xnew = allCandidates[bestIdx] + else: + for i, idx in enumerate(bestIdx.flatten()): + X_can = explore.closest_points[idx] + # plotter(self.ExpDesign.X, X_can, explore_method, + # scoreExploration=None) + + # Calculate the maxmin score for the region of interest + newSamples, maxminScore = explore.get_mc_samples(X_can) + + # select the requested number of samples + Xnew[i] = newSamples[np.argmax(maxminScore)] + + elif exploit_method == 'alphabetic': + # ------- EXPLOITATION: ALPHABETIC ------- + Xnew = self.util_AlphOptDesign(allCandidates, var) + + elif exploit_method == 'Space-filling': + # ------- EXPLOITATION: SPACE-FILLING ------- + totalScore = scoreExploration + + # ------- Select the best candidate ------- + # find an optimal point subset to add to the initial design by + # maximization of the utility score and taking care of NaN values + temp = totalScore.copy() + temp[np.isnan(totalScore)] = -np.inf + sorted_idxtotalScore = np.argsort(temp)[::-1] + + # select the requested number of samples + Xnew = allCandidates[sorted_idxtotalScore[:n_new_samples]] + + else: + raise NameError('The requested design method is not available.') + + print("\n") + print("\nRun No. {}:".format(old_EDX.shape[0]+1)) + print("Xnew:\n", Xnew) + + return Xnew, None + + # ------------------------------------------------------------------------- + def util_AlphOptDesign(self, candidates, var='D-Opt'): + """ + Enriches the Experimental design with the requested alphabetic + criterion based on exploring the space with number of sampling points. + + Ref: Hadigol, M., & Doostan, A. (2018). Least squares polynomial chaos + expansion: A review of sampling strategies., Computer Methods in + Applied Mechanics and Engineering, 332, 382-407. + + Arguments + --------- + NCandidate : int + Number of candidate points to be searched + + var : string + Alphabetic optimality criterion + + Returns + ------- + X_new : array of shape (1, n_params) + The new sampling location in the input space. 
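+
+        Notes
+        -----
+        With the information matrix ``M = Psi.T @ Psi / N`` assembled
+        from the candidate-extended design, the criteria evaluated per
+        candidate are: `D-Opt` ``det(inv(M))**(1/P)``, `A-Opt`
+        ``trace(inv(M))`` and `K-Opt` ``cond(M)``. The candidates
+        minimizing the criterion are selected.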
+ """ + MetaModelOrig = self + Model = self.Model + n_new_samples = MetaModelOrig.ExpDesign.n_new_samples + NCandidate = candidates.shape[0] + + # TODO: Loop over outputs + OutputName = Model.Output.names[0] + + # To avoid changes ub original aPCE object + MetaModel = deepcopy(MetaModelOrig) + + # Old Experimental design + oldExpDesignX = MetaModel.ExpDesign.X + + # TODO: Only one psi can be selected. + # Suggestion: Go for the one with the highest LOO error + Scores = list(MetaModel.score_dict[OutputName].values()) + ModifiedLOO = [1-score for score in Scores] + outIdx = np.argmax(ModifiedLOO) + + # Initialize Phi to save the criterion's values + Phi = np.zeros((NCandidate)) + + BasisIndices = MetaModelOrig.basis_dict[OutputName]["y_"+str(outIdx+1)] + P = len(BasisIndices) + + # ------ Old Psi ------------ + univ_p_val = MetaModelOrig.univ_basis_vals(oldExpDesignX) + Psi = MetaModelOrig.create_psi(BasisIndices, univ_p_val) + + # ------ New candidates (Psi_c) ------------ + # Assemble Psi_c + univ_p_val_c = self.univ_basis_vals(candidates) + Psi_c = self.create_psi(BasisIndices, univ_p_val_c) + + for idx in range(NCandidate): + + # Include the new row to the original Psi + Psi_cand = np.vstack((Psi, Psi_c[idx])) + + # Information matrix + PsiTPsi = np.dot(Psi_cand.T, Psi_cand) + M = PsiTPsi / (len(oldExpDesignX)+1) + + if np.linalg.cond(PsiTPsi) > 1e-12 \ + and np.linalg.cond(PsiTPsi) < 1 / sys.float_info.epsilon: + # faster + invM = linalg.solve(M, sparse.eye(PsiTPsi.shape[0]).toarray()) + else: + # stabler + invM = np.linalg.pinv(M) + + # ---------- Calculate optimality criterion ---------- + # Optimality criteria according to Section 4.5.1 in Ref. + + # D-Opt + if var == 'D-Opt': + Phi[idx] = (np.linalg.det(invM)) ** (1/P) + + # A-Opt + elif var == 'A-Opt': + Phi[idx] = np.trace(invM) + + # K-Opt + elif var == 'K-Opt': + Phi[idx] = np.linalg.cond(M) + + else: + raise Exception('The optimality criterion you requested has ' + 'not been implemented yet!') + + # find an optimal point subset to add to the initial design + # by minimization of the Phi + sorted_idxtotalScore = np.argsort(Phi) + + # select the requested number of samples + Xnew = candidates[sorted_idxtotalScore[:n_new_samples]] + + return Xnew + + # ------------------------------------------------------------------------- + def __normpdf(self, y_hat_pce, std_pce, obs_data, total_sigma2s, + rmse=None): + + Model = self.Model + likelihoods = 1.0 + + # Loop over the outputs + for idx, out in enumerate(Model.Output.names): + + # (Meta)Model Output + nsamples, nout = y_hat_pce[out].shape + + # Prepare data and remove NaN + try: + data = obs_data[out].values[~np.isnan(obs_data[out])] + except AttributeError: + data = obs_data[out][~np.isnan(obs_data[out])] + + # Prepare sigma2s + non_nan_indices = ~np.isnan(total_sigma2s[out]) + tot_sigma2s = total_sigma2s[out][non_nan_indices][:nout].values + + # Surrogate error if valid dataset is given. + if rmse is not None: + tot_sigma2s += rmse[out]**2 + + likelihoods *= stats.multivariate_normal.pdf( + y_hat_pce[out], data, np.diag(tot_sigma2s), + allow_singular=True) + self.Likelihoods = likelihoods + + return likelihoods + + # ------------------------------------------------------------------------- + def __corr_factor_BME(self, obs_data, total_sigma2s, logBME): + """ + Calculates the correction factor for BMEs. 
+ """ + MetaModel = self.MetaModel + samples = MetaModel.ExpDesign.X # valid_samples + model_outputs = MetaModel.ExpDesign.Y # valid_model_runs + Model = MetaModel.ModelObj + n_samples = samples.shape[0] + + # Extract the requested model outputs for likelihood calulation + output_names = Model.Output.names + + # TODO: Evaluate MetaModel on the experimental design and ValidSet + OutputRS, stdOutputRS = MetaModel.eval_metamodel(samples=samples) + + logLik_data = np.zeros((n_samples)) + logLik_model = np.zeros((n_samples)) + # Loop over the outputs + for idx, out in enumerate(output_names): + + # (Meta)Model Output + nsamples, nout = model_outputs[out].shape + + # Prepare data and remove NaN + try: + data = obs_data[out].values[~np.isnan(obs_data[out])] + except AttributeError: + data = obs_data[out][~np.isnan(obs_data[out])] + + # Prepare sigma2s + non_nan_indices = ~np.isnan(total_sigma2s[out]) + tot_sigma2s = total_sigma2s[out][non_nan_indices][:nout] + + # Covariance Matrix + covMatrix_data = np.diag(tot_sigma2s) + + for i, sample in enumerate(samples): + + # Simulation run + y_m = model_outputs[out][i] + + # Surrogate prediction + y_m_hat = OutputRS[out][i] + + # CovMatrix with the surrogate error + # covMatrix = np.diag(stdOutputRS[out][i]**2) + covMatrix = np.diag((y_m-y_m_hat)**2) + covMatrix = np.diag( + np.mean((model_outputs[out]-OutputRS[out]), axis=0)**2 + ) + + # Compute likelilhood output vs data + logLik_data[i] += self.__logpdf( + y_m_hat, data, covMatrix_data + ) + + # Compute likelilhood output vs surrogate + logLik_model[i] += self.__logpdf(y_m_hat, y_m, covMatrix) + + # Weight + logLik_data -= logBME + weights = np.exp(logLik_model+logLik_data) + + return np.log(np.mean(weights)) + + # ------------------------------------------------------------------------- + def __logpdf(self, x, mean, cov): + """ + computes the likelihood based on a multivariate normal distribution. + + Parameters + ---------- + x : TYPE + DESCRIPTION. + mean : array_like + Observation data. + cov : 2d array + Covariance matrix of the distribution. + + Returns + ------- + log_lik : float + Log likelihood. + + """ + n = len(mean) + L = linalg.cholesky(cov, lower=True) + beta = np.sum(np.log(np.diag(L))) + dev = x - mean + alpha = dev.dot(linalg.cho_solve((L, True), dev)) + log_lik = -0.5 * alpha - beta - n / 2. 
+
+ # -------------------------------------------------------------------------
+ def __posteriorPlot(self, posterior, par_names, key):
+
+ # Initialization
+ newpath = (r'Outputs_SeqPosteriorComparison/posterior')
+ os.makedirs(newpath, exist_ok=True)
+
+ bound_tuples = self.MetaModel.bound_tuples
+ n_params = len(par_names)
+ font_size = 40
+ if n_params == 2:
+
+ figPosterior, ax = plt.subplots(figsize=(15, 15))
+
+ sns.kdeplot(x=posterior[:, 0], y=posterior[:, 1],
+ fill=True, ax=ax, cmap=plt.cm.jet,
+ clip=bound_tuples)
+ # Axis labels
+ plt.xlabel(par_names[0], fontsize=font_size)
+ plt.ylabel(par_names[1], fontsize=font_size)
+
+ # Set axis limit
+ plt.xlim(bound_tuples[0])
+ plt.ylim(bound_tuples[1])
+
+ # Increase font size
+ plt.xticks(fontsize=font_size)
+ plt.yticks(fontsize=font_size)
+
+ # Switch off the grids
+ plt.grid(False)
+
+ else:
+ import corner
+ figPosterior = corner.corner(posterior, labels=par_names,
+ title_fmt='.2e', show_titles=True,
+ title_kwargs={"fontsize": 12})
+
+ figPosterior.savefig(f'./{newpath}/{key}.pdf', bbox_inches='tight')
+ plt.close()
+
+ # Save the posterior as .npy
+ np.save(f'./{newpath}/{key}.npy', posterior)
+
+ return figPosterior
+
+ # -------------------------------------------------------------------------
+ def __hellinger_distance(self, P, Q):
+ """
+ Hellinger distance between two continuous distributions.
+
+ The distance ranges from 0 (identical distributions) to 1 (maximally
+ different). The maximum of 1 is reached when P assigns probability
+ zero to every set to which Q assigns a positive probability, and
+ vice versa. Here both inputs are summarized by their empirical means
+ and standard deviations, i.e. the closed-form expression for two
+ Gaussian densities is used.
+
+ Parameters
+ ----------
+ P : array
+ Reference likelihood.
+ Q : array
+ Estimated likelihood.
+
+ Returns
+ -------
+ float
+ Hellinger distance of two distributions.
+
+ """
+ mu1 = P.mean()
+ Sigma1 = np.std(P)
+
+ mu2 = Q.mean()
+ Sigma2 = np.std(Q)
+
+ term1 = np.sqrt(2*Sigma1*Sigma2 / (Sigma1**2 + Sigma2**2))
+
+ term2 = np.exp(-.25 * (mu1 - mu2)**2 / (Sigma1**2 + Sigma2**2))
+
+ H_squared = 1 - term1 * term2
+
+ return np.sqrt(H_squared)
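For two Gaussians N(mu1, s1^2) and N(mu2, s2^2) the squared Hellinger distance has the closed form used above, H^2 = 1 - sqrt(2*s1*s2/(s1^2+s2^2)) * exp(-(mu1-mu2)^2 / (4*(s1^2+s2^2))). A short sanity check with synthetic samples; the helper name hellinger_gauss is illustrative only:

import numpy as np

def hellinger_gauss(P, Q):
    # Gaussian approximation of both inputs, as in __hellinger_distance
    mu1, s1 = P.mean(), np.std(P)
    mu2, s2 = Q.mean(), np.std(Q)
    term1 = np.sqrt(2 * s1 * s2 / (s1**2 + s2**2))
    term2 = np.exp(-0.25 * (mu1 - mu2)**2 / (s1**2 + s2**2))
    return np.sqrt(1 - term1 * term2)

rng = np.random.default_rng(0)
same = rng.normal(0, 1, 10_000)
print(hellinger_gauss(same, same))                      # ~0.0 (identical)
print(hellinger_gauss(same, rng.normal(5, 1, 10_000)))  # ~0.98 (far apart)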
+
+ # -------------------------------------------------------------------------
+ def __BME_Calculator(self, MetaModel, obs_data, sigma2Dict, rmse=None):
+ """
+ Computes the Bayesian model evidence (BME) via Monte Carlo
+ integration.
+
+ """
+ # Initializations
+ if hasattr(MetaModel, 'valid_likelihoods'):
+ valid_likelihoods = MetaModel.valid_likelihoods
+ else:
+ valid_likelihoods = []
+
+ post_snapshot = MetaModel.ExpDesign.post_snapshot
+ if post_snapshot or len(valid_likelihoods) != 0:
+ newpath = (r'Outputs_SeqPosteriorComparison/likelihood_vs_ref')
+ os.makedirs(newpath, exist_ok=True)
+
+ SamplingMethod = 'random'
+ MCsize = 10000
+ ESS = 0
+
+ # Estimation of the integral via Monte Carlo integration
+ while (ESS > MCsize) or (ESS < 1):
+
+ # Generate samples for Monte Carlo simulation
+ X_MC = MetaModel.ExpDesign.generate_samples(
+ MCsize, SamplingMethod
+ )
+
+ # Monte Carlo simulation for the candidate design
+ m_1 = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/1024
+ Y_MC, std_MC = MetaModel.eval_metamodel(samples=X_MC)
+ m_2 = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/1024
+ print(f"\nMemory eval_metamodel in BME: {m_2-m_1:.2f} MB")
+
+ # Likelihood computation (Comparison of data and
+ # simulation results via PCE with candidate design)
+ Likelihoods = self.__normpdf(
+ Y_MC, std_MC, obs_data, sigma2Dict, rmse
+ )
+
+ # Check the effective sample size (1 < ESS < MCsize)
+ ESS = 1 / np.sum(np.square(Likelihoods/np.sum(Likelihoods)))
+
+ # Enlarge the sample size if it does not fulfill the criterion
+ if (ESS > MCsize) or (ESS < 1):
+ print(f'ESS={ESS}: the MC size should be larger.')
+ MCsize *= 10
+ ESS = 0
+
+ # Rejection Step
+ # Random numbers between 0 and 1
+ unif = np.random.rand(1, MCsize)[0]
+
+ # Reject poorly performing prior samples
+ accepted = (Likelihoods/np.max(Likelihoods)) >= unif
+ X_Posterior = X_MC[accepted]
+
+ # ------------------------------------------------------------
+ # --- Kullback-Leibler Divergence & Information Entropy ------
+ # ------------------------------------------------------------
+ # Prior-based estimation of BME
+ logBME = np.log(np.nanmean(Likelihoods))
+
+ # TODO: Correction factor
+ # log_weight = self.__corr_factor_BME(obs_data, sigma2Dict, logBME)
+
+ # Posterior-based expectation of likelihoods
+ postExpLikelihoods = np.mean(np.log(Likelihoods[accepted]))
+
+ # Posterior-based expectation of prior densities
+ postExpPrior = np.mean(
+ np.log(MetaModel.ExpDesign.JDist.pdf(X_Posterior.T))
+ )
+
+ # Calculate Kullback-Leibler Divergence
+ # KLD = np.mean(np.log(Likelihoods[Likelihoods!=0])- logBME)
+ KLD = postExpLikelihoods - logBME
+
+ # Information Entropy based on Entropy paper Eq. 38
+ infEntropy = logBME - postExpPrior - postExpLikelihoods
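All three quantities above reduce to simple averages over the Monte Carlo likelihoods: logBME is the log of the prior-sample mean, the effective sample size follows from the normalized weights, and the KLD estimate is the posterior expectation of the log-likelihood minus logBME. A toy illustration with synthetic likelihood values (all numbers made up):

import numpy as np

rng = np.random.default_rng(1)
likelihoods = rng.lognormal(mean=-2.0, sigma=1.5, size=10_000)

# Prior-based BME estimate
logBME = np.log(np.nanmean(likelihoods))

# Effective sample size of the importance weights
w = likelihoods / likelihoods.sum()
ESS = 1.0 / np.sum(w**2)

# Rejection step: accept each sample with probability L / max(L)
accepted = likelihoods / likelihoods.max() >= rng.random(likelihoods.size)

# Posterior expectation of the log-likelihood minus logBME -> KLD
KLD = np.mean(np.log(likelihoods[accepted])) - logBME
print(f"logBME={logBME:.3f}, ESS={ESS:.0f}, KLD={KLD:.3f}")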
+
+ # If post_snapshot is True, plot likelihood vs reference
+ if post_snapshot or len(valid_likelihoods) != 0:
+ # Hellinger distance
+ ref_like = np.log(valid_likelihoods[valid_likelihoods > 0])
+ est_like = np.log(Likelihoods[Likelihoods > 0])
+ distHellinger = self.__hellinger_distance(ref_like, est_like)
+
+ idx = len([name for name in os.listdir(newpath) if 'Likelihoods_'
+ in name and os.path.isfile(os.path.join(newpath, name))])
+ fig, ax = plt.subplots()
+ try:
+ sns.kdeplot(np.log(valid_likelihoods[valid_likelihoods > 0]),
+ shade=True, color="g", label='Ref. Likelihood')
+ sns.kdeplot(np.log(Likelihoods[Likelihoods > 0]), shade=True,
+ color="b", label='Likelihood with PCE')
+ except Exception:
+ pass
+
+ text = (f"Hellinger Dist.={distHellinger:.3f}\n logBME={logBME:.3f}"
+ f"\n DKL={KLD:.3f}")
+
+ plt.text(0.05, 0.75, text, bbox=dict(facecolor='wheat',
+ edgecolor='black',
+ boxstyle='round,pad=1'),
+ transform=ax.transAxes)
+
+ fig.savefig(f'./{newpath}/Likelihoods_{idx}.pdf',
+ bbox_inches='tight')
+ plt.close()
+
+ else:
+ distHellinger = 0.0
+
+ # Bayesian inference with the emulator, only for 2D problems
+ if post_snapshot and MetaModel.n_params == 2 and not idx % 5:
+ from bayes_inference.bayes_inference import BayesInference
+ from bayes_inference.discrepancy import Discrepancy
+ import pandas as pd
+ BayesOpts = BayesInference(MetaModel)
+ BayesOpts.emulator = True
+ BayesOpts.plot_post_pred = False
+
+ # Select the inference method
+ import emcee
+ BayesOpts.inference_method = "MCMC"
+ # Set the MCMC parameters passed to self.mcmc_params
+ BayesOpts.mcmc_params = {
+ 'n_steps': 1e5,
+ 'n_walkers': 30,
+ 'moves': emcee.moves.KDEMove(),
+ 'verbose': False
+ }
+
+ # ----- Define the discrepancy model -------
+ obs_data = pd.DataFrame(obs_data, columns=self.Model.Output.names)
+ BayesOpts.measurement_error = obs_data
+
+ # -- (Option B) --
+ DiscrepancyOpts = Discrepancy('')
+ DiscrepancyOpts.type = 'Gaussian'
+ DiscrepancyOpts.parameters = obs_data**2
+ BayesOpts.Discrepancy = DiscrepancyOpts
+ # Start the calibration/inference
+ Bayes_PCE = BayesOpts.create_inference()
+ X_Posterior = Bayes_PCE.posterior_df.values
+
+ return (logBME, KLD, X_Posterior, Likelihoods, distHellinger)
+
+ # -------------------------------------------------------------------------
+ def __validError(self, MetaModel):
+
+ # MetaModel = self.MetaModel
+ Model = MetaModel.ModelObj
+ OutputName = Model.Output.names
+
+ # Extract the validation samples and the corresponding model runs
+ valid_samples = MetaModel.valid_samples
+ valid_model_runs = MetaModel.valid_model_runs
+
+ # Run the metamodel with the validation samples
+ m_1 = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/1024
+ valid_PCE_runs, _ = MetaModel.eval_metamodel(samples=valid_samples)
+ m_2 = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/1024
+ print(f"\nMemory eval_metamodel: {m_2-m_1:.2f} MB")
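The recurring m_2 - m_1 pattern reports how much the process's peak resident set size grew around a call, which is the metric this patch uses to track the memory footprint of eval_metamodel. A reusable sketch of the same probe; note that ru_maxrss is a high-water mark (kilobytes on Linux, bytes on macOS), so a delta of zero only means the previous peak was not exceeded:

import resource
from contextlib import contextmanager

@contextmanager
def peak_rss_delta(label):
    # ru_maxrss: peak resident set size so far (KB on Linux, bytes on macOS)
    before = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024
    yield
    after = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024
    print(f"{label}: peak RSS grew by {after - before:.2f} MB")

# Usage (illustrative workload):
with peak_rss_delta("eval_metamodel"):
    big = [0.0] * 5_000_000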
+
+ rms_error = {}
+ valid_error = {}
+ # Loop over the keys and compute the RMSE.
+ for key in OutputName:
+ rms_error[key] = mean_squared_error(
+ valid_model_runs[key], valid_PCE_runs[key],
+ multioutput='raw_values',
+ sample_weight=None,
+ squared=False)
+ # Validation error
+ valid_error[key] = (rms_error[key]**2)
+ valid_error[key] /= np.var(valid_model_runs[key], ddof=1, axis=0)
+
+ # Print a report table
+ print("\n>>>>> Updated Errors of {} <<<<<".format(key))
+ print("\nIndex | RMSE | Validation Error")
+ print('-'*35)
+ print('\n'.join(f'{i+1} | {k:.3e} | {j:.3e}' for i, (k, j)
+ in enumerate(zip(rms_error[key],
+ valid_error[key]))))
+
+ return rms_error, valid_error
+
+ # -------------------------------------------------------------------------
+ def __error_Mean_Std(self):
+
+ MetaModel = self.MetaModel
+ # Extract the mean and std provided by user
+ df_MCReference = MetaModel.ModelObj.mc_reference
+
+ # Compute the mean and std based on the MetaModel
+ pce_means, pce_stds = self._compute_pce_moments(MetaModel)
+
+ # Compute the root mean squared error
+ for output in MetaModel.ModelObj.Output.names:
+
+ # Compute the error between mean and std of MetaModel and OrigModel
+ RMSE_Mean = mean_squared_error(
+ df_MCReference['mean'], pce_means[output], squared=False
+ )
+ RMSE_std = mean_squared_error(
+ df_MCReference['std'], pce_stds[output], squared=False
+ )
+
+ return RMSE_Mean, RMSE_std
+
+ # -------------------------------------------------------------------------
+ def _compute_pce_moments(self, MetaModel):
+ """
+ Computes the first two moments using the PCE-based meta-model.
+
+ Returns
+ -------
+ pce_means: dict
+ The first moment (mean) of the surrogate.
+ pce_stds: dict
+ The second moment (standard deviation) of the surrogate.
+
+ """
+ outputs = MetaModel.ModelObj.Output.names
+ pce_means_b = {}
+ pce_stds_b = {}
+
+ # Loop over bootstrap iterations
+ for b_i in range(MetaModel.n_bootstrap_itrs):
+ # Loop over the metamodels
+ coeffs_dicts = MetaModel.coeffs_dict[f'b_{b_i+1}'].items()
+ means = {}
+ stds = {}
+ for output, coef_dict in coeffs_dicts:
+
+ pce_mean = np.zeros((len(coef_dict)))
+ pce_var = np.zeros((len(coef_dict)))
+
+ for index, values in coef_dict.items():
+ idx = int(index.split('_')[1]) - 1
+ coeffs = MetaModel.coeffs_dict[f'b_{b_i+1}'][output][index]
+
+ # Mean = c_0
+ if coeffs[0] != 0:
+ pce_mean[idx] = coeffs[0]
+ else:
+ clf_poly = MetaModel.clf_poly[f'b_{b_i+1}'][output]
+ pce_mean[idx] = clf_poly[index].intercept_
+ # Var = sum(coeffs[1:]**2)
+ pce_var[idx] = np.sum(np.square(coeffs[1:]))
+
+ # Save predictions for each output
+ # (back transformation if PCA is selected)
+ if MetaModel.dim_red_method.lower() == 'pca':
+ PCA = MetaModel.pca[f'b_{b_i+1}'][output]
+ means[output] = PCA.inverse_transform(pce_mean)
+ stds[output] = PCA.inverse_transform(np.sqrt(pce_var))
+ else:
+ means[output] = pce_mean
+ stds[output] = np.sqrt(pce_var)
+
+ # Save predictions for each bootstrap iteration
+ pce_means_b[b_i] = means
+ pce_stds_b[b_i] = stds
+
+ # Change the order of nesting
+ mean_all = {}
+ for i in sorted(pce_means_b):
+ for k, v in pce_means_b[i].items():
+ if k not in mean_all:
+ mean_all[k] = [None] * len(pce_means_b)
+ mean_all[k][i] = v
+ std_all = {}
+ for i in sorted(pce_stds_b):
+ for k, v in pce_stds_b[i].items():
+ if k not in std_all:
+ std_all[k] = [None] * len(pce_stds_b)
+ std_all[k][i] = v
+
+ # Average the moments over the bootstrap iterations
+ pce_means, pce_stds = {}, {}
+ for output in outputs:
+ pce_means[output] = np.mean(mean_all[output], axis=0)
+ pce_stds[output] = np.mean(std_all[output], axis=0)
+
+ return pce_means, pce_stds
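With an orthonormal polynomial basis the PCE moments fall out of the coefficients directly, which is what _compute_pce_moments exploits per output and bootstrap iteration: the mean is the constant coefficient c_0 and the variance is the sum of the squared higher-order coefficients. A minimal illustration with made-up coefficients:

import numpy as np

# PCE coefficients for one output component (orthonormal basis):
# index 0 is the constant term, the rest are higher-order terms.
coeffs = np.array([2.0, 0.5, -0.3, 0.1])

pce_mean = coeffs[0]                       # E[Y] = c_0
pce_var = np.sum(np.square(coeffs[1:]))    # Var[Y] = sum_{k>0} c_k^2
pce_std = np.sqrt(pce_var)

print(pce_mean, pce_std)  # 2.0, ~0.592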
diff --git a/src/bayesvalidrox/surrogate_models/sequential_design.py b/src/bayesvalidrox/surrogate_models/sequential_design.py
index e24652d53..ddd0ad852 100644
--- a/src/bayesvalidrox/surrogate_models/sequential_design.py
+++ b/src/bayesvalidrox/surrogate_models/sequential_design.py
@@ -44,7 +44,7 @@ class SeqDesign():
 Returns
 -------
- PCEModel : object
+ MetaModel : object
 Meta model object.
 """
@@ -63,7 +63,7 @@ class SeqDesign():
 MetaModel.seqRMSEStd = {}
 MetaModel.seqMinDist = []
 pce = True if MetaModel.meta_model_type.lower() != 'gpe' else False
- mc_ref = True if hasattr(Model, 'mc_reference') else False
+ mc_ref = True if bool(Model.mc_reference) else False
 if mc_ref:
 Model.read_mc_reference()
@@ -92,31 +92,30 @@ class SeqDesign():
 else:
 obs_data = []
 TotalSigma2 = {}
- # ---------- Initial PCEModel ----------
- PCEModel = MetaModel#.train_norm_design(Model)
- initPCEModel = deepcopy(PCEModel)
+ # ---------- Initial MetaModel ----------
+ initMetaModel = deepcopy(MetaModel)
- # Check if discrepancy is provided
- if len(obs_data) != 0 and hasattr(PCEModel, 'Discrepancy'):
- TotalSigma2 = PCEModel.Discrepancy.parameters
+ # Validation error if validation set is provided.
+ if len(MetaModel.valid_model_runs) != 0:
+ init_rmse, init_valid_error = self.__validError(initMetaModel)
+ init_valid_error = list(init_valid_error.values())
+ else:
+ init_rmse = None
- # Validation error if validation set is provided.
- if len(PCEModel.valid_model_runs) != 0:
- init_rmse, init_valid_error = self.__validError(initPCEModel)
- init_valid_error = list(init_valid_error.values())
- else:
- init_rmse = None
+ # Check if discrepancy is provided
+ if len(obs_data) != 0 and hasattr(MetaModel, 'Discrepancy'):
+ TotalSigma2 = MetaModel.Discrepancy.parameters
 # Calculate the initial BME
 out = self.__BME_Calculator(
- initPCEModel, obs_data, TotalSigma2, init_rmse)
+ initMetaModel, obs_data, TotalSigma2, init_rmse)
 init_BME, init_KLD, init_post, init_likes, init_dist_hellinger = out
 print(f"\nInitial BME: {init_BME:.2f}")
 print(f"Initial KLD: {init_KLD:.2f}")
 # Posterior snapshot (initial)
 if post_snapshot:
- parNames = PCEModel.ExpDesign.par_names
+ parNames = MetaModel.ExpDesign.par_names
 print('Posterior snapshot (initial) is being plotted...')
 self.__posteriorPlot(init_post, parNames, 'SeqPosterior_init')
@@ -127,21 +126,21 @@ class SeqDesign():
 f" {init_rmse_std}")
 # Read the initial experimental design
- Xinit = initPCEModel.ExpDesign.X
- init_n_samples = len(PCEModel.ExpDesign.X)
- initYprev = initPCEModel.ModelOutputDict
- initLCerror = initPCEModel.LCerror
+ Xinit = initMetaModel.ExpDesign.X
+ init_n_samples = len(MetaModel.ExpDesign.X)
+ initYprev = initMetaModel.ModelOutputDict
+ initLCerror = initMetaModel.LCerror
 n_itrs = max_n_samples - init_n_samples
 # Read the initial ModifiedLOO
 if pce:
 Scores_all, varExpDesignY = [], []
 for out_name in output_name:
- y = initPCEModel.ExpDesign.Y[out_name]
+ y = initMetaModel.ExpDesign.Y[out_name]
 Scores_all.append(list(
- initPCEModel.score_dict['b_1'][out_name].values()))
- if PCEModel.dim_red_method.lower() == 'pca':
- pca = PCEModel.pca['b_1'][out_name]
+ initMetaModel.score_dict['b_1'][out_name].values()))
+ if MetaModel.dim_red_method.lower() == 'pca':
+ pca = MetaModel.pca['b_1'][out_name]
 components = pca.transform(y)
 varExpDesignY.append(np.var(components,
axis=0)) else: @@ -152,22 +151,22 @@ class SeqDesign(): init_mod_LOO = [np.average([1-score for score in Scores], weights=weights)] - prevPCEModel_dict = {} + prevMetaModel_dict = {} # Replicate the sequential design for repIdx in range(n_replication): print(f'\n>>>> Replication: {repIdx+1}<<<<') # To avoid changes ub original aPCE object - PCEModel.ExpDesign.X = Xinit - PCEModel.ExpDesign.Y = initYprev - PCEModel.LCerror = initLCerror + MetaModel.ExpDesign.X = Xinit + MetaModel.ExpDesign.Y = initYprev + MetaModel.LCerror = initLCerror for util_f in util_func: print(f'\n>>>> Utility Function: {util_f} <<<<') # To avoid changes ub original aPCE object - PCEModel.ExpDesign.X = Xinit - PCEModel.ExpDesign.Y = initYprev - PCEModel.LCerror = initLCerror + MetaModel.ExpDesign.X = Xinit + MetaModel.ExpDesign.Y = initYprev + MetaModel.LCerror = initLCerror # Set the experimental design Xprev = Xinit @@ -182,7 +181,7 @@ class SeqDesign(): print("\nInitial ModifiedLOO:", init_mod_LOO) SeqModifiedLOO = np.array(init_mod_LOO) - if len(PCEModel.valid_model_runs) != 0: + if len(MetaModel.valid_model_runs) != 0: SeqValidError = np.array(init_valid_error) # Check if data is provided @@ -201,21 +200,21 @@ class SeqDesign(): print(f'\n>>>> Iteration number {itr_no} <<<<') # Save the metamodel prediction before updating - prevPCEModel_dict[itr_no] = deepcopy(PCEModel) + prevMetaModel_dict[itr_no] = deepcopy(MetaModel) if itr_no > 1: - pc_model = prevPCEModel_dict[itr_no-1] + pc_model = prevMetaModel_dict[itr_no-1] self._y_hat_prev, _ = pc_model.eval_metamodel( samples=Xfull[-1].reshape(1, -1)) # Optimal Bayesian Design m_1 = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/1024 - PCEModel.ExpDesignFlag = 'sequential' + MetaModel.ExpDesignFlag = 'sequential' Xnew, updatedPrior = self.opt_SeqDesign(TotalSigma2, n_canddidate, util_f) m_2 = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/1024 S = np.min(distance.cdist(Xinit, Xnew, 'euclidean')) - PCEModel.seqMinDist.append(S) + MetaModel.seqMinDist.append(S) print(f"\nmin Dist from OldExpDesign: {S:2f}") print("\n") @@ -225,11 +224,11 @@ class SeqDesign(): ) total_n_samples += Xnew.shape[0] # ------ Plot the surrogate model vs Origninal Model ------ - if hasattr(PCEModel, 'adapt_verbose') and \ - PCEModel.adapt_verbose: + if hasattr(MetaModel, 'adapt_verbose') and \ + MetaModel.adapt_verbose: from .adaptPlot import adaptPlot - y_hat, std_hat = PCEModel.eval_metamodel(samples=Xnew) - adaptPlot(PCEModel, Ynew, y_hat, std_hat, plotED=False) + y_hat, std_hat = MetaModel.eval_metamodel(samples=Xnew) + adaptPlot(MetaModel, Ynew, y_hat, std_hat, plotED=False) # -------- Retrain the surrogate model ------- # Extend new experimental design @@ -238,30 +237,30 @@ class SeqDesign(): # Updating experimental design Y for out_name in output_name: Yfull = np.vstack((Yprev[out_name], Ynew[out_name])) - PCEModel.ModelOutputDict[out_name] = Yfull + MetaModel.ModelOutputDict[out_name] = Yfull # Pass new design to the metamodel object - PCEModel.ExpDesign.sampling_method = 'user' - PCEModel.ExpDesign.X = Xfull - PCEModel.ExpDesign.Y = PCEModel.ModelOutputDict + MetaModel.ExpDesign.sampling_method = 'user' + MetaModel.ExpDesign.X = Xfull + MetaModel.ExpDesign.Y = MetaModel.ModelOutputDict # Save the Experimental Design for next iteration Xprev = Xfull - Yprev = PCEModel.ModelOutputDict + Yprev = MetaModel.ModelOutputDict # Pass the new prior as the input - PCEModel.input_obj.poly_coeffs_flag = False + MetaModel.input_obj.poly_coeffs_flag = False if updatedPrior is not None: - 
PCEModel.input_obj.poly_coeffs_flag = True + MetaModel.input_obj.poly_coeffs_flag = True print("updatedPrior:", updatedPrior.shape) # Arbitrary polynomial chaos for i in range(updatedPrior.shape[1]): - PCEModel.input_obj.Marginals[i].dist_type = None + MetaModel.input_obj.Marginals[i].dist_type = None x = updatedPrior[:, i] - PCEModel.input_obj.Marginals[i].raw_data = x + MetaModel.input_obj.Marginals[i].raw_data = x # Train the surrogate model for new ExpDesign - PCEModel = PCEModel.train_norm_design(Model, parallel=True) + MetaModel.train_norm_design(parallel=False) m_3 = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/1024 # -------- Evaluate the retrained surrogate model ------- @@ -269,11 +268,11 @@ class SeqDesign(): if pce: Scores_all, varExpDesignY = [], [] for out_name in output_name: - y = PCEModel.ExpDesign.Y[out_name] + y = MetaModel.ExpDesign.Y[out_name] Scores_all.append(list( - PCEModel.score_dict['b_1'][out_name].values())) - if PCEModel.dim_red_method.lower() == 'pca': - pca = PCEModel.pca['b_1'][out_name] + MetaModel.score_dict['b_1'][out_name].values())) + if MetaModel.dim_red_method.lower() == 'pca': + pca = MetaModel.pca['b_1'][out_name] components = pca.transform(y) varExpDesignY.append(np.var(components, axis=0)) @@ -291,8 +290,8 @@ class SeqDesign(): print('\n') # Compute the validation error - if len(PCEModel.valid_model_runs) != 0: - rmse, validError = self.__validError(PCEModel) + if len(MetaModel.valid_model_runs) != 0: + rmse, validError = self.__validError(MetaModel) ValidError = list(validError.values()) else: rmse = None @@ -301,7 +300,7 @@ class SeqDesign(): if pce: SeqModifiedLOO = np.vstack( (SeqModifiedLOO, ModifiedLOO)) - if len(PCEModel.valid_model_runs) != 0: + if len(MetaModel.valid_model_runs) != 0: SeqValidError = np.vstack( (SeqValidError, ValidError)) m_4 = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/1024 @@ -309,7 +308,7 @@ class SeqDesign(): # Check if data is provided if len(obs_data) != 0: # Calculate the initial BME - out = self.__BME_Calculator(PCEModel, obs_data, + out = self.__BME_Calculator(MetaModel, obs_data, TotalSigma2, rmse) BME, KLD, Posterior, likes, DistHellinger = out print('\n') @@ -318,9 +317,9 @@ class SeqDesign(): print('\n') # Plot some snapshots of the posterior - step_snapshot = PCEModel.ExpDesign.step_snapshot + step_snapshot = MetaModel.ExpDesign.step_snapshot if post_snapshot and postcnt % step_snapshot == 0: - parNames = PCEModel.ExpDesign.par_names + parNames = MetaModel.ExpDesign.par_names print('Posterior snapshot is being plotted...') self.__posteriorPlot(Posterior, parNames, f'SeqPosterior_{postcnt}') @@ -368,21 +367,21 @@ class SeqDesign(): # Store updated ModifiedLOO and BME in dictonary strKey = f'{util_f}_rep_{repIdx+1}' if pce: - PCEModel.SeqModifiedLOO[strKey] = SeqModifiedLOO - if len(PCEModel.valid_model_runs) != 0: - PCEModel.seqValidError[strKey] = SeqValidError + MetaModel.SeqModifiedLOO[strKey] = SeqModifiedLOO + if len(MetaModel.valid_model_runs) != 0: + MetaModel.seqValidError[strKey] = SeqValidError # Check if data is provided if len(obs_data) != 0: - PCEModel.SeqBME[strKey] = SeqBME - PCEModel.SeqKLD[strKey] = SeqKLD - if len(PCEModel.valid_likelihoods) != 0: - PCEModel.SeqDistHellinger[strKey] = SeqDistHellinger + MetaModel.SeqBME[strKey] = SeqBME + MetaModel.SeqKLD[strKey] = SeqKLD + if len(MetaModel.valid_likelihoods) != 0: + MetaModel.SeqDistHellinger[strKey] = SeqDistHellinger if mc_ref and pce: - PCEModel.seqRMSEMean[strKey] = seqRMSEMean - PCEModel.seqRMSEStd[strKey] = seqRMSEStd 
+ MetaModel.seqRMSEMean[strKey] = seqRMSEMean + MetaModel.seqRMSEStd[strKey] = seqRMSEStd - return PCEModel + return MetaModel # ------------------------------------------------------------------------- def util_VarBasedDesign(self, X_can, index, util_func='Entropy'): @@ -641,10 +640,10 @@ class SeqDesign(): return -1 * U_J_d # -1 is for minimization instead of maximization # ------------------------------------------------------------------------- - def update_metamodel(self, PCEModel, output, y_hat_can, univ_p_val, index, + def update_metamodel(self, MetaModel, output, y_hat_can, univ_p_val, index, new_pca=False): - BasisIndices = PCEModel.basis_dict[output]["y_"+str(index+1)] - clf_poly = PCEModel.clf_poly[output]["y_"+str(index+1)] + BasisIndices = MetaModel.basis_dict[output]["y_"+str(index+1)] + clf_poly = MetaModel.clf_poly[output]["y_"+str(index+1)] Mn = clf_poly.coef_ Sn = clf_poly.sigma_ beta = clf_poly.alpha_ @@ -666,8 +665,8 @@ class SeqDesign(): std_new = np.sqrt(np.sum(np.square(Mn_new[1:]))) # Back transformation if PCA is selected. - if PCEModel.dim_red_method.lower() == 'pca': - old_pca = PCEModel.pca[output] + if MetaModel.dim_red_method.lower() == 'pca': + old_pca = MetaModel.pca[output] mean_old = old_pca.mean_[index] mean_old += np.sum(mean_old * old_pca.components_[:, index]) std_old = np.sqrt(np.sum(std_old**2 * @@ -710,8 +709,8 @@ class SeqDesign(): # To avoid changes ub original aPCE object Model = self.Model - PCEModel = deepcopy(self.MetaModel) - old_EDY = PCEModel.ExpDesign.Y + MetaModel = deepcopy(self.MetaModel) + old_EDY = MetaModel.ExpDesign.Y # Evaluate the PCE metamodels using the candidate design Y_PC_can, Y_std_can = self.MetaModel.eval_metamodel( @@ -727,18 +726,18 @@ class SeqDesign(): means, np.diag(stds), m_size) # Create the SparseBayes-based PCE metamodel: - PCEModel.input_obj.poly_coeffs_flag = False + MetaModel.input_obj.poly_coeffs_flag = False univ_p_val = self.MetaModel.univ_basis_vals(X_can) G_n_m_all = np.zeros((m_size, len(Model.Output.names), Model.n_obs)) for i in range(m_size): for idx, key in enumerate(Model.Output.names): - if PCEModel.dim_red_method.lower() == 'pca': + if MetaModel.dim_red_method.lower() == 'pca': # Equal number of components new_outputs = np.vstack( (old_EDY[key], y_hat_samples[key][i]) ) - new_pca, _ = PCEModel.pca_transformation(new_outputs) + new_pca, _ = MetaModel.pca_transformation(new_outputs) target = new_pca.transform( y_hat_samples[key][i].reshape(1, -1) )[0] @@ -749,7 +748,7 @@ class SeqDesign(): # Update surrogate result = self.update_metamodel( - PCEModel, key, target[j], univ_p_val, j, new_pca) + MetaModel, key, target[j], univ_p_val, j, new_pca) # Compute Expected Information Gain (Eq. 
39) G_n_m = np.log(result['std_old']/result['std_new']) - 1./2 @@ -785,22 +784,22 @@ class SeqDesign(): # To avoid changes ub original aPCE object Model = self.Model - PCEModel = deepcopy(self.MetaModel) - out_names = PCEModel.ModelObj.Output.names + MetaModel = deepcopy(self.MetaModel) + out_names = MetaModel.ModelObj.Output.names if X_can.ndim == 1: X_can = X_can.reshape(1, -1) - # Compute the mean and std based on the PCEModel - # pce_means, pce_stds = self._compute_pce_moments(PCEModel) + # Compute the mean and std based on the MetaModel + # pce_means, pce_stds = self._compute_pce_moments(MetaModel) if var == 'ALC': - Y_MC, Y_MC_std = PCEModel.eval_metamodel(samples=X_MC) + Y_MC, Y_MC_std = MetaModel.eval_metamodel(samples=X_MC) # Old Experimental design - oldExpDesignX = PCEModel.ExpDesign.X - oldExpDesignY = PCEModel.ExpDesign.Y + oldExpDesignX = MetaModel.ExpDesign.X + oldExpDesignY = MetaModel.ExpDesign.Y # Evaluate the PCE metamodels at that location ??? - Y_PC_can, Y_std_can = PCEModel.eval_metamodel(samples=X_can) + Y_PC_can, Y_std_can = MetaModel.eval_metamodel(samples=X_can) # Add all suggestion as new ExpDesign NewExpDesignX = np.vstack((oldExpDesignX, X_can)) @@ -813,13 +812,13 @@ class SeqDesign(): except: NewExpDesignY[key] = oldExpDesignY[key] - PCEModel.ExpDesign.sampling_method = 'user' - PCEModel.ExpDesign.X = NewExpDesignX - PCEModel.ExpDesign.Y = NewExpDesignY + MetaModel.ExpDesign.sampling_method = 'user' + MetaModel.ExpDesign.X = NewExpDesignX + MetaModel.ExpDesign.Y = NewExpDesignY # Train the model for the observed data using x_can - PCEModel.input_obj.poly_coeffs_flag = False - PCE_Model_can = PCEModel.train_norm_design(Model, parallel=False) + MetaModel.input_obj.poly_coeffs_flag = False + PCE_Model_can = MetaModel.train_norm_design(Model, parallel=False) if var.lower() == 'mi': # Mutual information based on Krause et al @@ -841,7 +840,7 @@ class SeqDesign(): # Adaptive design and analysis of supercomputer experiments Techno- # metrics, 51 (2009), pp. 130–145. 
- # Evaluate the PCEModel at the given samples + # Evaluate the MetaModel at the given samples Y_MC_can, Y_MC_std_can = PCE_Model_can.eval_metamodel(samples=X_MC) # Compute the score @@ -867,7 +866,7 @@ class SeqDesign(): MCsize, 'random' ) - # Evaluate the PCEModel at the given samples + # Evaluate the MetaModel at the given samples Y_MC, std_MC = PCE_Model_can.eval_metamodel(samples=X_MC) # Likelihood computation (Comparison of data and simulation @@ -1214,18 +1213,18 @@ class SeqDesign(): """ # Initialization - PCEModel = self.MetaModel - Bounds = PCEModel.bound_tuples - n_new_samples = PCEModel.ExpDesign.n_new_samples - explore_method = PCEModel.ExpDesign.explore_method - exploit_method = PCEModel.ExpDesign.exploit_method - n_cand_groups = PCEModel.ExpDesign.n_cand_groups - tradeoff_scheme = PCEModel.ExpDesign.tradeoff_scheme - - old_EDX = PCEModel.ExpDesign.X - old_EDY = PCEModel.ExpDesign.Y.copy() - ndim = PCEModel.ExpDesign.X.shape[1] - OutputNames = PCEModel.ModelObj.Output.names + MetaModel = self.MetaModel + Bounds = MetaModel.bound_tuples + n_new_samples = MetaModel.ExpDesign.n_new_samples + explore_method = MetaModel.ExpDesign.explore_method + exploit_method = MetaModel.ExpDesign.exploit_method + n_cand_groups = MetaModel.ExpDesign.n_cand_groups + tradeoff_scheme = MetaModel.ExpDesign.tradeoff_scheme + + old_EDX = MetaModel.ExpDesign.X + old_EDY = MetaModel.ExpDesign.Y.copy() + ndim = MetaModel.ExpDesign.X.shape[1] + OutputNames = MetaModel.ModelObj.Output.names # ----------------------------------------- # ----------- CUSTOMIZED METHODS ---------- @@ -1233,7 +1232,7 @@ class SeqDesign(): # Utility function exploit_method provided by user if exploit_method.lower() == 'user': - Xnew, filteredSamples = PCEModel.ExpDesign.ExploitFunction(self) + Xnew, filteredSamples = MetaModel.ExpDesign.ExploitFunction(self) print("\n") print("\nXnew:\n", Xnew) @@ -1279,11 +1278,11 @@ class SeqDesign(): # Initilize the ExploitScore array # Generate random samples - allCandidates = PCEModel.ExpDesign.generate_samples(n_candidates, + allCandidates = MetaModel.ExpDesign.generate_samples(n_candidates, 'random') # Construct error model based on LCerror - errorModel = PCEModel.create_ModelError(old_EDX, self.LCerror) + errorModel = MetaModel.create_ModelError(old_EDX, self.LCerror) self.errorModel.append(copy(errorModel)) # Evaluate the error models for allCandidates @@ -1298,7 +1297,7 @@ class SeqDesign(): else: # ------- EXPLORATION: SPACE-FILLING DESIGN ------- # Generate candidate samples from Exploration class - explore = Exploration(PCEModel, n_candidates) + explore = Exploration(MetaModel, n_candidates) explore.w = 100 # * ndim #500 # Select criterion (mc-intersite-proj-th, mc-intersite-proj) explore.mc_criterion = 'mc-intersite-proj' @@ -1353,9 +1352,9 @@ class SeqDesign(): # Create a sample pool for rejection sampling MCsize = 15000 - X_MC = PCEModel.ExpDesign.generate_samples(MCsize, 'random') - candidates = PCEModel.ExpDesign.generate_samples( - PCEModel.ExpDesign.max_func_itr, 'latin_hypercube') + X_MC = MetaModel.ExpDesign.generate_samples(MCsize, 'random') + candidates = MetaModel.ExpDesign.generate_samples( + MetaModel.ExpDesign.max_func_itr, 'latin_hypercube') # Split the candidates in groups for multiprocessing split_cand = np.array_split( @@ -1396,7 +1395,7 @@ class SeqDesign(): if np.isfinite(arr).all()] scaler = MinMaxScaler() X_S = scaler.fit_transform(candidates[good_indices]) - gp = PCEModel.gaussian_process_emulator( + gp = MetaModel.gaussian_process_emulator( X_S, 
U_J_d[good_indices], autoSelect=True ) U_J_d = gp.predict(scaler.transform(allCandidates)) @@ -1457,7 +1456,7 @@ class SeqDesign(): # Find sensitive region if UtilMethod == 'LOOCV': - LCerror = PCEModel.LCerror + LCerror = MetaModel.LCerror allModifiedLOO = np.zeros((len(old_EDX), len(OutputNames), nMeasurement)) for y_idx, y_key in enumerate(OutputNames): @@ -1600,35 +1599,35 @@ class SeqDesign(): X_new : array of shape (1, n_params) The new sampling location in the input space. """ - PCEModelOrig = self + MetaModelOrig = self Model = self.Model - n_new_samples = PCEModelOrig.ExpDesign.n_new_samples + n_new_samples = MetaModelOrig.ExpDesign.n_new_samples NCandidate = candidates.shape[0] # TODO: Loop over outputs OutputName = Model.Output.names[0] # To avoid changes ub original aPCE object - PCEModel = deepcopy(PCEModelOrig) + MetaModel = deepcopy(MetaModelOrig) # Old Experimental design - oldExpDesignX = PCEModel.ExpDesign.X + oldExpDesignX = MetaModel.ExpDesign.X # TODO: Only one psi can be selected. # Suggestion: Go for the one with the highest LOO error - Scores = list(PCEModel.score_dict[OutputName].values()) + Scores = list(MetaModel.score_dict[OutputName].values()) ModifiedLOO = [1-score for score in Scores] outIdx = np.argmax(ModifiedLOO) # Initialize Phi to save the criterion's values Phi = np.zeros((NCandidate)) - BasisIndices = PCEModelOrig.basis_dict[OutputName]["y_"+str(outIdx+1)] + BasisIndices = MetaModelOrig.basis_dict[OutputName]["y_"+str(outIdx+1)] P = len(BasisIndices) # ------ Old Psi ------------ - univ_p_val = PCEModelOrig.univ_basis_vals(oldExpDesignX) - Psi = PCEModelOrig.create_psi(BasisIndices, univ_p_val) + univ_p_val = MetaModelOrig.univ_basis_vals(oldExpDesignX) + Psi = MetaModelOrig.create_psi(BasisIndices, univ_p_val) # ------ New candidates (Psi_c) ------------ # Assemble Psi_c @@ -1893,16 +1892,16 @@ class SeqDesign(): return np.sqrt(H_squared) # ------------------------------------------------------------------------- - def __BME_Calculator(self, PCEModel, obs_data, sigma2Dict, rmse=None): + def __BME_Calculator(self, MetaModel, obs_data, sigma2Dict, rmse=None): """ This function computes the Bayesian model evidence (BME) via Monte Carlo integration. 
""" # Initializations - valid_likelihoods = PCEModel.valid_likelihoods + valid_likelihoods = MetaModel.valid_likelihoods - post_snapshot = PCEModel.ExpDesign.post_snapshot + post_snapshot = MetaModel.ExpDesign.post_snapshot if post_snapshot or len(valid_likelihoods) != 0: newpath = (r'Outputs_SeqPosteriorComparison/likelihood_vs_ref') os.makedirs(newpath, exist_ok=True) @@ -1915,13 +1914,13 @@ class SeqDesign(): while (ESS > MCsize) or (ESS < 1): # Generate samples for Monte Carlo simulation - X_MC = PCEModel.ExpDesign.generate_samples( + X_MC = MetaModel.ExpDesign.generate_samples( MCsize, SamplingMethod ) # Monte Carlo simulation for the candidate design m_1 = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/1024 - Y_MC, std_MC = PCEModel.eval_metamodel(samples=X_MC) + Y_MC, std_MC = MetaModel.eval_metamodel(samples=X_MC) m_2 = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/1024 print(f"\nMemory eval_metamodel in BME: {m_2-m_1:.2f} MB") @@ -1962,7 +1961,7 @@ class SeqDesign(): # Posterior-based expectation of prior densities postExpPrior = np.mean( - np.log(PCEModel.ExpDesign.JDist.pdf(X_Posterior.T)) + np.log(MetaModel.ExpDesign.JDist.pdf(X_Posterior.T)) ) # Calculate Kullback-Leibler Divergence @@ -2006,11 +2005,11 @@ class SeqDesign(): distHellinger = 0.0 # Bayesian inference with Emulator only for 2D problem - if post_snapshot and PCEModel.n_params == 2 and not idx % 5: + if post_snapshot and MetaModel.n_params == 2 and not idx % 5: from bayes_inference.bayes_inference import BayesInference from bayes_inference.discrepancy import Discrepancy import pandas as pd - BayesOpts = BayesInference(PCEModel) + BayesOpts = BayesInference(MetaModel) BayesOpts.emulator = True BayesOpts.plot_post_pred = False @@ -2047,7 +2046,7 @@ class SeqDesign(): # ------------------------------------------------------------------------- def __validError(self, MetaModel): - # PCEModel = self.MetaModel + # MetaModel = self.MetaModel Model = MetaModel.ModelObj OutputName = Model.Output.names @@ -2088,17 +2087,17 @@ class SeqDesign(): # ------------------------------------------------------------------------- def __error_Mean_Std(self): - PCEModel = self.MetaModel + MetaModel = self.MetaModel # Extract the mean and std provided by user - df_MCReference = PCEModel.ModelObj.mc_reference + df_MCReference = MetaModel.ModelObj.mc_reference - # Compute the mean and std based on the PCEModel - pce_means, pce_stds = self._compute_pce_moments(PCEModel) + # Compute the mean and std based on the MetaModel + pce_means, pce_stds = self._compute_pce_moments(MetaModel) # Compute the root mean squared error - for output in PCEModel.ModelObj.Output.names: + for output in MetaModel.ModelObj.Output.names: - # Compute the error between mean and std of PCEModel and OrigModel + # Compute the error between mean and std of MetaModel and OrigModel RMSE_Mean = mean_squared_error( df_MCReference['mean'], pce_means[output], squared=False ) -- GitLab