From 95d38aea64032f66e3d8f703d81dec8331e07a22 Mon Sep 17 00:00:00 2001 From: faridm69 <faridmohammadi69@gmail.com> Date: Fri, 14 Aug 2020 18:18:15 +0200 Subject: [PATCH] [pylink] added a new feature that saves the experimental design as a hdf5 metadata. --- .../PostProcessing/PostProcessing.py | 7 +- BayesValidRox/PyLink/FuncForwardModel.py | 105 ++++++++-- BayesValidRox/surrogate_models/ExpDesigns.py | 20 +- .../surrogate_models/RegressionFastARD.py | 9 +- .../surrogate_models/surrogate_models.py | 194 +++++++++++------- .../Test_AnalyticalFunction.py | 21 +- README.md | 1 + configure.sh | 1 + 8 files changed, 242 insertions(+), 116 deletions(-) diff --git a/BayesValidRox/PostProcessing/PostProcessing.py b/BayesValidRox/PostProcessing/PostProcessing.py index a3e17aba7..21e2bb6e4 100644 --- a/BayesValidRox/PostProcessing/PostProcessing.py +++ b/BayesValidRox/PostProcessing/PostProcessing.py @@ -549,6 +549,7 @@ class PostProcessing: """ PCEModel = self.PCEModel + NrofInitSamples = PCEModel.ExpDesign.initNrSamples totalNSamples = PCEModel.ExpDesign.X.shape[0] plotList = ['Modified LOO error', 'Validation error', 'KLD', 'BME', @@ -692,7 +693,9 @@ class PostProcessing: # Plot baseline for zero, i.e. no difference plt.axhline(y=1.0, xmin=0, xmax=1, c='green', ls='--', lw=2) - + + # Set the limits + plt.ylim([1e-1, 1e1]) else: values = SeqValues @@ -702,8 +705,6 @@ class PostProcessing: plt.semilogy(x_idx, values, marker=markers[idx], color=colors[idx], ls='--', lw=2, label=name.split("_rep",1)[0]) - # Set the limits - #plt.ylim([1e-1, 1e1]) else: plotLabel = plot diff --git a/BayesValidRox/PyLink/FuncForwardModel.py b/BayesValidRox/PyLink/FuncForwardModel.py index cd50b552d..6bd9be31d 100644 --- a/BayesValidRox/PyLink/FuncForwardModel.py +++ b/BayesValidRox/PyLink/FuncForwardModel.py @@ -12,6 +12,11 @@ import pandas as pd import tqdm from functools import reduce import multiprocessing +import h5py +try: + import cPickle as pickle +except ModuleNotFoundError: + import pickle class FuncForwardModel: @@ -19,6 +24,8 @@ class FuncForwardModel: #TODO: This must be set in the main file. self.Name = None self.pyFile = self.Name + self.outMin = None + self.outMax = None self.Observations = {} self.ObservationsValid = {} self.OutputMatrix = {} @@ -36,6 +43,11 @@ class FuncForwardModel: def Run_Model_Parallel(self, CollocationPoints, prevRun_No=0, keyString=''): + # Create hdf5 metadata + hdf5file = 'ExpDesign'+'_'+self.Name+'.hdf5' + hdf5_exist = os.path.exists(hdf5file) + f = h5py.File(hdf5file, 'a') + # Initilization P = len(CollocationPoints) OutputNames = self.Output.Names @@ -53,30 +65,87 @@ class FuncForwardModel: # Save time steps or x-values x_values = group_results[0][0] TotalOutputs["x_values"] = x_values + if not hdf5_exist: f.create_dataset("x_values", data=x_values) # save each output in their corresponding array - for varIdx, var in enumerate(OutputNames): - TotalOutputs[var] = np.asarray([item[varIdx+1] for item in group_results], dtype=np.float64) + outRangeIdx = [] + for varIdx, var in enumerate(OutputNames): + grpY = f.create_group("EDY/"+var) if not hdf5_exist else f.get("EDY/"+var) + Outputs = np.asarray([item[varIdx+1] for item in group_results], dtype=np.float64) + if prevRun_No == 0: + grpY.create_dataset("init_"+keyString, data=CollocationPoints) + else: + try: + oldEDY = np.array(f['EDY/'+var+'/adaptive_'+keyString]) + del f['EDY/'+var+'/adaptive_'+keyString] + data = np.vstack((oldEDY, Outputs)) + except: + data = Outputs + grpY.create_dataset('adaptive_'+keyString, data=data) + + # Check if all outputs lay between provided min and max + if self.outMin is not None and P > 1: + for outIdx, out in enumerate(Outputs): + if not self.within_range(out, self.outMin, self.outMax): + outRangeIdx.append(outIdx) + + TotalOutputs[var] = np.delete(Outputs, outRangeIdx, axis=0) + if prevRun_No == 0: + grpY.create_dataset("New_init_"+keyString, data=TotalOutputs[var]) + else: + try: + oldEDY = np.array(f['EDY/'+var+'/New_adaptive_'+keyString]) + del f['EDY/'+var+'/New_adaptive_'+keyString] + data = np.vstack((oldEDY, TotalOutputs[var])) + except: + data = TotalOutputs[var] + grpY.create_dataset('New_adaptive_'+keyString, data=data) + + # Print the collocation points whose simulations crashed + if len(outRangeIdx) != 0: + print('\n') + print('*'*20) + print("\nThe following parametersets have been removed:\n", + CollocationPoints[outRangeIdx]) + print("\n") + print('*'*20) - # Pass it to the attribute + # Pass it to the attribute + NewCollocationPoint = np.delete(CollocationPoints, outRangeIdx, axis=0) self.OutputMatrix = TotalOutputs - return self.OutputMatrix, CollocationPoints + # Save CollocationPoints as .npy + grpX = f.create_group("EDX") if not hdf5_exist else f.get("EDX") + if prevRun_No == 0: + grpX.create_dataset("init_"+keyString, data=CollocationPoints) + if len(outRangeIdx) != 0: + grpX.create_dataset("New_init_"+keyString, data=NewCollocationPoint) + + else: + try: + oldCollocationPoints = np.array(f['EDX/'+'adaptive_'+keyString]) + del f['EDX/'+'adaptive_'+keyString] + data = np.vstack((oldCollocationPoints, NewCollocationPoint)) + except: + data = NewCollocationPoint + grpX.create_dataset('adaptive_'+keyString, data=data) + + if len(outRangeIdx) != 0: + try: + oldCollocationPoints = np.array(f['EDX/New_'+'adaptive_'+keyString]) + del f['EDX/New_'+'adaptive_'+keyString] + data = np.vstack((oldCollocationPoints, NewCollocationPoint)) + except: + data = NewCollocationPoint + grpX.create_dataset('New_adaptive_'+keyString, data=data) + + return self.OutputMatrix, NewCollocationPoint - def Run_Model(self, Xnew, TotalNSamples, noise_amount = 0.02): - - CollocationPoints = np.array([Xnew]) - Filename = self.pyFile - Function = getattr(__import__(Filename), Filename) - - y = Function(CollocationPoints) - - # Generate our data by adding some noise on to the generative function - noise = np.std(y) * noise_amount - - ModelOutputs = np.add(y , np.random.normal(0, 1, len(CollocationPoints[:,0])) * noise)[:,np.newaxis] - - return ModelOutputs + def within_range(self, out, minout, maxout): + inside = False + if (out > minout).all() and (out < maxout).all(): + inside = True + return inside def read_Observation(self): obsDataFrame = pd.DataFrame.from_dict(self.Observations) diff --git a/BayesValidRox/surrogate_models/ExpDesigns.py b/BayesValidRox/surrogate_models/ExpDesigns.py index 4de716cf4..4a255fb9a 100644 --- a/BayesValidRox/surrogate_models/ExpDesigns.py +++ b/BayesValidRox/surrogate_models/ExpDesigns.py @@ -19,7 +19,7 @@ class ExpDesigns: def __init__(self, Input): self.NrSamples = None self.InputObj = Input - self.MCSize = 100000 + self.MCSize = 10000 self.Input_distributions = [] self.BoundTuples = [] self.SamplingMethod = 'MC' @@ -42,7 +42,7 @@ class ExpDesigns: self.polycoeffs = {} self.X = None self.Y = None - + self.hdf5File = None def GetSample(self, NrSamples, SamplingMethod='random', MaxPceDegree=None): """ @@ -87,7 +87,7 @@ class ExpDesigns: self.Input_distributions, self.BoundTuples = self.Parameter_Initialization() # Create ExpDesign in the actual space - X = chaospy.generate_samples(NrSamples, domain=self.JDist , rule=SamplingMethod).T + samples = chaospy.generate_samples(NrSamples, domain=self.JDist , rule=SamplingMethod).T else: @@ -96,16 +96,18 @@ class ExpDesigns: # Generate the samples based on requested method if self.SamplingMethod == 'user': - X = self.X - self.NrSamples = len(X) - self.Input_distributions, self.BoundTuples = self.Parameter_Initialization(MaxPceDegree) + samples = self.X + self.NrSamples = len(samples) + # if len(self.Input_distributions)==0: + if self.arbitrary: + self.Input_distributions, self.BoundTuples = self.Parameter_Initialization(MaxPceDegree) elif self.SamplingMethod == 'random': - X = self.MCSampling(NrSamples,MaxPceDegree) + samples = self.MCSampling(NrSamples,MaxPceDegree) elif self.SamplingMethod == 'PCM' or self.SamplingMethod == 'LSCM': - X = self.PCMSampling(MaxPceDegree) + samples = self.PCMSampling(MaxPceDegree) else: raise Exception('Unfortunately, for the prescribed inputs, the samepling method ' @@ -114,7 +116,7 @@ class ExpDesigns: if self.arbitrary: # Naive approach: Fit a gaussian kernel to the provided data self.JDist = st.gaussian_kde(self.Input_distributions) - return X + return samples def DistConstructor(self): Inputs = self.InputObj diff --git a/BayesValidRox/surrogate_models/RegressionFastARD.py b/BayesValidRox/surrogate_models/RegressionFastARD.py index da9a1ff1e..2d92080c9 100755 --- a/BayesValidRox/surrogate_models/RegressionFastARD.py +++ b/BayesValidRox/surrogate_models/RegressionFastARD.py @@ -8,6 +8,8 @@ Created on Tue Mar 24 19:41:45 2020 import numpy as np from scipy.linalg import solve_triangular from numpy.linalg import LinAlgError +from sklearn.base import RegressorMixin +from sklearn.linear_model.base import LinearModel import warnings from sklearn.utils import check_X_y,check_array,as_float_array from scipy.linalg import pinvh @@ -85,7 +87,7 @@ def update_precisions(Q,S,q,s,A,active,tol,n_samples,clf_bias): #-------------------------- Regression ARD ------------------------------------ -class RegressionFastARD(): +class RegressionFastARD(LinearModel,RegressorMixin): ''' Regression with Automatic Relevance Determination (Fast Version uses Sparse Bayesian Learning) @@ -270,6 +272,7 @@ class RegressionFastARD(): # update precision parameter for noise distribution rss = np.sum( ( y - np.dot(X[:,active] , Mn) )**2 ) + # if near perfect fit , then terminate if rss / n_samples < self.tol: warnings.warn('Early termination due to near perfect fit') @@ -277,7 +280,7 @@ class RegressionFastARD(): break beta = n_samples - np.sum(active) + np.sum(Aa * Sdiag ) beta /= ( rss + np.finfo(np.float32).eps ) - + # update precision parameters of coefficients A,converged = update_precisions(Q,S,q,s,A,active,self.tol, n_samples,False) @@ -308,7 +311,7 @@ class RegressionFastARD(): self.converged = converged if self.compute_score: self.scores_ = np.array(scores_) - #self._set_intercept(X_mean,y_mean,X_std) + self._set_intercept(X_mean,y_mean,X_std) return self def log_marginal_likelihood(self, XXa,XYa, Aa, beta): diff --git a/BayesValidRox/surrogate_models/surrogate_models.py b/BayesValidRox/surrogate_models/surrogate_models.py index bcae6321f..f5b12d9e8 100644 --- a/BayesValidRox/surrogate_models/surrogate_models.py +++ b/BayesValidRox/surrogate_models/surrogate_models.py @@ -12,6 +12,7 @@ import warnings warnings.filterwarnings("ignore") import numpy as np import math +import h5py from mpl_toolkits import mplot3d import matplotlib.pyplot as plt from matplotlib.offsetbox import AnchoredText @@ -52,6 +53,7 @@ from .ExpDesigns import ExpDesigns from .aPoly_Construction import aPoly_Construction from .Exploration import Exploration from .RegressionFastARD import RegressionFastARD +from .VarLinearRegression import VarLinearRegression class Metamodel: @@ -99,7 +101,7 @@ class Metamodel: self.validLikelihoods = [] self.index = None - + self.adaptVerbose = False self.Likelihoods = None self.ModelObj = None @@ -171,27 +173,60 @@ class Metamodel: self.ExpDesign = ExpDesigns(self.Inputs) def Exe_ExpDesign(self, Model): + + if self.ExpDesignFlag != 'sequential': + # Read ExpDesign from the provided hdf5 + if self.ExpDesign.hdf5File is not None: + + # Read hdf5 file + f = h5py.File(self.ExpDesign.hdf5File, 'a') + + # Read EDX and pass it to ExpDesign object + try: + self.ExpDesign.X = np.array(f["EDX/New_init_"]) + except: + self.ExpDesign.X = np.array(f["EDX/init_"]) + + self.ExpDesign.NrSamples = self.ExpDesign.X.shape[0] + + # Read EDX and pass it to ExpDesign object + OutputNames = self.ModelObj.Output.Names + self.ExpDesign.Y = {} + + self.ExpDesign.Y["x_values"] = np.array(f["x_values"]) + + for varIdx, var in enumerate(OutputNames): + try: + self.ExpDesign.Y[var] = np.array(f["EDY/"+var+"/New_init_"]) + except: + self.ExpDesign.Y[var] = np.array(f["EDY/"+var+"/init_"]) + + else: + # Check if an old hdf5 file exists: if yes, rename + hdf5file = 'ExpDesign'+'_'+self.ModelObj.Name+'.hdf5' + if os.path.exists(hdf5file): os.rename(hdf5file,'old_'+hdf5file) + # Get the sample for X (GetSample) - self.ExpDesign.X = self.ExpDesign.GetSample(self.ExpDesign.NrSamples, self.ExpDesign.SamplingMethod, self.MaxPceDegree) - self.OptimalCollocationPointsBase = self.ExpDesign.X self.BoundTuples = self.ExpDesign.BoundTuples - # ---- Create ModelOutput ------ # Add concurrent simulations if self.ExpDesign.Y is None: - print('\nHere the forward model needs to be executed!\n') - self.ModelOutputDict, NewCollocationPoint = Model.Run_Model_Parallel(self.ExpDesign.X) - self.ExpDesign.X = NewCollocationPoint + print('\n Now the forward model needs to be run!\n') + self.ModelOutputDict, self.ExpDesign.X = Model.Run_Model_Parallel(self.ExpDesign.X) self.ExpDesign.Y = self.ModelOutputDict - else: + # Check if a dict has been passed. + if type(self.ExpDesign.Y) is dict: + self.ModelOutputDict = self.ExpDesign.Y - self.ModelOutputDict = self.ExpDesign.Y + else: + raise Exception('Please provide either a dictionary or pass a' + 'hdf5 file using the ExpDesign.hdf5File argument.') - return self.OptimalCollocationPointsBase + return self.ExpDesign.X #-------------------------------------------------------------------------------------------------------- def poly_rec_coeffs(self, n_max, polytype, parIdx_or_parms): @@ -509,13 +544,17 @@ class Metamodel: clf_poly = RegressionFastARD(start=startBasisIndices, fit_intercept=True, compute_score=compute_score, - n_iter=1500, tol=1e-3) - + n_iter=1000, tol=1e-3) + + elif RegMethod.lower() == 'varbayes': + clf_poly = VarLinearRegression(n_iter = 500, tol =1e-3) + elif RegMethod.lower() == 'lars': - clf_poly = linear_model.Lars(fit_intercept=False) + clf_poly = linear_model.Lars(fit_intercept=True) elif RegMethod.lower() == 'sgdr': - clf_poly = linear_model.SGDRegressor(fit_intercept = False, max_iter=5000, tol=1e-7) + clf_poly = linear_model.SGDRegressor(fit_intercept=True, + max_iter=5000, tol=1e-7) # Fit clf_poly.fit(PSI, ModelOutput) @@ -526,19 +565,16 @@ class Metamodel: nnz_idx = np.nonzero(clf_poly.coef_)[0] if len(nnz_idx) == 0 or nnz_idx[0] != 0: - # print("\nWarning: Adding the first regressor.") nnz_idx = np.insert(np.nonzero(clf_poly.coef_)[0], 0, 0) - # Remove the zero entries for Bases and PSI if need be - z_idx = np.where(clf_poly.coef_==0)[0] - if len(z_idx) > 0: + # Remove the zero entries for Bases and PSI if need be PolynomialDegrees_Sparse = BasisIndices.toarray()[nnz_idx] if sparsity else BasisIndices[nnz_idx] PSI_Sparse = PSI[:,nnz_idx] # Store the coefficients of the regression model (mean of distribution). clf_poly.fit(PSI_Sparse, ModelOutput) coeffs = clf_poly.coef_ - + else: # This is for the case where all outputs are zero, thereby # all coefficients are zero @@ -661,7 +697,7 @@ class Metamodel: if self.RegMethod == 'FastARD' and not clf_poly.converged and deg != 1: print("Degree {0} not converged!".format(deg)) scores[degIdx] = -1*np.inf - break + #break else: # Store the score in the scores list bestqIdx = np.nanargmax(qNormScores) @@ -779,7 +815,6 @@ class Metamodel: Psi: the orthogonal polynomials of the response surface """ - Psi = np.array(TotalPsi, dtype = float) @@ -811,7 +846,7 @@ class Metamodel: if isinstance(clf, list): residual = np.dot(Psi, Coeffs) - ModelOutputs else: - residual = clf.predict(Psi) - ModelOutputs + residual = clf.predict(TotalPsi) - ModelOutputs # Variance varY = np.var(ModelOutputs) @@ -845,8 +880,7 @@ class Metamodel: T_factor = np.inf CorrectedErrLoo = ErrLoo * T_factor - - + Q_2 = 1 - CorrectedErrLoo return Q_2, residual #LCerror @@ -895,8 +929,8 @@ class Metamodel: kernels = [1.0 * RBF(length_scale=1.0, length_scale_bounds=(1e-1, 10.0)), 1.0 * RationalQuadratic(length_scale=1.0, alpha=0.1), - ConstantKernel(0.1, (0.01, 10.0)) - * (DotProduct(sigma_0=1.0, sigma_0_bounds=(0.1, 10.0)) ** 2), + # ConstantKernel(0.1, (0.01, 10.0)) + # * (DotProduct(sigma_0=1.0, sigma_0_bounds=(0.1, 10.0)) ** 2), 1.0 * Matern(length_scale=1.0, length_scale_bounds=(1e-1, 10.0), nu=1.5)] @@ -940,7 +974,7 @@ class Metamodel: # Get the collocation points to run the forward model self.Exe_ExpDesign(Model) - CollocationPoints = self.OptimalCollocationPointsBase + CollocationPoints = self.ExpDesign.X OutputDict = self.ModelOutputDict.copy() # Initialize the nested dictionaries @@ -970,17 +1004,20 @@ class Metamodel: else: # Generate Basis indices matrix maxDeg = self.MaxPceDegree - nSamples, ndim = self.OptimalCollocationPointsBase.shape + nSamples, ndim = CollocationPoints.shape nitr = nSamples - self.ExpDesign.initNrSamples d = nitr if nitr != 0 and self.NofPa > 5 else 1 M_uptoMax = lambda maxDeg: np.array([math.factorial(ndim+d)//(math.factorial(ndim)*math.factorial(d)) for d in range(1,maxDeg+1)]) degNew = range(1,maxDeg+1)[np.argmin(abs(M_uptoMax(maxDeg)-nSamples*ndim*d))] self.q = np.array(self.q) if not np.isscalar(self.q) else np.array([self.q]) - self.DegreeArray = np.arange(self.MinPceDegree,degNew+1) #or np.array([deg]) - + try: + self.DegreeArray = np.arange(self.MinPceDegree,maxDeg+1) #degNew+1) + except: + self.DegreeArray = np.array([degNew]) + # self.DegreeArray = np.array([degNew]) for deg in self.DegreeArray: - # self.allBasisIndices = self.AutoVivification() if deg not in np.fromiter(self.allBasisIndices.keys(), dtype=float): + # self.allBasisIndices = self.AutoVivification() # Generate the polynomial basis indices for qidx, q in enumerate(self.q): self.allBasisIndices[str(deg)][str(q)] = self.PolyBasisIndices(degree=deg, q=q) @@ -2473,9 +2510,7 @@ class Metamodel: validError_dict = {} # Loop over the keys and compute RMSE error. for key in OutputName: - validErr = 1 - r2_score(PCEOutputs[key],ModelOutputs,multioutput='variance_weighted') - - validError_dict[key] = validErr + validError_dict[key] = 1 - r2_score(ModelOutputs, PCEOutputs[key],multioutput='variance_weighted') return validError_dict @@ -2509,10 +2544,10 @@ class Metamodel: PCEStd = np.dot(PCEStd, PCA.components_) #PCEStd # Compute the error between mean and std of PCEModel and OrigModel - RMSE_Mean = mean_absolute_error(df_MCReference['mean'], PCEMean) - #np.sqrt(mean_squared_error(df_MCReference['mean'], PCEMean)) - RMSE_std = mean_absolute_error(df_MCReference['std'], PCEStd) - #np.sqrt(mean_squared_error(df_MCReference['std'], PCEStd)) + # RMSE_Mean = mean_absolute_error(df_MCReference['mean'], PCEMean) + # RMSE_std = mean_absolute_error(df_MCReference['std'], PCEStd) + RMSE_Mean = mean_squared_error(df_MCReference['mean'], PCEMean,squared=False) + RMSE_std = mean_squared_error(df_MCReference['std'], PCEStd,squared=False) PCEMeans[Outkey] = PCEMean PCEStds[Outkey] = PCEStd @@ -2569,7 +2604,7 @@ class Metamodel: self.posteriorPlot(initPosterior, MAP, parNames, 'SeqPosterior_init') # Check the convergence of the Mean&Std - if self.ModelObj.MCReference: + if self.ModelObj.MCReference and self.metaModel.lower()!='gpe': initRMSEMean, initRMSEStd = self.error_Mean_Std() print("Initial Mean and Std error: %s, %s"%(initRMSEMean, initRMSEStd)) @@ -2580,17 +2615,18 @@ class Metamodel: initYprev = deepcopy(initPCEModel.ModelOutputDict) # Read the initial ModifiedLOO - Scores_all, varExpDesignY =[], [] - for OutName in OutputName: - Scores_all.append(list(initPCEModel.ScoresDict[OutName].values())) - if self.DimRedMethod.lower() == 'pca': - components = self.pca[OutName].transform(initPCEModel.ExpDesign.Y[OutName]) - varExpDesignY.append(np.var(components,axis=0)) - else: - varExpDesignY.append(np.var(initPCEModel.ExpDesign.Y[OutName],axis=0)) - Scores = [item for sublist in Scores_all for item in sublist] - weights = [item for sublist in varExpDesignY for item in sublist] - initModifiedLOO = [np.average([1-score for score in Scores], weights=weights)] + if self.metaModel.lower()!='gpe': + Scores_all, varExpDesignY =[], [] + for OutName in OutputName: + Scores_all.append(list(initPCEModel.ScoresDict[OutName].values())) + if self.DimRedMethod.lower() == 'pca': + components = self.pca[OutName].transform(initPCEModel.ExpDesign.Y[OutName]) + varExpDesignY.append(np.var(components,axis=0)) + else: + varExpDesignY.append(np.var(initPCEModel.ExpDesign.Y[OutName],axis=0)) + Scores = [item for sublist in Scores_all for item in sublist] + weights = [item for sublist in varExpDesignY for item in sublist] + initModifiedLOO = [np.average([1-score for score in Scores], weights=weights)] if len(self.validModelRuns) != 0: initValidError = initPCEModel.validError_() @@ -2626,10 +2662,11 @@ class Metamodel: Yfull = [] # Store the initial ModifiedLOO - print("\nInitial ModifiedLOO:", initModifiedLOO) + if self.metaModel.lower()!='gpe': + print("\nInitial ModifiedLOO:", initModifiedLOO) - ModifiedLOO = initModifiedLOO - SeqModifiedLOO = np.array(ModifiedLOO) + ModifiedLOO = initModifiedLOO + SeqModifiedLOO = np.array(ModifiedLOO) if len(self.validModelRuns) != 0: ValidError = initValidError @@ -2640,7 +2677,7 @@ class Metamodel: SeqBME = np.array([initBME]) SeqKLD = np.array([initKLD]) SeqDistHellinger = np.array([initDistHellinger]) - if self.ModelObj.MCReference: + if self.ModelObj.MCReference and self.metaModel.lower()!='gpe': seqRMSEMean = np.array([initRMSEMean]) seqRMSEStd = np.array([initRMSEStd]) @@ -2663,6 +2700,12 @@ class Metamodel: Ynew, _ = Model.Run_Model_Parallel(Xnew, prevRun_No=TotalNSamples) TotalNSamples += Xnew.shape[0] + # -------- Plot the surrogate model vs Origninal Model ------- + if self.adaptVerbose: + from PostProcessing.adaptPlot import adaptPlot + y_hat, std_hat = PCEModel.eval_metamodel(samples=Xnew) + adaptPlot(PCEModel, Ynew, y_hat, std_hat) + # -------- Retrain the surrogate model ------- # Extend new experimental design Xfull = np.vstack((Xprev,Xnew)) @@ -2688,7 +2731,7 @@ class Metamodel: # Pass the new prior as the input if updatedPrior is not None: - print("updatedPrior:\n", updatedPrior.shape) + print("updatedPrior:", updatedPrior.shape) # arbitrary polynomial chaos for i in range(updatedPrior.shape[1]): self.Inputs.Marginals[i].DistType = None @@ -2705,23 +2748,24 @@ class Metamodel: print("\nUpdated ValidError:", ValidError) # Extract Modified LOO from Output - Scores_all, varExpDesignY =[], [] - for OutName in OutputName: - Scores_all.append(list(PCEModel.ScoresDict[OutName].values())) - if self.DimRedMethod.lower() == 'pca': - components = self.pca[OutName].transform(initPCEModel.ExpDesign.Y[OutName]) - varExpDesignY.append(np.var(components,axis=0)) - else: - varExpDesignY.append(np.var(PCEModel.ExpDesign.Y[OutName],axis=0)) - Scores = [item for sublist in Scores_all for item in sublist] - weights = [item for sublist in varExpDesignY for item in sublist] - ModifiedLOO = [np.average([1-score for score in Scores], - weights=weights)] - - print('\n') - print("Updated ModifiedLOO %s:\n"%UtilityFunction, ModifiedLOO) - print("Xfull:", Xfull.shape) - print('\n') + if self.metaModel.lower()!='gpe': + Scores_all, varExpDesignY =[], [] + for OutName in OutputName: + Scores_all.append(list(PCEModel.ScoresDict[OutName].values())) + if self.DimRedMethod.lower() == 'pca': + components = self.pca[OutName].transform(initPCEModel.ExpDesign.Y[OutName]) + varExpDesignY.append(np.var(components,axis=0)) + else: + varExpDesignY.append(np.var(PCEModel.ExpDesign.Y[OutName],axis=0)) + Scores = [item for sublist in Scores_all for item in sublist] + weights = [item for sublist in varExpDesignY for item in sublist] + ModifiedLOO = [np.average([1-score for score in Scores], + weights=weights)] + + print('\n') + print("Updated ModifiedLOO %s:\n"%UtilityFunction, ModifiedLOO) + print("Xfull:", Xfull.shape) + print('\n') # check the direction of the error (on average): @@ -2769,7 +2813,7 @@ class Metamodel: postcnt += 1 # Check the convergence of the Mean&Std - if self.ModelObj.MCReference: + if self.ModelObj.MCReference and self.metaModel.lower()!='gpe': print('\n') RMSE_Mean, RMSE_std = self.error_Mean_Std() print("Updated Mean and Std error: %s, %s"%(RMSE_Mean, RMSE_std)) @@ -2781,7 +2825,7 @@ class Metamodel: SeqBME = np.vstack((SeqBME, BME)) SeqKLD = np.vstack((SeqKLD, KLD)) SeqDistHellinger= np.vstack((SeqDistHellinger, DistHellinger)) - if self.ModelObj.MCReference: + if self.ModelObj.MCReference and self.metaModel.lower()!='gpe': seqRMSEMean = np.vstack((seqRMSEMean, RMSE_Mean)) seqRMSEStd = np.vstack((seqRMSEStd, RMSE_std)) @@ -2798,7 +2842,7 @@ class Metamodel: PCEModel.SeqKLD[strKey] = SeqKLD if len(self.validLikelihoods) != 0: PCEModel.SeqDistHellinger[strKey] = SeqDistHellinger - if self.ModelObj.MCReference: + if self.ModelObj.MCReference and self.metaModel.lower()!='gpe': PCEModel.seqRMSEMean[strKey] = seqRMSEMean PCEModel.seqRMSEStd[strKey] = seqRMSEStd diff --git a/BayesValidRox/tests/AnalyticalFunction/Test_AnalyticalFunction.py b/BayesValidRox/tests/AnalyticalFunction/Test_AnalyticalFunction.py index 4e3687688..fc1a2ce36 100755 --- a/BayesValidRox/tests/AnalyticalFunction/Test_AnalyticalFunction.py +++ b/BayesValidRox/tests/AnalyticalFunction/Test_AnalyticalFunction.py @@ -39,7 +39,7 @@ from BayesInference.BayesInference import BayesInference, Discrepancy if __name__ == "__main__": # Number of parameters - ndim = 2 # 2, 10 + ndim = 10 # 2, 10 #===================================================== #============= COMPUTATIONAL MODEL ================ @@ -111,7 +111,7 @@ if __name__ == "__main__": # error (or the highest score=1-LOO)estimator is chosen as the final # metamodel. MetaModelOpts.MinPceDegree = 1 - MetaModelOpts.MaxPceDegree = 6 + MetaModelOpts.MaxPceDegree = 10 # q-quasi-norm 0<q<1 (default=1) MetaModelOpts.q = 1.0 if ndim<5 else 0.75 @@ -126,8 +126,8 @@ if __name__ == "__main__": MetaModelOpts.addExpDesign() # One-shot (normal) or Sequential Adaptive (sequential) Design - MetaModelOpts.ExpDesign.Method = 'normal' - NrofInitSamples = 500 #75 + MetaModelOpts.ExpDesign.Method = 'sequential' + NrofInitSamples = 75 #75 MetaModelOpts.ExpDesign.NrSamples = NrofInitSamples # Sampling methods @@ -135,9 +135,13 @@ if __name__ == "__main__": # 7) korobov 8) grid(FT) 9) nested_grid(FT) 10)user MetaModelOpts.ExpDesign.SamplingMethod = 'random' + # Provide the experimental design object with a hdf5 file + # MetaModelOpts.ExpDesign.hdf5File = 'ExpDesign_AnalyticFunc.hdf5' + + # Sequential experimental design (needed only for sequential ExpDesign) MetaModelOpts.ExpDesign.NrofNewSample = 1 - MetaModelOpts.ExpDesign.MaxNSamples = 150 #150 + MetaModelOpts.ExpDesign.MaxNSamples = 100 #150 MetaModelOpts.ExpDesign.ModifiedLOOThreshold = 1e-16 # Defining the measurement error, if it's known a priori @@ -156,8 +160,9 @@ if __name__ == "__main__": # ------------------------------------------------ # ------- Sequential Design configuration -------- # ------------------------------------------------ + MetaModelOpts.adaptVerbose = True # 1) 'None' 2) 'equal' 3)'epsilon-decreasing' 4) 'adaptive' - MetaModelOpts.ExpDesign.TradeOffScheme = 'epsilon-decreasing' + MetaModelOpts.ExpDesign.TradeOffScheme = 'None' # MetaModelOpts.ExpDesign.nReprications = 2 # -------- Exploration ------ #1)'Voronoi' 2)'random' 3)'latin_hypercube' 4)'LOOCV' 5)'dual annealing' @@ -203,7 +208,7 @@ if __name__ == "__main__": #===================================================== PostPCE = PostProcessing(PCEModel) - PostPCE.NrofSamples = 3 + PostPCE.NrofSamples = 1 PostPCE.plotFlag = True t = np.arange(0, 10, 1.) / 9 PostPCE.ValidMetamodel(x_values=t, x_axis="Time [s]") @@ -231,7 +236,7 @@ if __name__ == "__main__": # Compute and print RMSE error PostPCE.relErrorPCEModel(nSamples=3000) - # sys.exit('STOP!!') + sys.exit('STOP!!') #===================================================== #======== Bayesian inference with Emulator ========== #===================================================== diff --git a/README.md b/README.md index 8f782586c..b6811e0b4 100644 --- a/README.md +++ b/README.md @@ -9,3 +9,4 @@ Validation Benchmark * emcee * chaospy * pytables +* h5py diff --git a/configure.sh b/configure.sh index 2f8dc1f6f..b9e19d1dd 100755 --- a/configure.sh +++ b/configure.sh @@ -9,3 +9,4 @@ python -m pip install tqdm --user python -m pip install chaospy --user python -m pip install emcee --user python -m pip install tables --upgrade --user +python -m pip install h5py --user -- GitLab