diff --git a/BayesValidRox/BayesInference/BayesInference.py b/BayesValidRox/BayesInference/BayesInference.py index 596ab81ac58311f19ec577090fc586956e3088e8..0f03b946ea3dd821cd01fca5aea2af6bfc5e820f 100644 --- a/BayesValidRox/BayesInference/BayesInference.py +++ b/BayesValidRox/BayesInference/BayesInference.py @@ -113,7 +113,7 @@ class BayesInference: NrSamples = self.NrofSamples - self.Samples = PCEModel.ExpDesign.GetSample(NrSamples, 'random') + self.Samples = PCEModel.ExpDesign.generate_samples(NrSamples, 'random') return self.Samples @@ -552,7 +552,7 @@ class BayesInference: # Output the Posterior - InputNames = [PCEModel.Inputs.Marginals[i].Name for i in range(len(PCEModel.Inputs.Marginals))] + InputNames = [PCEModel.input_obj.Marginals[i].Name for i in range(len(PCEModel.input_obj.Marginals))] if Sigma2Prior is not None: for name in self.Discrepancy.Name: InputNames.append(name) @@ -573,11 +573,11 @@ class BayesInference: if self.MeasuredData is None: self.MeasuredData = Model.read_observation(case=self.Name) - if not isinstance(self.MeasuredData,pd.DataFrame): + if not isinstance(self.MeasuredData, pd.DataFrame): self.MeasuredData = pd.DataFrame(self.MeasuredData) # X_values - x_values = PCEModel.ExpDesign.Y['x_values'] + x_values = PCEModel.ExpDesign.x_values try: Sigma2Prior = self.Discrepancy.Sigma2Prior @@ -605,7 +605,7 @@ class BayesInference: name=self.Name) # TODO: Correct the predictions with Model discrepancy if hasattr(self, 'errorModel') and self.errorModel: - PosteriorPred, PosteriorPred_std = self.errorMetaModel.eval_errormodel(self.BiasInputs, + PosteriorPred, PosteriorPred_std = self.errorMetaModel.eval_model_error(self.BiasInputs, PosteriorPred) else: if self.SamplingMethod == 'rejection': @@ -617,7 +617,7 @@ class BayesInference: # TODO: Correct the predictions with Model discrepancy if hasattr(self, 'errorModel') and self.errorModel: - PosteriorPred, PosteriorPred_std = self.errorMetaModel.eval_errormodel(self.BiasInputs, + PosteriorPred, PosteriorPred_std = self.errorMetaModel.eval_model_error(self.BiasInputs, PosteriorPred) # Add discrepancy from likelihood Sample to the current posterior runs @@ -732,7 +732,7 @@ class BayesInference: Model = PCEModel.ModelObj NofPa = PCEModel.NofPa OutputNames = Model.Output.Names - parNames = [self.PCEModel.Inputs.Marginals[i].Name for i in range(len(PCEModel.Inputs.Marginals))] + par_names = PCEModel.ExpDesign.par_names # If the prior is set by the user, take it. 
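Review note on the renames above: sampling, x-values, and parameter names now all live on the experimental-design object (`generate_samples`, `x_values`, `par_names`), and the error-model hooks become `create_model_error`/`eval_model_error`. A minimal sketch of the new calling convention, assuming a trained `PCEModel` as in these hunks. One caution: `par_names = PCEModel.ExpDesign.par_names` binds the attribute's list itself, so the later `par_names.extend(...)` in the discrepancy branch would mutate `ExpDesign.par_names` in place; copying first avoids that.

```python
import pandas as pd

# Hypothetical usage after this refactoring; `PCEModel` is assumed to be
# a trained metamodel with an attached ExpDesigns instance.
samples = PCEModel.ExpDesign.generate_samples(1000, 'random')

# Copy before extending with discrepancy names, so the shared
# ExpDesign.par_names list is not mutated as a side effect.
par_names = list(PCEModel.ExpDesign.par_names)

posterior_df = pd.DataFrame(samples, columns=par_names)
```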
if self.Samples is None: @@ -853,8 +853,8 @@ class BayesInference: y_MAP, y_std_MAP = PCEModel.eval_metamodel(samples=MAP_theta,name=self.Name) # Train a GPR meta-model using MAP - self.errorMetaModel = PCEModel.create_ModelError(self.BiasInputs,y_MAP, - Name=self.Name) + self.errorMetaModel = PCEModel.create_model_error(self.BiasInputs,y_MAP, + Name=self.Name) # ----------------------------------------------------- # ----- Loop over the perturbed observation data ------ @@ -873,7 +873,7 @@ class BayesInference: name=self.Name) # TODO: Correct the predictions with Model discrepancy if hasattr(self, 'errorModel') and self.errorModel: - self.__meanPCEPriorPred, self._stdPCEPriorPred = self.errorMetaModel.eval_errormodel(self.BiasInputs, + self.__meanPCEPriorPred, self._stdPCEPriorPred = self.errorMetaModel.eval_model_error(self.BiasInputs, self.__meanPCEPriorPred) # Surrogate model's error using RMSE of test data @@ -904,7 +904,7 @@ class BayesInference: # known sigma2 logLikelihoods[:,itrIdx] = self.normpdf(modelEvaluations, data_dict, TotalSigma2, std=surrError) - # y,std = PCEModel.eval_errormodel(self.Samples) + # y,std = PCEModel.eval_model_error(self.Samples) # logLikError = self.normpdferror(y, std) # ---------------- BME Calculations ---------------- @@ -972,7 +972,7 @@ class BayesInference: self.Posterior_df = MCMC_.run_sampler(self.MeasuredData, TotalSigma2) elif self.Name.lower() == 'valid': - self.Posterior_df = pd.DataFrame(self.Samples, columns=parNames) + self.Posterior_df = pd.DataFrame(self.Samples, columns=par_names) else: # Rejection sampling self.Posterior_df = self.Rejection_Sampling() @@ -1001,7 +1001,7 @@ class BayesInference: y_MAP, y_std_MAP = PCEModel.eval_metamodel(samples=MAP_theta,name=self.Name) # Train a GPR meta-model using MAP - self.errorMetaModel = PCEModel.create_ModelError(self.BiasInputs,y_MAP, + self.errorMetaModel = PCEModel.create_model_error(self.BiasInputs,y_MAP, Name=self.Name) # -------- Posterior perdictive ----------- @@ -1015,9 +1015,9 @@ class BayesInference: # -------- Posteior parameters -------- if optSigma != "B": - parNames.extend([self.Discrepancy.InputDisc.Marginals[i].Name for i in range(len(self.Discrepancy.InputDisc.Marginals))]) - - figPosterior = corner.corner(self.Posterior_df.to_numpy(), labels=parNames, + par_names.extend([self.Discrepancy.InputDisc.Marginals[i].Name for i in range(len(self.Discrepancy.InputDisc.Marginals))]) + + figPosterior = corner.corner(self.Posterior_df.to_numpy(), labels=par_names, quantiles=[0.15, 0.5, 0.85],show_titles=True, title_fmt=self.Corner_title_fmt, labelpad=0.2, @@ -1031,8 +1031,8 @@ class BayesInference: # Loop over axes and set x limits if optSigma == "B": - axes = np.array(figPosterior.axes).reshape((len(parNames), len(parNames))) - for yi in range(len(parNames)): + axes = np.array(figPosterior.axes).reshape((len(par_names), len(par_names))) + for yi in range(len(par_names)): ax = axes[yi, yi] ax.set_xlim(PCEModel.BoundTuples[yi]) for xi in range(yi): diff --git a/BayesValidRox/BayesInference/MCMC.py b/BayesValidRox/BayesInference/MCMC.py index ee276e084e6ed1a9c4bd99f1fa3742fc3e8078b8..1c918be1a7004721eae339d7cad4e0d3d48e29a2 100755 --- a/BayesValidRox/BayesInference/MCMC.py +++ b/BayesValidRox/BayesInference/MCMC.py @@ -272,13 +272,11 @@ class MCMC(): "successfully completed. 
<<<<<<\n") # Extract parameter names and their prior ranges - parNames = [] - for i in range(len(PCEModel.Inputs.Marginals)): - parNames.append(PCEModel.Inputs.Marginals[i].Name) + par_names = PCEModel.ExpDesign.par_names if Discrepancy.optSigma != 'B': for i in range(len(Discrepancy.InputDisc.Marginals)): - parNames.append(Discrepancy.InputDisc.Marginals[i].Name) + par_names.append(Discrepancy.InputDisc.Marginals[i].Name) params_range = PCEModel.ExpDesign.BoundTuples @@ -303,7 +301,7 @@ class MCMC(): orientation='horizontal', color='gray') main_ax.set_ylim(params_range[parIdx]) - main_ax.set_title('traceplot for ' + parNames[parIdx]) + main_ax.set_title('traceplot for ' + par_names[parIdx]) main_ax.set_xlabel('step number') # save the current figure @@ -352,7 +350,7 @@ class MCMC(): # postExpLikelihoods_emcee # print("Information Entropy: %.5f" %infEntropy_emcee) - Posterior_df = pd.DataFrame(finalsamples, columns=parNames) + Posterior_df = pd.DataFrame(finalsamples, columns=par_names) return Posterior_df @@ -372,7 +370,7 @@ class MCMC(): # Check if the sample is within the parameters' range if self.check_ranges(theta[i], params_range): # Check if all dists are uniform, if yes priors are equal. - if all(PCEModel.Inputs.Marginals[i].DistType == 'uniform' for i in \ + if all(PCEModel.input_obj.Marginals[i].DistType == 'uniform' for i in \ range(PCEModel.NofPa)): logprior[i] = 0.0 else: @@ -510,7 +508,7 @@ class MCMC(): self.counter += 1 if hasattr(self, 'errorMetaModel') and BayesObj.errorModel: - meanPred, stdPred = self.errorMetaModel.eval_errormodel( + meanPred, stdPred = self.errorMetaModel.eval_model_error( BayesObj.BiasInputs, meanPred) return meanPred, stdPred @@ -542,8 +540,8 @@ class MCMC(): name=BayesObj.Name) # Train a GPR meta-model using MAP - errorMetaModel = PCEModel.create_ModelError(BayesObj.BiasInputs, - y_MAP, Name='Calib') + errorMetaModel = PCEModel.create_model_error(BayesObj.BiasInputs, + y_MAP, Name='Calib') return errorMetaModel def Marginal_llk_emcee(self, sampler, nburn=None, logp=None, maxiter=1000): diff --git a/BayesValidRox/PostProcessing/PostProcessing.py b/BayesValidRox/PostProcessing/PostProcessing.py index 1a998760db9aa35a88028b0103945bc82cd4a009..b1ab0e629861e34b4018e1473eddfc619c92a795 100644 --- a/BayesValidRox/PostProcessing/PostProcessing.py +++ b/BayesValidRox/PostProcessing/PostProcessing.py @@ -64,21 +64,21 @@ class PostProcessing: PCEModel = self.PCEModel Model = PCEModel.ModelObj - for Outkey, ValuesDict in PCEModel.CoeffsDict.items(): + for Outkey, ValuesDict in PCEModel.coeffs_dict.items(): PCEMean = np.zeros((len(ValuesDict))) PCEVar = np.zeros((len(ValuesDict))) for Inkey, InIdxValues in ValuesDict.items(): idx = int(Inkey.split('_')[1]) - 1 - coeffs = PCEModel.CoeffsDict[Outkey][Inkey] + coeffs = PCEModel.coeffs_dict[Outkey][Inkey] # Mean = c_0 PCEMean[idx] = coeffs[0] if coeffs[0]!=0 else PCEModel.clf_poly[Outkey][Inkey].intercept_ # Var = sum(coeffs[1:]**2) PCEVar[idx] = np.sum(np.square(coeffs[1:])) - if PCEModel.DimRedMethod.lower() == 'pca': + if PCEModel.dim_red_method.lower() == 'pca': PCA = PCEModel.pca[Outkey] self.PCEMeans[Outkey] = PCA.mean_ + np.dot(PCEMean, PCA.components_) self.PCEStd[Outkey] = np.sqrt(np.dot(PCEVar, PCA.components_**2)) @@ -92,101 +92,96 @@ class PostProcessing: # ------------------------------------------------------------------------- def plotMoments(self, xlabel='Time [s]', plotType=None, SaveFig=True): - - barPlot = True if plotType == 'bar' else False - metaModel = self.PCEModel.metaModel - + + barPlot = 
True if plotType == 'bar' else False + metaModel = self.PCEModel.meta_model_type + # Set the x values - x_values_orig = self.PCEModel.ExpDesign.Y['x_values'] - + x_values_orig = self.PCEModel.ExpDesign.x_values + # Compute the moments with the PCEModel object self.PCEMoments() - + # Get the variables Keys = list(self.PCEMeans.keys()) - index = self.PCEModel.index - cases = [''] if index is None else ['Calib', 'Valid'] - - for case in cases: - # Open a pdf for the plots - if SaveFig: - newpath = (r'Outputs_PostProcessing_{0}/'.format(self.Name)) - if not os.path.exists(newpath): os.makedirs(newpath) - - # create a PdfPages object - name = case+'_' if 'Valid' in cases else '' - pdf = PdfPages('./'+newpath+name+'Mean_Std_PCE.pdf') - - - # Plot the best fit line, set the linewidth (lw), color and - # transparency (alpha) of the line - for idx, key in enumerate(Keys): - fig, ax = plt.subplots(nrows=1, ncols=2) - - # Extract a list of x values - x_values = x_values_orig[key] if type(x_values_orig) is dict else x_values_orig - - if case == 'Calib': - mean_data = self.PCEMeans[key][:index[idx]] - std_data = self.PCEStd[key][:index[idx]] - x = x_values if index is None else x_values[:index[idx]] - elif case == 'Valid': - mean_data = self.PCEMeans[key][index[idx]:] - std_data = self.PCEStd[key][index[idx]:] - x = x_values[index[idx]:] - else: - mean_data = self.PCEMeans[key] - std_data = self.PCEStd[key] - x = x_values - - - # Plot: bar plot or line plot + + # Open a pdf for the plots + if SaveFig: + newpath = (f'Outputs_PostProcessing_{self.Name}/') + if not os.path.exists(newpath): + os.makedirs(newpath) + + # create a PdfPages object + pdf = PdfPages(f'./{newpath}Mean_Std_PCE.pdf') + + # Plot the best fit line, set the linewidth (lw), color and + # transparency (alpha) of the line + for idx, key in enumerate(Keys): + fig, ax = plt.subplots(nrows=1, ncols=2) + + # Extract mean and std + mean_data = self.PCEMeans[key] + std_data = self.PCEStd[key] + + # Extract a list of x values + if type(x_values_orig) is dict: + x = x_values_orig[key] + else: + x = x_values_orig + + # Plot: bar plot or line plot + if barPlot: + ax[0].bar(list(map(str, x)), mean_data, color='b', + width=0.25) + ax[1].bar(list(map(str, x)), std_data, color='b', + width=0.25) + ax[0].legend(labels=[metaModel]) + ax[1].legend(labels=[metaModel]) + else: + ax[0].plot(x, mean_data, lw=3, color='k', marker='x', + label=metaModel) + ax[1].plot(x, std_data, lw=3, color='k', marker='x', + label=metaModel) + + if hasattr(self, 'Reference'): if barPlot: - ax[0].bar(list(map(str, x)), mean_data, color = 'b', width = 0.25) - ax[1].bar(list(map(str, x)), std_data, color = 'b', width = 0.25) - ax[0].legend(labels= [metaModel]) - ax[1].legend(labels= [metaModel]) + ax[0].bar(list(map(str, x)), self.Reference['mean'], + color='r', width=0.25) + ax[1].bar(list(map(str, x)), self.Reference['std'], + color='r', width=0.25) + ax[0].legend(labels=[metaModel]) + ax[1].legend(labels=[metaModel]) else: - ax[0].plot(x, mean_data, lw=3, color='k', marker='x', label=metaModel) - ax[1].plot(x, std_data, lw=3, color='k', marker='x', label=metaModel) - - try: - if barPlot: - ax[0].bar(list(map(str, x)), self.Reference['mean'], color = 'r', width = 0.25) - ax[1].bar(list(map(str, x)), self.Reference['std'], color = 'r', width = 0.25) - ax[0].legend(labels= [metaModel]) - ax[1].legend(labels= [metaModel]) - else: - ax[0].plot(x, self.Reference['mean'], lw=3, marker='x', color='r', label='Ref.') - ax[1].plot(x, self.Reference['std'], lw=3, marker='x', color='r', 
label='Ref.') - except: - pass - - # Label the axes and provide a title - ax[0].set_xlabel(xlabel) - ax[1].set_xlabel(xlabel) - ax[0].set_ylabel(Keys[idx]) - ax[1].set_ylabel(Keys[idx]) - - # Provide a title - ax[0].set_title('Mean of '+ key) - ax[1].set_title('Std of '+ key) - - if not barPlot: - ax[0].legend(loc='best') - ax[1].legend(loc='best') - - plt.tight_layout() - - if SaveFig: - # save the current figure - pdf.savefig(fig, bbox_inches='tight') - - # Destroy the current plot - plt.clf() - - pdf.close() - + ax[0].plot(x, self.Reference['mean'], lw=3, marker='x', + color='r', label='Ref.') + ax[1].plot(x, self.Reference['std'], lw=3, marker='x', + color='r', label='Ref.') + + # Label the axes and provide a title + ax[0].set_xlabel(xlabel) + ax[1].set_xlabel(xlabel) + ax[0].set_ylabel(Keys[idx]) + ax[1].set_ylabel(Keys[idx]) + + # Provide a title + ax[0].set_title('Mean of ' + key) + ax[1].set_title('Std of ' + key) + + if not barPlot: + ax[0].legend(loc='best') + ax[1].legend(loc='best') + + plt.tight_layout() + + if SaveFig: + # save the current figure + pdf.savefig(fig, bbox_inches='tight') + + # Destroy the current plot + plt.clf() + + pdf.close() + # ------------------------------------------------------------------------- def get_Sample(self): @@ -194,7 +189,7 @@ class PostProcessing: NrSamples = self.NrofSamples - self.Samples = PCEModel.ExpDesign.GetSample(NrSamples, SamplingMethod='random') + self.Samples = PCEModel.ExpDesign.generate_samples(NrSamples, 'random') return self.Samples @@ -226,7 +221,7 @@ class PostProcessing: univ_p_val = PCEModel.univ_basis_vals(SampleMesh) - for Outkey, ValuesDict in PCEModel.CoeffsDict.items(): + for Outkey, ValuesDict in PCEModel.coeffs_dict.items(): @@ -237,7 +232,7 @@ class PostProcessing: for Inkey, InIdxValues in ValuesDict.items(): idx = int(Inkey.split('_')[1]) - 1 - PolynomialDegrees = PCEModel.BasisDict[Outkey][Inkey] + PolynomialDegrees = PCEModel.basis_dict[Outkey][Inkey] clf_poly = PCEModel.clf_poly[Outkey][Inkey] PSI_Val = PCEModel.PCE_create_Psi(PolynomialDegrees, univ_p_val) @@ -308,7 +303,6 @@ class PostProcessing: Evaluate Forward Model """ - index = self.PCEModel.index Model = self.PCEModel.ModelObj if Samples is None: @@ -320,12 +314,7 @@ class PostProcessing: ModelOutputs, CollocationPoints = Model.run_model_parallel(Samples, keyString=keyString) - if index is not None: - self.ModelOutputs['x_values'] = {key: ModelOutputs['x_values'][key][:index[idx]] for idx, key in enumerate(Model.Output.names)}#([list(ModelOutputs.keys())[0]])} - Outputs = {key: ModelOutputs[key][:,:index[idx]] for idx, key in enumerate(Model.Output.names)} - self.ModelOutputs.update(Outputs) - else: - self.ModelOutputs = ModelOutputs + self.ModelOutputs = ModelOutputs return self.ModelOutputs @@ -343,10 +332,10 @@ class PostProcessing: self.NrofSamples = samples.shape[0] - x_values = self.PCEModel.ExpDesign.Y['x_values'] + x_values = self.PCEModel.ExpDesign.x_values self.ModelOutputs = self.eval_Model(samples, keyString='valid') - self.PCEOutputs, self.PCEOutputs_std = metaModel.eval_metamodel(samples=samples,name=self.Name) + self.PCEOutputs, self.PCEOutputs_std = metaModel.eval_metamodel(samples=samples) try: key = list(self.ModelOutputs.keys())[1] @@ -441,7 +430,7 @@ class PostProcessing: # Calculate the adjusted R_squared and RMSE # the total number of explanatory variables in the model (not including the constant term) - length_list = [len(value) for Key, value in PCEModel.BasisDict[key].items()] + length_list = [len(value) for Key, value in 
PCEModel.basis_dict[key].items()] NofPredictors = min(length_list) TotalSampleSize = X_Val.shape[0] #sample size @@ -682,7 +671,7 @@ class PostProcessing: """ PCEModel = self.PCEModel - NrofInitSamples = PCEModel.ExpDesign.initNrSamples + NrofInitSamples = PCEModel.ExpDesign.n_init_samples totalNSamples = PCEModel.ExpDesign.X.shape[0] if SaveFig: @@ -750,8 +739,8 @@ class PostProcessing: patch.set(facecolor=fill_color[idx]) - step1 = PCEModel.ExpDesign.NrofNewSample if PCEModel.ExpDesign.NrofNewSample!=1 else 5 - step2 = 1 if PCEModel.ExpDesign.NrofNewSample!=1 else 5 + step1 = PCEModel.ExpDesign.n_new_samples if PCEModel.ExpDesign.n_new_samples!=1 else 5 + step2 = 1 if PCEModel.ExpDesign.n_new_samples!=1 else 5 edge_color = ['red', 'blue', 'green'] fill_color = ['tan', 'cyan', 'lightgreen'] @@ -817,7 +806,7 @@ class PostProcessing: for idx, name in enumerate(nameUtil): SeqValues = SeqDict[name] - step = PCEModel.ExpDesign.NrofNewSample if PCEModel.ExpDesign.NrofNewSample!=1 else 1 + step = PCEModel.ExpDesign.n_new_samples if PCEModel.ExpDesign.n_new_samples!=1 else 1 x_idx = np.arange(NrofInitSamples, totalNSamples+1, step) x_idx = np.hstack((x_idx, totalNSamples)) if totalNSamples not in x_idx else x_idx @@ -907,14 +896,14 @@ class PostProcessing: """ # Extract the necessary variables PCEModel = self.PCEModel - BasisDict = PCEModel.BasisDict - CoeffsDict = PCEModel.CoeffsDict - NofPa = PCEModel.NofPa - max_order = PCEModel.MaxPceDegree + basis_dict = PCEModel.basis_dict + coeffs_dict = PCEModel.coeffs_dict + NofPa = PCEModel.n_params + max_order = np.max(PCEModel.pce_deg) for Output in PCEModel.ModelObj.Output.names: - n_meas_points = len(CoeffsDict[Output]) + n_meas_points = len(coeffs_dict[Output]) # Initialize the (cell) array containing the (total) Sobol indices. sobol_array = dict.fromkeys(range(1, max_order+1), []) @@ -934,13 +923,13 @@ class PostProcessing: for pIdx in range(n_meas_points): # Extract the basis indices (alpha) and coefficients - Basis = BasisDict[Output][f'y_{pIdx+1}'] + Basis = basis_dict[Output][f'y_{pIdx+1}'] try: clf_poly = PCEModel.clf_poly[Output][f'y_{pIdx+1}'] PCECoeffs = clf_poly.coef_ except: - PCECoeffs = CoeffsDict[Output][f'y_{pIdx+1}'] + PCECoeffs = coeffs_dict[Output][f'y_{pIdx+1}'] # Compute total variance TotalVariance[pIdx] = np.sum(np.square(PCECoeffs[1:])) @@ -986,18 +975,18 @@ class PostProcessing: total_sobol_array[ParIdx, pIdx] = sobol # ----- if PCA selected: Compute covariance ----- - if PCEModel.DimRedMethod.lower() == 'pca': + if PCEModel.dim_red_method.lower() == 'pca': cov_Z_p_q = np.zeros((NofPa)) # Extract the basis indices (alpha) and coefficients for # next component if pIdx < n_meas_points-1: - nextBasis = BasisDict[Output][f'y_{pIdx+2}'] + nextBasis = basis_dict[Output][f'y_{pIdx+2}'] try: clf_poly = PCEModel.clf_poly[Output][f'y_{pIdx+2}'] nextPCECoeffs = clf_poly.coef_ except: - nextPCECoeffs = CoeffsDict[Output][f'y_{pIdx+2}'] + nextPCECoeffs = coeffs_dict[Output][f'y_{pIdx+2}'] # Choose the common non-zero basis mask = (Basis[:, None] == nextBasis).all(-1).any(-1) @@ -1013,7 +1002,7 @@ class PostProcessing: cov_Z_p_q[ParIdx] = 0.0 # Compute the sobol indices according to Ref. 
2 - if PCEModel.DimRedMethod.lower() == 'pca': + if PCEModel.dim_red_method.lower() == 'pca': n_c_points = PCEModel.ExpDesign.Y[Output].shape[1] PCA = PCEModel.pca[Output] compPCA = PCA.components_ @@ -1078,11 +1067,10 @@ class PostProcessing: self.total_sobol[Output] = total_sobol_array # ---------------- Plot ----------------------- - parNames = [PCEModel.Inputs.Marginals[i].Name for i in range(NofPa)] - index = PCEModel.index - x_values_orig = PCEModel.ExpDesign.Y['x_values'] + par_names = PCEModel.ExpDesign.par_names + x_values_orig = PCEModel.ExpDesign.x_values - cases = [''] if index is None else ['Calib', 'Valid'] + cases = [''] for case in cases: newpath = (f'Outputs_PostProcessing_{self.Name}/') @@ -1098,41 +1086,29 @@ class PostProcessing: for outIdx, Output in enumerate(PCEModel.ModelObj.Output.names): + # Extract total Sobol indices + total_sobol = self.total_sobol[Output] + # Extract a list of x values if type(x_values_orig) is dict: - x_values = x_values_orig[Output] - else: - x_values = x_values_orig - - if case == 'Calib': - total_sobol = self.total_sobol[Output][:, :index[outIdx]] - if x_values is None: - x = x_values - else: - x = x_values[:index[outIdx]] - - elif case == 'Valid': - total_sobol = self.total_sobol[Output][:, index[outIdx]:] - x = x_values[index[outIdx]:] - + x = x_values_orig[Output] else: - total_sobol = self.total_sobol[Output] - x = x_values + x = x_values_orig if plotType == 'bar': ax = fig.add_axes([0, 0, 1, 1]) dict1 = {xlabel: x} dict2 = {param: sobolIndices for param, sobolIndices - in zip(parNames, total_sobol)} + in zip(par_names, total_sobol)} df = pd.DataFrame({**dict1, **dict2}) - df.plot(x=xlabel, y=parNames, kind="bar", ax=ax, rot=0, + df.plot(x=xlabel, y=par_names, kind="bar", ax=ax, rot=0, colormap='Dark2') ax.set_ylabel('Total Sobol indices, $S^T$') else: for i, sobolIndices in enumerate(total_sobol): - plt.plot(x, sobolIndices, label=parNames[i], + plt.plot(x, sobolIndices, label=par_names[i], marker='x', lw=2.5) plt.ylabel('Total Sobol indices, $S^T$') @@ -1146,7 +1122,7 @@ class PostProcessing: np.savetxt(f'./{newpath}{name}totalsobol_' + Output.replace('/', '_') + '.csv', total_sobol.T, delimiter=',', - header=','.join(parNames), comments='') + header=','.join(par_names), comments='') if SaveFig: # save the current figure diff --git a/BayesValidRox/PostProcessing/adaptPlot.py b/BayesValidRox/PostProcessing/adaptPlot.py index f9c958efe72643ffa8a49744d9c0b104f7ef0391..c67bdeea9e9f8f2692e5732beb9bc38ac3eb05f7 100755 --- a/BayesValidRox/PostProcessing/adaptPlot.py +++ b/BayesValidRox/PostProcessing/adaptPlot.py @@ -27,10 +27,9 @@ plt.rc('savefig', dpi=1000) def adaptPlot(PCEModel, Y_Val, Y_PC_Val, Y_PC_Val_std, x_values=[], plotED=False, SaveFig=True): - index = PCEModel.index - NrofSamples = PCEModel.ExpDesign.NrofNewSample - initNSamples = PCEModel.ExpDesign.initNrSamples - itrNr = 1 + (PCEModel.ExpDesign.X.shape[0] - initNSamples)//PCEModel.ExpDesign.NrofNewSample + NrofSamples = PCEModel.ExpDesign.n_new_samples + initNSamples = PCEModel.ExpDesign.n_init_samples + itrNr = 1 + (PCEModel.ExpDesign.X.shape[0] - initNSamples)//NrofSamples oldEDY = PCEModel.ExpDesign.Y diff --git a/BayesValidRox/PyLink/PyLinkForwardModel.py b/BayesValidRox/PyLink/PyLinkForwardModel.py index 1ca7fdcc9ae82f09307a54a148ed88a72a6b4cae..dcc0ce26e6a8e689f9c023b7858e3e56fa93af3f 100644 --- a/BayesValidRox/PyLink/PyLinkForwardModel.py +++ b/BayesValidRox/PyLink/PyLinkForwardModel.py @@ -154,6 +154,7 @@ class PyLinkForwardModel(object): raise Exception("Please 
provide the MC reference data as a " "dictionary via mc_reference attribute or pass the" " csv-file path to mc_ref_file attribute") + return self.mc_reference # ------------------------------------------------------------------------- def read_output(self): @@ -425,7 +426,7 @@ class PyLinkForwardModel(object): oldEDY = np.array(file[f'EDY/{var}/adaptive_{keyString}']) del file[f'EDY/{var}/adaptive_{keyString}'] data = np.vstack((oldEDY, Outputs)) - except: + except KeyError: data = Outputs grpY.create_dataset('adaptive_'+keyString, data=data) @@ -441,7 +442,7 @@ class PyLinkForwardModel(object): oldEDY = np.array(file[name]) del file[f'EDY/{var}/New_adaptive_{keyString}'] data = np.vstack((oldEDY, all_outputs[var])) - except: + except KeyError: data = all_outputs[var] grpY.create_dataset(f'New_adaptive_{keyString}', data=data) @@ -471,7 +472,7 @@ class PyLinkForwardModel(object): oldCollocationPoints = np.array(file[name]) del file[f'EDX/adaptive_{keyString}'] data = np.vstack((oldCollocationPoints, new_c_points)) - except: + except KeyError: data = new_c_points grpX.create_dataset('adaptive_'+keyString, data=data) @@ -481,7 +482,7 @@ class PyLinkForwardModel(object): oldCollocationPoints = np.array(file[name]) del file[f'EDX/New_adaptive_{keyString}'] data = np.vstack((oldCollocationPoints, new_c_points)) - except: + except KeyError: data = new_c_points grpX.create_dataset('New_adaptive_'+keyString, data=data) diff --git a/BayesValidRox/bayesvalidrox.mplstyle b/BayesValidRox/bayesvalidrox.mplstyle new file mode 100644 index 0000000000000000000000000000000000000000..1f31c01f24597de0e0be741be4d3a706c4213a6c --- /dev/null +++ b/BayesValidRox/bayesvalidrox.mplstyle @@ -0,0 +1,16 @@ +figure.titlesize : 30 +axes.titlesize : 30 +axes.labelsize : 30 +axes.linewidth : 3 +axes.grid : True +lines.linewidth : 3 +lines.markersize : 10 +xtick.labelsize : 30 +ytick.labelsize : 30 +legend.fontsize : 30 +font.family : serif +font.serif : Arial +font.size : 30 +text.usetex : True +grid.linestyle : - +figure.figsize : 24, 16 diff --git a/BayesValidRox/surrogate_models/ExpDesigns.py b/BayesValidRox/surrogate_models/ExpDesigns.py index afa9ec3d640201a457304275ba28e39ad0ff2d1a..8069c73d184b0b296496e032717cb5825232505e 100644 --- a/BayesValidRox/surrogate_models/ExpDesigns.py +++ b/BayesValidRox/surrogate_models/ExpDesigns.py @@ -1,9 +1,16 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ -Created on Sat Aug 24 14:41:02 2019 +Author: Farid Mohammadi, M.Sc. 
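Side note on the new `bayesvalidrox.mplstyle` above: it is a plain matplotlib style sheet, so loading it is one call. A minimal sketch (the path is an assumption about where the package lives); note that `text.usetex : True` additionally requires a working LaTeX installation, and `font.serif : Arial` is worth double-checking, since Arial is a sans-serif face.

```python
import matplotlib.pyplot as plt

# Load the style sheet shipped with the package (path is hypothetical).
plt.style.use('BayesValidRox/bayesvalidrox.mplstyle')

# Subsequent figures pick up the 30 pt fonts, grid and 24x16 canvas.
fig, ax = plt.subplots()
ax.plot([0, 1], [0, 1], label='demo')
ax.legend()
fig.savefig('demo.pdf')
```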
+E-Mail: farid.mohammadi@iws.uni-stuttgart.de +Department of Hydromechanics and Modelling of Hydrosystems (LH2) +Institute for Modelling Hydraulic and Environmental Systems (IWS), University +of Stuttgart, www.iws.uni-stuttgart.de/lh2/ +Pfaffenwaldring 61 +70569 Stuttgart + +Created on Sat Aug 24 2019 -@author: farid """ import numpy as np @@ -17,66 +24,104 @@ from .aPoly_Construction import aPoly_Construction class ExpDesigns: - def __init__(self, Input, metaModel='pce'): + def __init__(self, Input, meta_Model='pce', hdf5_file=None, + n_new_samples=1, n_max_samples=None, mod_LOO_threshold=1e-16, + tradeoff_scheme=None, n_canddidate=1, explore_method='random', + exploit_method='Space-filling', util_func='Space-filling', + n_cand_groups=4, n_replication=1, post_snapshot=False, + step_snapshot=1, max_a_post=[]): + self.InputObj = Input - self.SamplingMethod = 'random' - self.metaModel = metaModel - self.TradeOffScheme = 'None' - self.MaxNSamples = None - self.NrofNewSample = 1 - self.NCandidate = 1 - self.PostSnapshot = False - self.stepSnapshot = 2 - self.ExploitMethod = 'Space-filling' - self.UtilityFunction = 'Space-filling' - self.ExploreMethod = 'random' - self.NrofCandGroups = 4 - self.nReprications = 1 - self.parNames = [] - self.MAP = [] - self.X = None - self.Y = None - self.hdf5File = None - - def GetSample(self, NrSamples, SamplingMethod='random', transform=False, - MaxPceDegree=None): + self.meta_Model = meta_Model + self.hdf5_file = hdf5_file + self.n_new_samples = n_new_samples + self.n_max_samples = n_max_samples + self.mod_LOO_threshold = mod_LOO_threshold + self.explore_method = explore_method + self.exploit_method = exploit_method + self.util_func = util_func + self.tradeoff_scheme = tradeoff_scheme + self.n_canddidate = n_canddidate + self.n_cand_groups = n_cand_groups + self.n_replication = n_replication + self.post_snapshot = post_snapshot + self.step_snapshot = step_snapshot + self.max_a_post = max_a_post + + def generate_samples(self, n_samples, sampling_method='random', + transform=False): + """ + Generates samples with given sampling method + + Parameters + ---------- + n_samples : int + Number of requested samples. + sampling_method : TYPE, optional + DESCRIPTION. The default is 'random'. + transform : bool, optional + Transformation via an isoprobabilistic transformation method. The + default is False. + + Returns + ------- + samples: array of shape (n_samples, n_params) + Generated samples from defined model input object. + + """ + try: + samples = chaospy.generate_samples(n_samples, domain=self.JDist, + rule=sampling_method) + except: + samples = self.JDist.resample(n_samples) + + # Transform samples to the original space + if transform: + tr_samples = self.transform(samples.T) + return samples.T, tr_samples + else: + return samples.T + + def generate_ED(self, n_samples, sample_method='random', transform=False, + max_pce_deg=None): """ - Generates samples with the given method. + Generates experimental designs (training set) with the given method. Parameters ---------- - NrSamples : TYPE - DESCRIPTION. - MaxPceDegree : TYPE, optional - DESCRIPTION. The default is 2. - SamplingMethod : TYPE, optional - DESCRIPTION. The default is 'MC'. + n_samples : int + Number of requested training points. + sample_method : string, optional + Sampling method. The default is 'random'. + transform : bool, optional + Isoprobabilistic transformation. The default is False. + max_pce_deg : TYPE, optional + Maximum PCE polynomial degree. The default is None. 
Returns ------- - X : TYPE - DESCRIPTION. - u : TYPE - DESCRIPTION. + array of shape (n_samples, n_params) + Selected training samples. """ Inputs = self.InputObj self.ndim = len(Inputs.Marginals) - self.SamplingMethod = SamplingMethod - NrSamples = int(NrSamples) - if len(Inputs.Marginals[0].InputValues) == 0: + if not hasattr(self, 'n_init_samples'): + self.n_init_samples = self.ndim + 1 + n_samples = int(n_samples) + if len(Inputs.Marginals[0].input_data) == 0: self.arbitrary = False else: self.arbitrary = True - # Get the bounds if InputValues are directly defined by user: - if Inputs.Marginals[0].Parameters is None: + # Get the bounds if input_data are directly defined by user: + if Inputs.Marginals[0].parameters is None: for i in range(self.ndim): - low_bound = np.min(Inputs.Marginals[i].InputValues) - up_bound = np.max(Inputs.Marginals[i].InputValues) + low_bound = np.min(Inputs.Marginals[i].input_data) + up_bound = np.max(Inputs.Marginals[i].input_data) Inputs.Marginals[i].Parameters = [low_bound, up_bound] - if self.SamplingMethod == 'user': + if self.sampling_method == 'user': samples = self.X self.NrSamples = len(samples) @@ -84,42 +129,42 @@ class ExpDesigns: if not self.arbitrary: # Case I = polytype not arbitrary # Execute initialization to get the boundtuples - raw_data, bounds = self.Parameter_Initialization(MaxPceDegree) + raw_data, bounds = self.Parameter_Initialization(max_pce_deg) self.raw_data = raw_data self.BoundTuples = bounds # Create ExpDesign in the actual space - if self.SamplingMethod != 'user': - samples = chaospy.generate_samples(NrSamples, domain=self.JDist, - rule=SamplingMethod).T + if self.sampling_method != 'user': + samples = chaospy.generate_samples(n_samples, domain=self.JDist, + rule=sample_method).T elif self.arbitrary: # Case II: polytype arbitrary or Input values are directly given by # the user. 
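Review note on `generate_samples`/`generate_ED` above: with `transform=True` the first method returns a tuple, so callers must unpack two arrays. Also, `generate_ED` branches on `self.sampling_method`, but the hunks shown never assign it (the old code set `self.SamplingMethod` from the argument), so it appears the attribute must be set before the call, or the assignment was dropped in this refactor. A hedged sketch of the intended usage; the import path and the `inputs` object are assumptions:

```python
# Assumed import path; `inputs` is an Input object with Marginals defined.
from BayesValidRox.surrogate_models.ExpDesigns import ExpDesigns

exp_design = ExpDesigns(inputs)

# Plain sampling: one array of shape (n_samples, n_params).
X = exp_design.generate_samples(100, sampling_method='random')

# With transform=True two arrays come back, so unpack both.
X_orig, X_tr = exp_design.generate_samples(100, transform=True)

# generate_ED reads self.sampling_method internally (see the hunk above),
# so set it explicitly until it is assigned from `sample_method` again.
exp_design.sampling_method = 'random'
X_train = exp_design.generate_ED(50, sample_method='random', max_pce_deg=2)
```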
# Generate the samples based on requested method - if self.SamplingMethod == 'user': - raw_data, bounds = self.Parameter_Initialization(MaxPceDegree) + if self.sampling_method == 'user': + raw_data, bounds = self.Parameter_Initialization(max_pce_deg) self.raw_data = raw_data self.BoundTuples = bounds - elif self.SamplingMethod == 'random': - samples = self.MCSampling(NrSamples, MaxPceDegree) + elif self.sampling_method == 'random': + samples = self.MCSampling(n_samples, max_pce_deg) - elif self.SamplingMethod == 'PCM' or self.SamplingMethod == 'LSCM': - samples = self.PCMSampling(MaxPceDegree) + elif self.sampling_method == 'PCM' or self.sampling_method == 'LSCM': + samples = self.PCMSampling(max_pce_deg) else: # Execute initialization to get the boundtuples - raw_data, bounds = self.Parameter_Initialization(MaxPceDegree) + raw_data, bounds = self.Parameter_Initialization(max_pce_deg) self.raw_data = raw_data self.BoundTuples = bounds # Create ExpDesign in the actual space using chaospy try: - samples = chaospy.generate_samples(NrSamples, + samples = chaospy.generate_samples(n_samples, domain=self.JDist, - rule=SamplingMethod).T + rule=sample_method).T except: - samples = self.MCSampling(NrSamples, MaxPceDegree) + samples = self.MCSampling(n_samples, max_pce_deg) # Transform samples to the original space if transform: @@ -152,11 +197,11 @@ class ExpDesigns: n_samples, n_params = X.shape Inputs = self.InputObj origJDist = self.JDist - polyTypes = self.Polytypes + poly_types = self.poly_types disttypes = [] for par_i in range(n_params): - disttypes.append(Inputs.Marginals[par_i].DistType) + disttypes.append(Inputs.Marginals[par_i].dist_type) # Pass non-transformed X, if arbitrary PCE is selected. if None in disttypes or self.arbitrary: @@ -173,7 +218,7 @@ class ExpDesigns: dist = origJDist[par_i] else: dist = None - polytype = polyTypes[par_i] + polytype = poly_types[par_i] cdf = np.vectorize(lambda x: dist.cdf(x)) # Extract the parameters of the transformation space based on @@ -196,12 +241,12 @@ class ExpDesigns: inv_cdf = np.vectorize(lambda x: dist_Y.ppf(x)) elif polytype == 'arbitrary' or disttype is None: - # mu_X = Inputs.Marginals[par_i].Moments[0] - # stDev_X = Inputs.Marginals[par_i].Moments[1] + # mu_X = Inputs.Marginals[par_i].moments[0] + # stDev_X = Inputs.Marginals[par_i].moments[1] # cdf = np.vectorize(lambda x: (x - mu_X) / stDev_X) # # TODO: Unknown dist with gaussian_kde - # mu_Y = Y_marginals[par_i].Moments[0] - # stDev_Y = Y_marginals[par_i].Moments[1] + # mu_Y = Y_marginals[par_i].moments[0] + # stDev_Y = Y_marginals[par_i].moments[1] # inv_cdf = np.vectorize(lambda x: stDev_Y * x + mu_Y) pass @@ -249,19 +294,19 @@ class ExpDesigns: all_data = [] all_DistTypes = [] origJoints = [] - Polytypes = [] + poly_types = [] for parIdx in range(self.ndim): - if Inputs.Marginals[parIdx].DistType is None: - data = Inputs.Marginals[parIdx].InputValues + if Inputs.Marginals[parIdx].dist_type is None: + data = Inputs.Marginals[parIdx].input_data all_data.append(data) DistType, params = self.FitDist(data) - Inputs.Marginals[parIdx].DistType = DistType - Inputs.Marginals[parIdx].Parameters = params + Inputs.Marginals[parIdx].dist_type = DistType + Inputs.Marginals[parIdx].parameters = params else: - DistType = Inputs.Marginals[parIdx].DistType - params = Inputs.Marginals[parIdx].Parameters + DistType = Inputs.Marginals[parIdx].dist_type + params = Inputs.Marginals[parIdx].parameters if rosenblatt: polytype = 'hermite' @@ -313,9 +358,9 @@ class ExpDesigns: if self.arbitrary: polytype = 
'arbitrary' - # Store dists and polytypes + # Store dists and poly_types origJoints.append(Dist) - Polytypes.append(polytype) + poly_types.append(polytype) all_DistTypes.append(DistType) # Prepare final output to return @@ -328,7 +373,7 @@ class ExpDesigns: origSpaceDist = chaospy.J(*origJoints) self.priorSpace = st.gaussian_kde(origSpaceDist.sample(10000)) - return origSpaceDist, Polytypes + return origSpaceDist, poly_types def Parameter_Initialization(self, MaxPceDegree=None, OptDesignFlag=False): """ @@ -342,22 +387,25 @@ class ExpDesigns: # Create a multivariate probability distribution if MaxPceDegree is not None: - JDist, Polytypes = self.DistConstructor(rosenblatt=Rosenblatt) - self.JDist, self.Polytypes = JDist, Polytypes + JDist, poly_types = self.DistConstructor(rosenblatt=Rosenblatt) + self.JDist, self.poly_types = JDist, poly_types if self.arbitrary: - self.MCSize = len(Inputs.Marginals[0].InputValues) + self.MCSize = len(Inputs.Marginals[0].input_data) self.raw_data = np.zeros((ndim, self.MCSize)) + self.par_names = [] for parIdx in range(ndim): + # Save parameter names + self.par_names.append(Inputs.Marginals[parIdx].name) try: - self.raw_data[parIdx] = np.array(Inputs.Marginals[parIdx].InputValues) + self.raw_data[parIdx] = np.array(Inputs.Marginals[parIdx].input_data) except: self.raw_data[parIdx] = self.JDist[parIdx].sample(self.MCSize) # Create orthogonal polynomial coefficients if necessary - if self.metaModel.lower() != 'gpe' \ + if self.meta_Model.lower() != 'gpe' \ and MaxPceDegree is not None \ and Inputs.polycoeffsFlag: self.polycoeffs = {} @@ -371,12 +419,12 @@ class ExpDesigns: for parIdx in range(ndim): mu = np.mean(self.raw_data[parIdx]) std = np.std(self.raw_data[parIdx]) - self.InputObj.Marginals[parIdx].Moments = [mu, std] + self.InputObj.Marginals[parIdx].moments = [mu, std] # Generate the bounds based on given inputs for marginals BoundTuples = [] for i in range(ndim): - if Inputs.Marginals[i].DistType == 'unif': + if Inputs.Marginals[i].dist_type == 'unif': low_bound, up_bound = Inputs.Marginals[i].Parameters else: low_bound = np.min(self.raw_data[i]) @@ -408,7 +456,7 @@ class ExpDesigns: Samples = np.zeros((NrSamples, self.ndim)) for idxPa in range(self.ndim): - # InputValues given + # input_data given sample_size = len(self.raw_data[idxPa]) randIdx = np.random.randint(0, sample_size, NrSamples) Samples[:, idxPa] = self.raw_data[idxPa, randIdx] @@ -467,7 +515,7 @@ class ExpDesigns: SortUniqueCombos = np.vstack((SortUniqueCombos, SortUnicomb)) # Output the collocation points - if self.SamplingMethod == 'LSCM': + if self.sampling_method == 'LSCM': OptimalCollocationPointsBase = SortUniqueCombos else: OptimalCollocationPointsBase = SortUniqueCombos[0:self.NrSamples] diff --git a/BayesValidRox/surrogate_models/Exploration.py b/BayesValidRox/surrogate_models/Exploration.py index f8d977701cc81c216a89d26f43bc71495b9de96a..2504a15d5285acc930b13db30422e2acf613e004 100644 --- a/BayesValidRox/surrogate_models/Exploration.py +++ b/BayesValidRox/surrogate_models/Exploration.py @@ -46,9 +46,9 @@ class Exploration: and their associated exploration scores. 
""" PCEModel = self.PCEModel - ExplorationMethod = PCEModel.ExpDesign.ExploreMethod + explore_method = PCEModel.ExpDesign.explore_method - if ExplorationMethod == 'Voronoi': + if explore_method == 'Voronoi': print("\n") print(' The Voronoi-based method is selected as the exploration method.') print("\n") @@ -58,7 +58,7 @@ class Exploration: else: print("\n") - print(' The '+ExplorationMethod+'-Method is selected as the exploration method.') + print(f' The {explore_method}-Method is selected as the exploration method.') print("\n") # Generate samples using the MC method allCandidates, scoreExploration = self.getMCSamples() @@ -174,9 +174,9 @@ class Exploration: """ PCEModel = self.PCEModel - ExplorationMethod = PCEModel.ExpDesign.ExploreMethod + explore_method = PCEModel.ExpDesign.explore_method mcCriterion = self.mcCriterion - if allCandidates is None: + if allCandidates is None: nNewSamples = self.numNewSamples else: nNewSamples = allCandidates.shape[0] @@ -188,7 +188,8 @@ class Exploration: # ----- Compute the number of random points ----- if allCandidates is None: # Generate MC Samples - allCandidates = PCEModel.ExpDesign.GetSample(self.numNewSamples, 'random') + allCandidates = PCEModel.ExpDesign.generate_samples(self.numNewSamples, + explore_method) self.allCandidates = allCandidates # initialization @@ -294,7 +295,7 @@ class Exploration: # points[:,i] = self.scaleColumns(points[:,i],Bounds[i][0],Bounds[i][1]) ExpDesign = ExpDesigns(PCEModel.Inputs) - points = ExpDesign.GetSample(nPoints, 'random') + points = ExpDesign.generate_samples(nPoints, 'random') self.allCandidates = points diff --git a/BayesValidRox/surrogate_models/Input.py b/BayesValidRox/surrogate_models/Input.py index c362406a931353ed2d5e031be0e102c39b0a0eaf..b02ec0730aec98eb333a6c2397ca3bcdbff792fe 100644 --- a/BayesValidRox/surrogate_models/Input.py +++ b/BayesValidRox/surrogate_models/Input.py @@ -1,27 +1,33 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ -Created on Sat Aug 24 14:44:26 2019 +Author: Farid Mohammadi, M.Sc. +E-Mail: farid.mohammadi@iws.uni-stuttgart.de +Department of Hydromechanics and Modelling of Hydrosystems (LH2) +Institute for Modelling Hydraulic and Environmental Systems (IWS), University +of Stuttgart, www.iws.uni-stuttgart.de/lh2/ +Pfaffenwaldring 61 +70569 Stuttgart -@author: farid +Created on Sat Aug 24 2019 """ - + class Input: def __init__(self): self.Marginals = [] - self.Input_distributions = None self.polycoeffsFlag = True self.Rosenblatt = False - + def addMarginals(self): self.Marginals.append(Marginal()) - + + # Nested class class Marginal: def __init__(self): - self.Name = None - self.DistType = None - self.Moments = None - self.InputValues = [] - self.Parameters = [0, 1] \ No newline at end of file + self.name = None + self.dist_type = None + self.moments = None + self.input_data = [] + self.parameters = [0, 1] diff --git a/BayesValidRox/surrogate_models/aPoly_Construction.py b/BayesValidRox/surrogate_models/aPoly_Construction.py index 24b5f89db755d816a8b4533bba4e308aef7dbe8f..89083442136d07c633c25af573d49172d25e462c 100644 --- a/BayesValidRox/surrogate_models/aPoly_Construction.py +++ b/BayesValidRox/surrogate_models/aPoly_Construction.py @@ -25,15 +25,11 @@ def aPoly_Construction(Data, degree): Raw data. degree : int Maximum polynomial degree. - parIdx : TYPE - DESCRIPTION. - save : TYPE, optional - DESCRIPTION. The default is False. Returns ------- Polynomial : array - The coefficients of the orthonormal polynomials. 
+ The coefficients of the univariate orthonormal polynomials. """ @@ -45,7 +41,7 @@ def aPoly_Construction(Data, degree): MeanOfData = np.mean(Data) Data = Data/MeanOfData - # Compute raw moments for input data + # Compute raw moments of input data raw_moments = [np.sum(np.power(Data, p))/nsamples for p in range(2*dd+2)] # Main Loop for Polynomial with degree up to dd diff --git a/BayesValidRox/surrogate_models/eval_rec_rule.py b/BayesValidRox/surrogate_models/eval_rec_rule.py index d2d67d6ec4052f46b315d3f0b81e2460a8f2895a..b583c7eb2ec58d55d19b34130812730d21a12368 100644 --- a/BayesValidRox/surrogate_models/eval_rec_rule.py +++ b/BayesValidRox/surrogate_models/eval_rec_rule.py @@ -3,7 +3,6 @@ """ - Based on the implementation in UQLab [1]. References: @@ -27,14 +26,30 @@ Created on Fri Jan 14 2022 """ import numpy as np from numpy.polynomial.polynomial import polyval -# import scipy.stats as st -from .glexindex import glexindex -# from .aPoly_Construction import aPoly_Construction -def poly_rec_coeffs(n_max, polytype, params=None): +def poly_rec_coeffs(n_max, poly_type, params=None): + """ + Computes the recurrence coefficients for classical Wiener-Askey orthogonal + polynomials. + + Parameters + ---------- + n_max : int + Maximum polynomial degree. + poly_type : string + Polynomial type. + params : list, optional + Parameters required for `laguerre` poly type. The default is None. + + Returns + ------- + AB : dict + The 3 term recursive coefficients and the applicable ranges. + + """ - if polytype == 'legendre': + if poly_type == 'legendre': def an(n): return np.zeros((n+1, 1)) @@ -48,7 +63,7 @@ def poly_rec_coeffs(n_max, polytype, params=None): bounds = [-1, 1] - elif polytype == 'hermite': + elif poly_type == 'hermite': def an(n): return np.zeros((n+1, 1)) @@ -62,7 +77,7 @@ def poly_rec_coeffs(n_max, polytype, params=None): bounds = [-np.inf, np.inf] - elif polytype == 'laguerre': + elif poly_type == 'laguerre': def an(n): a = np.zeros((n+1, 1)) @@ -85,9 +100,27 @@ def poly_rec_coeffs(n_max, polytype, params=None): return AB -def eval_rec_rule(x, max_deg, polyType): +def eval_rec_rule(x, max_deg, poly_type): + """ + Evaluates the polynomial that corresponds to the Jacobi matrix defined + from the AB. + + Parameters + ---------- + x : array (n_samples) + Points where the polynomials are evaluated. + max_deg : int + Maximum degree. + poly_type : string + Polynomial type. + + Returns + ------- + values : array of shape (n_samples, max_deg+1) + Polynomials corresponding to the Jacobi matrix. - AB = poly_rec_coeffs(max_deg, polyType) + """ + AB = poly_rec_coeffs(max_deg, poly_type) AB = AB['alpha_beta'] values = np.zeros((len(x), AB.shape[0]+1)) @@ -100,154 +133,65 @@ def eval_rec_rule(x, max_deg, polyType): return values[:, 1:] -def eval_rec_rule_arbitrary(x, max_deg, polycoeffs): - P = max_deg+1 - values = np.zeros((len(x), P)) +def eval_rec_rule_arbitrary(x, max_deg, poly_coeffs): + """ + Evaluates the polynomial at sample array x. + + Parameters + ---------- + x : array (n_samples) + Points where the polynomials are evaluated. + max_deg : int + Maximum degree. + poly_coeffs : dict + Polynomial coefficients computed based on moments. + + Returns + ------- + values : array of shape (n_samples, max_deg+1) + Univariate Polynomials evaluated at samples. 
+ + """ + values = np.zeros((len(x), max_deg+1)) - for deg in range(P): - values[:, deg] = polyval(x, polycoeffs[deg]).T + for deg in range(max_deg+1): + values[:, deg] = polyval(x, poly_coeffs[deg]).T return values -def eval_univ_basis(x, max_deg, polyTypes, apolycoeffs=None): +def eval_univ_basis(x, max_deg, poly_types, apoly_coeffs=None): + """ + Evaluates univariate regressors along input directions. + + Parameters + ---------- + x : array of shape (n_samples, n_params) + Training samples. + max_deg : int + Maximum polynomial degree. + poly_types : list of strings + List of polynomial types for all parameters. + apoly_coeffs : dict , optional + Polynomial coefficients computed based on moments. The default is None. + + Returns + ------- + univ_vals : array of shape (n_samples, n_params, max_deg+1) + Univariate polynomials for all degrees and parameters evaluated at x. + """ # Initilize the output array n_samples, n_params = x.shape - P = max_deg+1 - univ_vals = np.zeros((n_samples, n_params, P)) + univ_vals = np.zeros((n_samples, n_params, max_deg+1)) for i in range(n_params): - if polyTypes[i] == 'arbitrary': - polycoeffs = apolycoeffs[f'p_{i+1}'] + if poly_types[i] == 'arbitrary': + polycoeffs = apoly_coeffs[f'p_{i+1}'] univ_vals[:, i] = eval_rec_rule_arbitrary(x[:, i], max_deg, polycoeffs) else: - univ_vals[:, i] = eval_rec_rule(x[:, i], max_deg, polyTypes[i]) + univ_vals[:, i] = eval_rec_rule(x[:, i], max_deg, poly_types[i]) return univ_vals - - -def create_psi(basis_indices, univ_p_val): - """ - This function assemble the design matrix Psi from the given basis index - set INDICES and the univariate polynomial evaluations univ_p_val. - """ - - # number of input variables and size of the experimental design - n_samples, n_params, _ = univ_p_val.shape - - # number of basis elements - P = basis_indices.shape[0] - - # check that the variables have consistent sizes - if n_params != basis_indices.shape[1]: - raise ValueError("The shapes of BasisIndices and " - "univ_p_val don't match!!") - - # Preallocate the Psi matrix for performance - Psi = np.ones((n_samples, P), dtype=np.float64) - - # Assemble the Psi matrix - for m in range(basis_indices.shape[1]): - aa = np.where(basis_indices[:, m] > 0)[0] - try: - basisIdx = basis_indices[aa, m] - bb = np.reshape(univ_p_val[:, m, basisIdx], Psi[:, aa].shape) - Psi[:, aa] = np.multiply(Psi[:, aa], bb) - except: - raise ValueError - - return Psi - - -def create_basis_indices(nparams, deg, q): - return glexindex(start=0, stop=deg+1, dimensions=nparams, reverse=True, - graded=True, cross_truncation=q) - - -# def fit_transform(X, distJoints, polyTypes, params=None): -# # uq_GetExpDesignSample --> uq_GeneralIsopTransform -# # --> uq_IsopTransform & uq_poly_marginals -# # Start transformation -# n_samples, n_params = X.shape -# cdfx = np.zeros((X.shape)) -# Y = np.zeros((X.shape)) - -# for par_i in range(n_params): - -# # Extract the parameters of the original space -# dist = distJoints[par_i] -# cdf = np.vectorize(lambda x: dist.cdf(x)) - -# # Extract the parameters of the transformation space based on polyType -# if polyTypes[par_i] == 'legendre': -# # Generate Y_Dists based -# params_Y = [-1, 1] -# dist_Y = st.uniform(loc=params_Y[0], scale=params_Y[1]-params_Y[0]) -# inv_cdf = np.vectorize(lambda x: dist_Y.ppf(x)) - -# elif polyTypes[par_i] == 'hermite': -# params_Y = [0, 1] -# dist_Y = st.norm(loc=params_Y[0], scale=params_Y[1]) -# inv_cdf = np.vectorize(lambda x: dist_Y.ppf(x)) - -# elif polyTypes[par_i] == 'laguerre': -# params_Y = [1, 
params[1]]
-#         dist_Y = st.gamma(loc=params_Y[0], scale=params_Y[1])
-#         inv_cdf = np.vectorize(lambda x: dist_Y.ppf(x))
-#     elif polyTypes[par_i] == 'arbitrary':
-#         # TODO: Unknown dist with gaussian_kde
-#         mu_X = X_marginals[par_i].Moments[0]
-#         stDev_X = X_marginals[par_i].Moments[1]
-#         cdf = np.vectorize(lambda x: (x - mu_X) / stDev_X)
-#         mu_Y = Y_marginals[par_i].Moments[0]
-#         stDev_Y = Y_marginals[par_i].Moments[1]
-#         inv_cdf = np.vectorize(lambda x: stDev_Y * x + mu_Y)
-#         pass
-#     # Compute CDF_x(X)
-#     cdfx[:, par_i] = cdf(X[:, par_i])
-#     # Compute invCDF_y(cdfx)
-#     Y[:, par_i] = inv_cdf(cdfx[:, par_i])
-#     return Y
-# params = np.array([[5.0e-4, 1.5e-3],
-#                    [5e-3, 5.1e-3],
-#                    [1e-10, 1e-8]])
-# distJoints = [st.uniform(loc=params[0, 0], scale=params[0, 1]-params[0, 0]),
-#               st.uniform(loc=params[1, 0], scale=params[1, 1]-params[1, 0]),
-#               st.uniform(loc=params[2, 0], scale=params[2, 1]-params[2, 0])]
-# polyTypes = ['legendre', 'legendre', 'legendre']
-# max_deg = 12
-# q = 0.85
-# n_parms = params.shape[0]
-# ED_X = np.loadtxt('../tests/PA-A/ExpDesignX.csv', delimiter=',')
-# # data = np.loadtxt('../tests/PA-A/data.csv', delimiter=',')
-# ED_X_tr = fit_transform(ED_X, distJoints, polyTypes)
-# basis_indices = create_basis_indices(n_parms, max_deg, q)
-# univ_vals = eval_univ_basis(ED_X_tr, max_deg, polyTypes)
-# psi = create_psi(basis_indices, univ_vals)
-# # Arbirtary polynomials
-# polycoeffs = {}
-# for parIdx in range(n_parms):
-#     data = distJoints[parIdx].rvs(20000)
-#     polyTypes[parIdx] = 'arbitrary'
-#     polycoeffs[f'p_{parIdx+1}'] = aPoly_Construction(data, max_deg)
-# univ_vals_aPCE = eval_univ_basis(ED_X, max_deg, polyTypes,
-#                                  apolycoeffs=polycoeffs)
-# psi_aPCE = create_psi(basis_indices, univ_vals_aPCE)
diff --git a/BayesValidRox/surrogate_models/sequential_design.py b/BayesValidRox/surrogate_models/sequential_design.py
index f77b080b73620c4d61297ae999547b67da2a7f74..f7bc89dc1eeead10bbd21e4a0f412f8dc1df45f6 100644
--- a/BayesValidRox/surrogate_models/sequential_design.py
+++ b/BayesValidRox/surrogate_models/sequential_design.py
@@ -22,1629 +22,1832 @@ from .Exploration import Exploration


 class SeqDesign():
+    """ Sequential experimental design
+    This class provides methods for training the meta-model in an iterative
+    manner.
+    The main method to execute the task is `train_seq_design`, which
+    receives a model object and returns the trained metamodel.
+    """
+
     def __init__(self, MetaModel):
         self.MetaModel = MetaModel
         self.Model = MetaModel.ModelObj

     # -------------------------------------------------------------------------
-    def util_VarBasedDesign(self, X_can, index, UtilMethod='Entropy'):
-        """
-        Computes the exploitation scores based on:
-        active learning MacKay(ALM) and active learning Cohn (ALC)
-        Paper: Sequential Design with Mutual Information for Computer
-        Experiments (MICE): Emulation of a Tsunami Model by Beck and Guillas
-        (2016)
+    def train_seq_design(self, Model):
         """
-        OutputDictY = self.MetaModel.ExpDesign.Y
-        OutputNames = list(OutputDictY.keys())[1:]
+        Starts the adaptive sequential design for refining the surrogate model
+        by selecting training points in a sequential manner.

-        if UtilMethod == 'Entropy':
-            # ----- Entropy/MMSE/active learning MacKay(ALM) -----
-            # Compute perdiction variance of the old model
-            Y_PC_can, std_PC_can = self.MetaModel.eval_metamodel(samples=X_can)
-            canPredVar = {key: std_PC_can[key]**2 for key in OutputNames}
+        Parameters
+        ----------
+        Model : object
+            An object containing all model specifications.
- varPCE = np.zeros((len(OutputNames), X_can.shape[0])) - for KeyIdx, key in enumerate(OutputNames): - varPCE[KeyIdx] = np.max(canPredVar[key], axis=1) - score = np.max(varPCE, axis=0) + Returns + ------- + PCEModel : object + Meta model object. - elif UtilMethod == 'EIGF': - # ----- Expected Improvement for Global fit ----- - # Eq (5) from Liu et al.(2018) - # Compute perdiction error and variance of the old model - Y_PC_can, std_PC_can = self.eval_metamodel(samples=X_can) - predError = {key: Y_PC_can[key] for key in OutputNames} - canPredVar = {key: std_PC_can[key]**2 for key in OutputNames} + """ + MetaModel = self.MetaModel + self.Model = MetaModel.ModelObj - EIGF_PCE = np.zeros((len(OutputNames), X_can.shape[0])) - for KeyIdx, key in enumerate(OutputNames): - residual = predError[key] - OutputDictY[key][index] - var = canPredVar[key] - EIGF_PCE[KeyIdx] = np.max(residual**2 + var, axis=1) - score = np.max(EIGF_PCE, axis=0) + # Initialization + errorIncreases = False + MetaModel.SeqModifiedLOO = {} + MetaModel.seqValidError = {} + MetaModel.SeqBME = {} + MetaModel.SeqKLD = {} + MetaModel.SeqDistHellinger = {} + MetaModel.seqRMSEMean = {} + MetaModel.seqRMSEStd = {} + MetaModel.seqMinDist = [] + pce = True if MetaModel.meta_model_type.lower() != 'gpe' else False + mc_ref = True if hasattr(Model, 'MCReference') else False + if mc_ref: + Model.read_mc_reference() - return -1 * score # -1 is for minimization instead of maximization + if not hasattr(MetaModel, 'valid_likelihoods'): + MetaModel.valid_samples = [] + MetaModel.valid_model_runs = [] + MetaModel.valid_likelihoods = [] - # ------------------------------------------------------------------------- - def util_BayesianActiveDesign(self, X_can, sigma2Dict, var='DKL'): + # Get the parameters + max_n_samples = MetaModel.ExpDesign.n_max_samples + mod_LOO_threshold = MetaModel.ExpDesign.mod_LOO_threshold + n_canddidate = MetaModel.ExpDesign.n_canddidate + post_snapshot = MetaModel.ExpDesign.post_snapshot + n_replication = MetaModel.ExpDesign.n_replication - # Evaluate the PCE metamodels at that location ??? - Y_mean_can, Y_std_can = self.eval_metamodel(samples=np.array([X_can])) + # Handle if only one UtilityFunctions is provided + if not isinstance(MetaModel.ExpDesign.util_func, list): + util_func = [MetaModel.ExpDesign.util_func] - # Get the data - obs_data = self.Observations - nObs = self.Model.nObs - # TODO: Analytical DKL - # Sample a distribution for a normal dist - # with Y_mean_can as the mean and Y_std_can as std. 
+ # ---------- Initial PCEModel ---------- + PCEModel = MetaModel.train_norm_design(Model) + initPCEModel = deepcopy(PCEModel) - # priorMean, priorSigma2, Obs = np.empty((0)),np.empty((0)),np.empty((0)) + # TODO: Loop over outputs + OutputName = Model.Output.names - # for key in list(Y_mean_can): - # # concatenate the measurement error - # Obs = np.hstack((Obs,ObservationData[key])) + # Estimation of the integral via Monte Varlo integration + obs_data = PCEModel.Observations - # # concatenate the mean and variance of prior predictive - # means, stds = Y_mean_can[key][0], Y_std_can[key][0] - # priorMean = np.hstack((priorSigma2,means)) - # priorSigma2 = np.hstack((priorSigma2,stds**2)) + # Check if data is provided + TotalSigma2 = np.empty((0, 1)) + if len(obs_data) != 0 and hasattr(PCEModel, 'Discrepancy'): + # ------ Prepare diagonal enteries for co-variance matrix --------- + for keyIdx, key in enumerate(Model.Output.names): + # optSigma = 'B' + sigma2 = np.array(PCEModel.Discrepancy.Parameters[key]) + TotalSigma2 = np.append(TotalSigma2, sigma2) - # # Covariance Matrix of prior - # covPrior = np.zeros((priorSigma2.shape[0], priorSigma2.shape[0]), float) - # np.fill_diagonal(covPrior, priorSigma2) + # Calculate the initial BME + out = self.__BME_Calculator(initPCEModel, obs_data, TotalSigma2) + initBME, initKLD, initPosterior, initDistHellinger = out + print("\nInitial BME:", initBME) + print("Initial KLD:", initKLD) - # # Covariance Matrix of Likelihood - # covLikelihood = np.zeros((sigma2Dict.shape[0], sigma2Dict.shape[0]), float) - # np.fill_diagonal(covLikelihood, sigma2Dict) + # Posterior snapshot (initial) + if post_snapshot: + MAP = PCEModel.ExpDesign.max_a_post + parNames = PCEModel.ExpDesign.par_names + print('Posterior snapshot (initial) is being plotted...') + self.__posteriorPlot(initPosterior, MAP, parNames, + 'SeqPosterior_init') - # # Calculate moments of the posterior (Analytical derivation) - # n = priorSigma2.shape[0] - # covPost = np.dot(np.dot(covPrior,np.linalg.inv(covPrior+(covLikelihood/n))),covLikelihood/n) + # Check the convergence of the Mean & Std + if mc_ref and pce: + initRMSEMean, initRMSEStd = self.__error_Mean_Std() + print(f"Initial Mean and Std error: {initRMSEMean}, {initRMSEStd}") - # meanPost = np.dot(np.dot(covPrior,np.linalg.inv(covPrior+(covLikelihood/n))) , Obs) + \ - # np.dot(np.dot(covPrior,np.linalg.inv(covPrior+(covLikelihood/n))), - # priorMean/n) - # # Compute DKL from prior to posterior - # term1 = np.trace(np.dot(np.linalg.inv(covPrior),covPost)) - # deltaMean = priorMean-meanPost - # term2 = np.dot(np.dot(deltaMean,np.linalg.inv(covPrior)),deltaMean[:,None]) - # term3 = np.log(np.linalg.det(covPrior)/np.linalg.det(covPost)) - # DKL = 0.5 * (term1 + term2 - n + term3)[0] + # Read the initial experimental design + Xinit = initPCEModel.ExpDesign.X + initTotalNSamples = len(PCEModel.ExpDesign.X) + initYprev = initPCEModel.ModelOutputDict + initLCerror = initPCEModel.LCerror - # ---------- Inner MC simulation for computing Utility Value ---------- - # Estimation of the integral via Monte Varlo integration - MCsize = 20000 - ESS = 0 + # Read the initial ModifiedLOO + if pce: + Scores_all, varExpDesignY = [], [] + for OutName in OutputName: + y = initPCEModel.ExpDesign.Y[OutName] + Scores_all.append(list( + initPCEModel.score_dict[OutName].values())) + if PCEModel.dim_red_method.lower() == 'pca': + pca = PCEModel.pca[OutName] + components = pca.transform(y) + varExpDesignY.append(np.var(components, axis=0)) + else: + 
varExpDesignY.append(np.var(y, axis=0)) - while ((ESS > MCsize) or (ESS < 1)): + Scores = [item for sublist in Scores_all for item in sublist] + weights = [item for sublist in varExpDesignY for item in sublist] + initModifiedLOO = [np.average([1-score for score in Scores], + weights=weights)] - # Sample a distribution for a normal dist - # with Y_mean_can as the mean and Y_std_can as std. - Y_MC, std_MC = {}, {} - logPriorLikelihoods = np.zeros((MCsize)) - for key in list(Y_mean_can): - means, stds = Y_mean_can[key][0], Y_std_can[key][0] - # cov = np.zeros((means.shape[0], means.shape[0]), float) - # np.fill_diagonal(cov, stds**2) + if len(PCEModel.valid_model_runs) != 0: + initValidError = self.__validError() + initValidError = list(initValidError.values()) + print("\nInitial ValidError:", initValidError) - Y_MC[key] = np.zeros((MCsize, nObs)) - logsamples = np.zeros((MCsize, nObs)) - for i in range(nObs): - NormalDensity = stats.norm(means[i], stds[i]) - Y_MC[key][:, i] = NormalDensity.rvs(MCsize) - logsamples[:, i] = NormalDensity.logpdf(Y_MC[key][:, i]) + # Replicate the sequential design + for repIdx in range(n_replication): + print(f'>>>> Replication: {repIdx+1} <<<<') - logPriorLikelihoods = np.sum(logsamples, axis=1) - std_MC[key] = np.zeros((MCsize, means.shape[0])) + # To avoid changes in original aPCE object + # PCEModel = copy.deepcopy(initPCEModel) + PCEModel.ExpDesign.X = Xinit + PCEModel.ExpDesign.Y = initYprev + PCEModel.LCerror = initLCerror - # Likelihood computation (Comparison of data and simulation - # results via PCE with candidate design) - likelihoods = self.normpdf(Y_MC, std_MC, obs_data, sigma2Dict) + for util_f in util_func: + print(f'>>>> UtilityFunction: {util_f} <<<<') + # To avoid changes in original aPCE object + # PCEModel = copy.deepcopy(initPCEModel) + PCEModel.ExpDesign.X = Xinit + PCEModel.ExpDesign.Y = initYprev + PCEModel.LCerror = initLCerror - # Check the Effective Sample Size (1<ESS<MCsize) - ESS = 1 / np.sum(np.square(likelihoods/np.nansum(likelihoods))) + # Set the experimental design + Xprev = Xinit + total_n_samples = initTotalNSamples + Yprev = initYprev - # Enlarge sample size if it doesn't fulfill the criteria - if ((ESS > MCsize) or (ESS < 1)): - MCsize *= 10 - ESS = 0 + Xfull = [] + Yfull = [] - # Rejection Step - # Random numbers between 0 and 1 - unif = np.random.rand(1, MCsize)[0] + # Store the initial ModifiedLOO + if pce: + print("\nInitial ModifiedLOO:", initModifiedLOO) + ModifiedLOO = initModifiedLOO + SeqModifiedLOO = np.array(ModifiedLOO) - # Reject the poorly performed prior - accepted = (likelihoods/np.max(likelihoods)) >= unif + if len(PCEModel.valid_model_runs) != 0: + ValidError = initValidError + SeqValidError = np.array(ValidError) - # Prior-based estimation of BME - logBME = np.log(np.nanmean(likelihoods)) + # Check if data is provided + if len(obs_data) != 0: + SeqBME = np.array([initBME]) + SeqKLD = np.array([initKLD]) + SeqDistHellinger = np.array([initDistHellinger]) - # Posterior-based expectation of likelihoods - postLikelihoods = likelihoods[accepted] - postExpLikelihoods = np.mean(np.log(postLikelihoods)) + if mc_ref and pce: + seqRMSEMean = np.array([initRMSEMean]) + seqRMSEStd = np.array([initRMSEStd]) - # Posterior-based expectation of prior densities - postExpPrior = np.mean(logPriorLikelihoods[accepted]) + # Start Sequential Experimental Design + postcnt = 1 + itrNr = 1 + while total_n_samples < max_n_samples: - # Utility function Eq.2 in Ref.
(2) - # Posterior covariance matrix after observing data y - # Kullback-Leibler Divergence (Sergey's paper) - if var == 'DKL': + # Optimal Bayesian Design + PCEModel.ExpDesignFlag = 'sequential' + Xnew, updatedPrior = self.opt_SeqDesign(TotalSigma2, + n_canddidate, + util_f) - # TODO: Calculate the correction factor for BME - # BMECorrFactor = self.BME_Corr_Weight(PCE_SparseBayes_can, - # ObservationData, sigma2Dict) - # BME += BMECorrFactor - # Haun et al implementation - # U_J_d = np.mean(np.log(Likelihoods[Likelihoods!=0])- logBME) - U_J_d = postExpLikelihoods - logBME + S = np.min(distance.cdist(Xinit, Xnew, 'euclidean')) + PCEModel.seqMinDist.append(S) + print("\nmin Dist from OldExpDesign:", S) + print("\n") - # Marginal log likelihood - elif var == 'BME': - U_J_d = logBME + # Evaluate the full model response at the new sample: + Ynew, _ = Model.run_model_parallel( + Xnew, prevRun_No=total_n_samples + ) + total_n_samples += Xnew.shape[0] - # Entropy-based information gain - elif var == 'infEntropy': - logBME = np.log(np.nanmean(likelihoods)) - infEntropy = logBME - postExpPrior - postExpLikelihoods - U_J_d = infEntropy * -1 # -1 for minimization + # ------ Plot the surrogate model vs Original Model ------ + if hasattr(PCEModel, 'adapt_verbose') and \ + PCEModel.adapt_verbose: + from PostProcessing.adaptPlot import adaptPlot + y_hat, std_hat = PCEModel.eval_metamodel(samples=Xnew) + adaptPlot(PCEModel, Ynew, y_hat, std_hat, plotED=False) - # Bayesian information criterion - elif var == 'BIC': - coeffs = self.MetaModel.CoeffsDict.values() - nModelParams = max(len(v) for val in coeffs for v in val.values()) - maxL = np.nanmax(likelihoods) - U_J_d = -2 * np.log(maxL) + np.log(nObs) * nModelParams + # -------- Retrain the surrogate model ------- + # Extend new experimental design + Xfull = np.vstack((Xprev, Xnew)) - # Akaike information criterion - elif var == 'AIC': - coeffs = self.MetaModel.CoeffsDict.values() - nModelParams = max(len(v) for val in coeffs for v in val.values()) - maxlogL = np.log(np.nanmax(likelihoods)) - AIC = -2 * maxlogL + 2 * nModelParams - # 2 * nModelParams * (nModelParams+1) / (nObs-nModelParams-1) - penTerm = 0 - U_J_d = 1*(AIC + penTerm) + # Updating existing key's value + for OutIdx in range(len(OutputName)): + OutName = OutputName[OutIdx] + try: + Yfull = np.vstack((Yprev[OutName], Ynew[OutName])) + except: + Yfull = np.vstack((Yprev[OutName], Ynew)) - # Deviance information criterion - elif var == 'DIC': - # D_theta_bar = np.mean(-2 * Likelihoods) - N_star_p = 0.5 * np.var(np.log(likelihoods[likelihoods != 0])) - Likelihoods_theta_mean = self.normpdf(Y_mean_can, Y_std_can, - obs_data, sigma2Dict) - DIC = -2 * np.log(Likelihoods_theta_mean) + 2 * N_star_p + PCEModel.ModelOutputDict[OutName] = Yfull - U_J_d = DIC + PCEModel.ExpDesign.sampling_method = 'user' + PCEModel.ExpDesign.X = Xfull + PCEModel.ExpDesign.Y = PCEModel.ModelOutputDict - else: - print('The algorithm you requested has not been implemented yet!') + # save the Experimental Design for next iteration + Xprev = Xfull + Yprev = PCEModel.ModelOutputDict - # Handle inf and NaN (replace by zero) - if np.isnan(U_J_d) or U_J_d == -np.inf or U_J_d == np.inf: - U_J_d = 0.0 + # Pass the new prior as the input + PCEModel.input_obj.polycoeffsFlag = False + if updatedPrior is not None: + PCEModel.input_obj.polycoeffsFlag = True + print("updatedPrior:", updatedPrior.shape) + # Arbitrary polynomial chaos + for i in range(updatedPrior.shape[1]): + PCEModel.input_obj.Marginals[i].DistType = None + x = updatedPrior[:,
i] + PCEModel.input_obj.Marginals[i].InputValues = x - return -1 * U_J_d # -1 is for minimization instead of maximization + prevPCEModel = PCEModel + PCEModel = PCEModel.train_norm_design(Model) - # ------------------------------------------------------------------------- - def util_BayesianDesign(self, X_can, X_MC, sigma2Dict, var='DKL'): + # -------- Evaluate the retrained surrogate model ------- + # Compute the validation error + if len(PCEModel.valid_model_runs) != 0: + validError = self.__validError() + ValidError = list(validError.values()) + print("\nUpdated ValidError:", ValidError) - # To avoid changes ub original aPCE object - Model = self.Model - PCEModel = deepcopy(self.MetaModel) + # Extract Modified LOO from Output + if pce: + Scores_all, varExpDesignY = [], [] + for OutName in OutputName: + y = initPCEModel.ExpDesign.Y[OutName] + Scores_all.append(list( + PCEModel.score_dict[OutName].values())) + if PCEModel.dim_red_method.lower() == 'pca': + pca = PCEModel.pca[OutName] + components = pca.transform(y) + varExpDesignY.append(np.var(components, + axis=0)) + else: + varExpDesignY.append(np.var(y, axis=0)) + Scores = [item for sublist in Scores_all for item + in sublist] + weights = [item for sublist in varExpDesignY for item + in sublist] + ModifiedLOO = [np.average( + [1-score for score in Scores], weights=weights)] - # Old Experimental design - oldExpDesignX = PCEModel.ExpDesign.X - oldExpDesignY = PCEModel.ExpDesign.Y + print('\n') + print(f"Updated ModifiedLOO {util_f}:\n", ModifiedLOO) + print("Xfull:", Xfull.shape) + print('\n') - # Evaluate the PCE metamodels at that location ??? - Y_PC_can, _ = self.MetaModel.eval_metamodel(samples=np.array([X_can])) + # check the direction of the error (on average): + # if it increases consistently stop the iterations + n_checks = 3 + if itrNr > n_checks * PCEModel.ExpDesign.n_new_samples: + # ss<0 error increasing + ss = np.sign(SeqModifiedLOO - ModifiedLOO) + errorIncreases = np.sum(np.mean(ss[-2:], axis=1)) <= \ + -1*n_checks - # Add all suggestion as new ExpDesign - NewExpDesignX = np.vstack((oldExpDesignX, X_can)) + # If the error increased over the last n_checks iterations, + # stop the search and return the previous PCEModel + if errorIncreases: + print("Warning: The modified error is increasing " + f"compared to the last {n_checks} iterations.") + PCEModel = prevPCEModel + break + else: + prevPCEModel = PCEModel - NewExpDesignY = {} - for key in oldExpDesignY.keys(): - try: - NewExpDesignY[key] = np.vstack((oldExpDesignY[key], - Y_PC_can[key])) - except: - NewExpDesignY[key] = oldExpDesignY[key] + # Store updated ModifiedLOO + if pce: + SeqModifiedLOO = np.vstack( + (SeqModifiedLOO, ModifiedLOO)) + if len(PCEModel.valid_model_runs) != 0: + SeqValidError = np.vstack( + (SeqValidError, ValidError)) - PCEModel.ExpDesign.SamplingMethod = 'user' - PCEModel.ExpDesign.X = NewExpDesignX - PCEModel.ExpDesign.Y = NewExpDesignY + # -------- Calculation of BME as accuracy metric ------- + # Check if data is provided + if len(obs_data) != 0: + # Calculate the updated BME + out = self.__BME_Calculator(PCEModel, obs_data, + TotalSigma2) + BME, KLD, Posterior, DistHellinger = out + print('\n') + print("Updated BME:", BME) + print("Updated KLD:", KLD) + print('\n') - # Create the SparseBayes-based PCE metamodel: - PCEModel.Inputs.polycoeffsFlag = False - univ_p_val = self.MetaModel.univ_basis_vals(X_can) - G_n_m_all = np.zeros((len(Model.Output.names), Model.nObs)) + # Plot some snapshots of the posterior + step_snapshot =
PCEModel.ExpDesign.step_snapshot + if post_snapshot and postcnt % step_snapshot == 0: + MAP = PCEModel.ExpDesign.max_a_post + parNames = PCEModel.ExpDesign.par_names + print('Posterior snapshot is being plotted...') + self.__posteriorPlot(Posterior, MAP, parNames, + f'SeqPosterior_{postcnt}') + postcnt += 1 - for idx, key in enumerate(Model.Output.names): - for i in range(Model.nObs): - BasisIndices = PCEModel.BasisDict[key]["y_"+str(i+1)] - clf_poly = PCEModel.clf_poly[key]["y_"+str(i+1)] - Mn = clf_poly.coef_ - Sn = clf_poly.sigma_ - beta = clf_poly.alpha_ - active = clf_poly.active_ - Psi = self.MetaModel.PCE_create_Psi(BasisIndices, univ_p_val) + # Check the convergence of the Mean & Std + if mc_ref and pce: + print('\n') + RMSE_Mean, RMSE_std = self.__error_Mean_Std() + print(f"Updated Mean and Std error: {RMSE_Mean}, " + f"{RMSE_std}") + print('\n') - Sn_new_inv = np.linalg.inv(Sn) - Sn_new_inv += beta * np.dot(Psi[:, active].T, Psi[:, active]) - Sn_new = np.linalg.inv(Sn_new_inv) + # Store the updated BME & KLD + # Check if data is provided + if len(obs_data) != 0: + SeqBME = np.vstack((SeqBME, BME)) + SeqKLD = np.vstack((SeqKLD, KLD)) + SeqDistHellinger = np.vstack((SeqDistHellinger, + DistHellinger)) + if mc_ref and pce: + seqRMSEMean = np.vstack((seqRMSEMean, RMSE_Mean)) + seqRMSEStd = np.vstack((seqRMSEStd, RMSE_std)) - Mn_new = np.dot(Sn_new_inv, Mn[active]).reshape(-1, 1) - Mn_new += beta * np.dot(Psi[:, active].T, Y_PC_can[key][0, i]) - Mn_new = np.dot(Sn_new, Mn_new).flatten() + if pce and any(LOO < mod_LOO_threshold + for LOO in ModifiedLOO): + break + itrNr += 1 + # Store updated ModifiedLOO and BME in dictionary + strKey = f'{util_f}_rep_{repIdx+1}' + if pce: + PCEModel.SeqModifiedLOO[strKey] = SeqModifiedLOO + if len(PCEModel.valid_model_runs) != 0: + PCEModel.seqValidError[strKey] = SeqValidError - # Compute new moments - mean_old = Mn[0] - mean_new = Mn_new[0] - std_old = np.sqrt(np.sum(np.square(Mn[1:]))) - std_new = np.sqrt(np.sum(np.square(Mn_new[1:]))) + # Check if data is provided + if len(obs_data) != 0: + PCEModel.SeqBME[strKey] = SeqBME + PCEModel.SeqKLD[strKey] = SeqKLD + if len(PCEModel.valid_likelihoods) != 0: + PCEModel.SeqDistHellinger[strKey] = SeqDistHellinger + if mc_ref and pce: + PCEModel.seqRMSEMean[strKey] = seqRMSEMean + PCEModel.seqRMSEStd[strKey] = seqRMSEStd - G_n_m = np.log(std_old/std_new) - 1./2 - G_n_m += std_new**2 / (2*std_new**2) - G_n_m += (mean_new - mean_old)**2 / (2*std_old**2) + return PCEModel - G_n_m_all[idx, i] = G_n_m + # ------------------------------------------------------------------------- + def util_VarBasedDesign(self, X_can, index, util_func='Entropy'): + """ + Computes the exploitation scores based on the + active learning MacKay (ALM) and active learning Cohn (ALC) criteria. + Paper: Sequential Design with Mutual Information for Computer + Experiments (MICE): Emulation of a Tsunami Model by Beck and Guillas + (2016). - clf_poly.coef_[active] = Mn_new - clf_poly.sigma_ = Sn_new - PCEModel.clf_poly[key]["y_"+str(i+1)] = clf_poly + Parameters + ---------- + X_can : array of shape (n_samples, n_params) + Candidate samples. + index : int + Model output index. + util_func : string, optional + Exploitation utility function. The default is 'Entropy'. - # return np.sum(G_n_m_all) - # PCEModel.create_PCE_normDesign(Model, OptDesignFlag=True) - PCE_SparseBayes_can = PCEModel + Returns + ------- + float + Score.
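The bookkeeping above stops the sequential loop once any modified LOO error falls below mod_LOO_threshold, while the n_checks logic earlier aborts when the error keeps growing. A minimal sketch of such a stopping rule on a scalar error history follows; the names are illustrative, not part of the module.

import numpy as np

def should_stop(seq_loo, threshold, n_checks=3):
    """Stop when the latest modified-LOO error is below the threshold,
    or when the error grew over each of the last n_checks iterations."""
    seq_loo = np.asarray(seq_loo)
    if seq_loo[-1] < threshold:
        return True
    if len(seq_loo) > n_checks:
        # positive differences mean the error increased between iterations
        recent = np.diff(seq_loo[-(n_checks + 1):])
        return bool(np.all(recent > 0))
    return False

print(should_stop([0.9, 0.5, 0.2], threshold=1e-3))       # False
print(should_stop([0.2, 0.3, 0.4, 0.5], threshold=1e-3))  # True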
- # Get the data - obs_data = self.MetaModel.Observations + """ + out_dict_y = self.MetaModel.ExpDesign.Y + out_names = self.MetaModel.ModelObj.Output.names - # ---------- Inner MC simulation for computing Utility Value ---------- - # Estimation of the integral via Monte Varlo integration - MCsize = X_MC.shape[0] - ESS = 0 + if util_func == 'Entropy': + # ----- Entropy/MMSE/active learning MacKay(ALM) ----- + # Compute prediction variance of the old model + Y_PC_can, std_PC_can = self.MetaModel.eval_metamodel(samples=X_can) + canPredVar = {key: std_PC_can[key]**2 for key in out_names} - while ((ESS > MCsize) or (ESS < 1)): + varPCE = np.zeros((len(out_names), X_can.shape[0])) + for KeyIdx, key in enumerate(out_names): + varPCE[KeyIdx] = np.max(canPredVar[key], axis=1) + score = np.max(varPCE, axis=0) - # Enriching Monte Carlo samples if need be - if ESS != 0: - X_MC = self.MetaModel.ExpDesign.GetSample(MCsize, 'random') + elif util_func == 'EIGF': + # ----- Expected Improvement for Global fit ----- + # Eq (5) from Liu et al.(2018) + # Compute prediction error and variance of the old model + Y_PC_can, std_PC_can = self.MetaModel.eval_metamodel(samples=X_can) + predError = {key: Y_PC_can[key] for key in out_names} + canPredVar = {key: std_PC_can[key]**2 for key in out_names} - # Evaluate the PCEModel at the given samples - Y_MC, std_MC = PCE_SparseBayes_can.eval_metamodel(samples=X_MC) + EIGF_PCE = np.zeros((len(out_names), X_can.shape[0])) + for KeyIdx, key in enumerate(out_names): + residual = predError[key] - out_dict_y[key][int(index)] + var = canPredVar[key] + EIGF_PCE[KeyIdx] = np.max(residual**2 + var, axis=1) + score = np.max(EIGF_PCE, axis=0) - # Likelihood computation (Comparison of data and simulation - # results via PCE with candidate design) - likelihoods = self.normpdf(Y_MC, std_MC, obs_data, sigma2Dict) + return -1 * score # -1 is for minimization instead of maximization - # Check the Effective Sample Size (1<ESS<MCsize) - ESS = 1 / np.sum(np.square(likelihoods/np.sum(likelihoods))) + # ------------------------------------------------------------------------- + def util_BayesianActiveDesign(self, X_can, sigma2Dict, var='DKL'): + """ + Computes scores based on Bayesian active design criterion (var). - # Enlarge sample size if it doesn't fulfill the criteria - if ((ESS > MCsize) or (ESS < 1)): - MCsize *= 10 - ESS = 0 + It is based on the following paper: + Oladyshkin, Sergey, Farid Mohammadi, Ilja Kroeker, and Wolfgang Nowak. + "Bayesian3 active learning for the gaussian process emulator using + information theory." Entropy 22, no. 8 (2020): 890. - # Rejection Step - # Random numbers between 0 and 1 - unif = np.random.rand(1, MCsize)[0] + Parameters + ---------- + X_can : array of shape (n_samples, n_params) + Candidate samples. + sigma2Dict : dict + A dictionary containing the measurement errors (sigma^2). + var : string, optional + BAL design criterion. The default is 'DKL'. - # Reject the poorly performed prior - accepted = (likelihoods/np.max(likelihoods)) >= unif + Returns + ------- + float + Score. - # -------------------- Utility functions -------------------- - # Utility function Eq.2 in Ref. (2) - # Kullback-Leibler Divergence (Sergey's paper) - if var == 'DKL': + """ - # Prior-based estimation of BME - logBME = np.log(np.nanmean(likelihoods)) + # Evaluate the PCE metamodel at the candidate location
+ Y_mean_can, Y_std_can = self.MetaModel.eval_metamodel(samples=np.array([X_can])) - # Posterior-based expectation of likelihoods - postLikelihoods = likelihoods[accepted] - postExpLikelihoods = np.mean(np.log(postLikelihoods)) + # Get the data + obs_data = self.Observations + nObs = self.Model.nObs + # TODO: Analytical DKL + # Sample a distribution for a normal dist + # with Y_mean_can as the mean and Y_std_can as std. - # Haun et al implementation + # priorMean, priorSigma2, Obs = np.empty((0)),np.empty((0)),np.empty((0)) - U_J_d = np.mean(np.log(likelihoods[likelihoods != 0])- logBME) - U_J_d = np.sum(G_n_m_all) - # Ryan et al (2014) implementation - # importanceWeights = Likelihoods[Likelihoods!=0]/np.sum(Likelihoods[Likelihoods!=0]) - # U_J_d = np.mean(importanceWeights*np.log(Likelihoods[Likelihoods!=0])) - logBME + # for key in list(Y_mean_can): + # # concatenate the measurement error + # Obs = np.hstack((Obs,ObservationData[key])) - # U_J_d = postExpLikelihoods - logBME + # # concatenate the mean and variance of prior predictive + # means, stds = Y_mean_can[key][0], Y_std_can[key][0] + # priorMean = np.hstack((priorSigma2,means)) + # priorSigma2 = np.hstack((priorSigma2,stds**2)) - # Marginal likelihood - elif var == 'BME': + # # Covariance Matrix of prior + # covPrior = np.zeros((priorSigma2.shape[0], priorSigma2.shape[0]), float) + # np.fill_diagonal(covPrior, priorSigma2) - # Prior-based estimation of BME - logBME = np.log(np.nanmean(likelihoods)) - U_J_d = logBME + # # Covariance Matrix of Likelihood + # covLikelihood = np.zeros((sigma2Dict.shape[0], sigma2Dict.shape[0]), float) + # np.fill_diagonal(covLikelihood, sigma2Dict) - # Bayes risk likelihood - elif var == 'BayesRisk': + # # Calculate moments of the posterior (Analytical derivation) + # n = priorSigma2.shape[0] + # covPost = np.dot(np.dot(covPrior,np.linalg.inv(covPrior+(covLikelihood/n))),covLikelihood/n) - U_J_d = -1 * np.var(likelihoods) + # meanPost = np.dot(np.dot(covPrior,np.linalg.inv(covPrior+(covLikelihood/n))) , Obs) + \ + # np.dot(np.dot(covPrior,np.linalg.inv(covPrior+(covLikelihood/n))), + # priorMean/n) + # # Compute DKL from prior to posterior + # term1 = np.trace(np.dot(np.linalg.inv(covPrior),covPost)) + # deltaMean = priorMean-meanPost + # term2 = np.dot(np.dot(deltaMean,np.linalg.inv(covPrior)),deltaMean[:,None]) + # term3 = np.log(np.linalg.det(covPrior)/np.linalg.det(covPost)) + # DKL = 0.5 * (term1 + term2 - n + term3)[0] - # Entropy-based information gain - elif var == 'infEntropy': - # Prior-based estimation of BME - logBME = np.log(np.nanmean(likelihoods)) + # ---------- Inner MC simulation for computing Utility Value ---------- + # Estimation of the integral via Monte Carlo integration + MCsize = 20000 + ESS = 0 - # Posterior-based expectation of likelihoods - postLikelihoods = likelihoods[accepted] / np.nansum(likelihoods[accepted]) - postExpLikelihoods = np.mean(np.log(postLikelihoods)) + while ((ESS > MCsize) or (ESS < 1)): - # Posterior-based expectation of prior densities - postExpPrior = np.mean(logPriorLikelihoods[accepted]) + # Sample a distribution for a normal dist + # with Y_mean_can as the mean and Y_std_can as std.
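The commented block above encodes the closed-form KL divergence between two Gaussians. For diagonal covariances it reduces to D_KL(post || prior) = 0.5 * sum( var_post/var_prior + (mu_prior - mu_post)^2/var_prior - 1 + ln(var_prior/var_post) ). A small sketch under that diagonal assumption (illustrative helper, not part of the module):

import numpy as np

def kl_gaussian_diag(mu_prior, var_prior, mu_post, var_post):
    """KL(posterior || prior) for Gaussians with diagonal covariances."""
    return 0.5 * np.sum(var_post / var_prior
                        + (mu_prior - mu_post)**2 / var_prior
                        - 1.0 + np.log(var_prior / var_post))

# Sanity check: identical distributions give zero divergence
print(kl_gaussian_diag(np.zeros(2), np.ones(2), np.zeros(2), np.ones(2)))
print(kl_gaussian_diag(np.zeros(2), np.ones(2),
                       np.array([0.5, -0.5]), np.array([0.5, 0.5])))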
+ Y_MC, std_MC = {}, {} + logPriorLikelihoods = np.zeros((MCsize)) + for key in list(Y_mean_can): + means, stds = Y_mean_can[key][0], Y_std_can[key][0] + # cov = np.zeros((means.shape[0], means.shape[0]), float) + # np.fill_diagonal(cov, stds**2) - infEntropy = logBME - postExpPrior - postExpLikelihoods + Y_MC[key] = np.zeros((MCsize, nObs)) + logsamples = np.zeros((MCsize, nObs)) + for i in range(nObs): + NormalDensity = stats.norm(means[i], stds[i]) + Y_MC[key][:, i] = NormalDensity.rvs(MCsize) + logsamples[:, i] = NormalDensity.logpdf(Y_MC[key][:, i]) - U_J_d = infEntropy * -1 # -1 for minimization + logPriorLikelihoods = np.sum(logsamples, axis=1) + std_MC[key] = np.zeros((MCsize, means.shape[0])) - # D-Posterior-precision - elif var == 'DPP': - X_Posterior = X_MC[accepted] - # covariance of the posterior parameters - U_J_d = -np.log(np.linalg.det(np.cov(X_Posterior))) + # Likelihood computation (Comparison of data and simulation + # results via PCE with candidate design) + likelihoods = self.__normpdf(Y_MC, std_MC, obs_data, sigma2Dict) - # A-Posterior-precision - elif var == 'APP': - X_Posterior = X_MC[accepted] - # trace of the posterior parameters - U_J_d = -np.log(np.trace(np.cov(X_Posterior))) + # Check the Effective Sample Size (1<ESS<MCsize) + ESS = 1 / np.sum(np.square(likelihoods/np.nansum(likelihoods))) - else: - print('The algorithm you requested has not been implemented yet!') + # Enlarge sample size if it doesn't fulfill the criteria + if ((ESS > MCsize) or (ESS < 1)): + MCsize *= 10 + ESS = 0 - return -1 * U_J_d # -1 is for minimization instead of maximization + # Rejection Step + # Random numbers between 0 and 1 + unif = np.random.rand(1, MCsize)[0] - # ------------------------------------------------------------------------- - def subdomain(self, Bounds, NrofNewSample): + # Reject the poorly performed prior + accepted = (likelihoods/np.max(likelihoods)) >= unif - NofPa = self.MetaModel.NofPa - NofSubdomain = NrofNewSample + 1 - LinSpace = np.zeros((NofPa, NofSubdomain)) + # Prior-based estimation of BME + logBME = np.log(np.nanmean(likelihoods)) - for i in range(NofPa): - LinSpace[i] = np.linspace(start=Bounds[i][0], stop=Bounds[i][1], - num=NofSubdomain) - Subdomains = [] - for k in range(NofSubdomain-1): - mylist = [] - for i in range(NofPa): - mylist.append((LinSpace[i, k+0], LinSpace[i, k+1])) - Subdomains.append(tuple(mylist)) + # Posterior-based expectation of likelihoods + postLikelihoods = likelihoods[accepted] + postExpLikelihoods = np.mean(np.log(postLikelihoods)) - return Subdomains + # Posterior-based expectation of prior densities + postExpPrior = np.mean(logPriorLikelihoods[accepted]) - # ------------------------------------------------------------------------- - def Utility_runner(self, method, allCandidates, index, sigma2Dict=None, - var=None, X_MC=None): + # Utility function Eq.2 in Ref. 
(2) + # Posterior covariance matrix after observing data y + # Kullback-Leibler Divergence (Sergey's paper) + if var == 'DKL': - if method == 'VarOptDesign': - U_J_d = self.util_VarBasedDesign(allCandidates, index, var) + # TODO: Calculate the correction factor for BME + # BMECorrFactor = self.BME_Corr_Weight(PCE_SparseBayes_can, + # ObservationData, sigma2Dict) + # BME += BMECorrFactor + # Haun et al implementation + # U_J_d = np.mean(np.log(Likelihoods[Likelihoods!=0])- logBME) + U_J_d = postExpLikelihoods - logBME - elif method == 'BayesActDesign': - NCandidate = allCandidates.shape[0] - U_J_d = np.zeros((NCandidate)) - for idx, X_can in tqdm(enumerate(allCandidates), ascii=True, - desc="OptBayesianDesign"): - U_J_d[idx] = self.util_BayesianActiveDesign(X_can, sigma2Dict, - var) - elif method == 'BayesOptDesign': - NCandidate = allCandidates.shape[0] - U_J_d = np.zeros((NCandidate)) - for idx, X_can in tqdm(enumerate(allCandidates), ascii=True, - desc="OptBayesianDesign"): - U_J_d[idx] = self.util_BayesianDesign(X_can, X_MC, sigma2Dict, - var) - return (index, -1 * U_J_d) + # Marginal log likelihood + elif var == 'BME': + U_J_d = logBME - # ------------------------------------------------------------------------- - def dual_annealing(self, method, Bounds, sigma2Dict, var, Run_No, - debugFlag=False): + # Entropy-based information gain + elif var == 'infEntropy': + logBME = np.log(np.nanmean(likelihoods)) + infEntropy = logBME - postExpPrior - postExpLikelihoods + U_J_d = infEntropy * -1 # -1 for minimization - Model = self.Model - MaxFunItr = self.MetaModel.ExpDesign.MaxFunItr + # Bayesian information criterion + elif var == 'BIC': + coeffs = self.MetaModel.coeffs_dict.values() + nModelParams = max(len(v) for val in coeffs for v in val.values()) + maxL = np.nanmax(likelihoods) + U_J_d = -2 * np.log(maxL) + np.log(nObs) * nModelParams - if method == 'VarOptDesign': - Res_Global = opt.dual_annealing(self.util_VarBasedDesign, - bounds=Bounds, - args=(Model, var), - maxfun=MaxFunItr) + # Akaike information criterion + elif var == 'AIC': + coeffs = self.MetaModel.coeffs_dict.values() + nModelParams = max(len(v) for val in coeffs for v in val.values()) + maxlogL = np.log(np.nanmax(likelihoods)) + AIC = -2 * maxlogL + 2 * nModelParams + # 2 * nModelParams * (nModelParams+1) / (nObs-nModelParams-1) + penTerm = 0 + U_J_d = 1*(AIC + penTerm) - elif method == 'BayesOptDesign': - Res_Global = opt.dual_annealing(self.util_BayesianDesign, - bounds=Bounds, - args=(Model, sigma2Dict, var), - maxfun=MaxFunItr) + # Deviance information criterion + elif var == 'DIC': + # D_theta_bar = np.mean(-2 * Likelihoods) + N_star_p = 0.5 * np.var(np.log(likelihoods[likelihoods != 0])) + Likelihoods_theta_mean = self.__normpdf(Y_mean_can, Y_std_can, + obs_data, sigma2Dict) + DIC = -2 * np.log(Likelihoods_theta_mean) + 2 * N_star_p - if debugFlag: - print(f"global minimum: xmin = {Res_Global.x}, " - f"f(xmin) = {Res_Global.fun:.6f}, nfev = {Res_Global.nfev}") + U_J_d = DIC - return (Run_No, Res_Global.x) + else: + print('The algorithm you requested has not been implemented yet!') - # ------------------------------------------------------------------------- - def tradoffWeights(self, TradeOffScheme, OldExpDesign, OutputDictY): + # Handle inf and NaN (replace by zero) + if np.isnan(U_J_d) or U_J_d == -np.inf or U_J_d == np.inf: + U_J_d = 0.0 - if TradeOffScheme == 'None': - weightExploration = 0 + return -1 * U_J_d # -1 is for minimization instead of maximization - elif TradeOffScheme == 'equal': - weightExploration = 
0.5 + # ------------------------------------------------------------------------- + def util_BayesianDesign(self, X_can, X_MC, sigma2Dict, var='DKL'): + """ + Computes scores based on Bayesian sequential design criterion (var). - elif TradeOffScheme == 'epsilon-decreasing': - # epsilon-decreasing scheme - # Start with more exploration and increase the influence of - # exploitation along the way with a exponential decay function - initNSamples = self.MetaModel.ExpDesign.initNrSamples - maxNSamples = self.MetaModel.ExpDesign.MaxNSamples + Parameters + ---------- + X_can : array of shape (n_samples, n_params) + Candidate samples. + X_MC : array of shape (n_mc_samples, n_params) + Monte Carlo samples used to estimate the inner integral. + sigma2Dict : dict + A dictionary containing the measurement errors (sigma^2). + var : string, optional + Bayesian design criterion. The default is 'DKL'. - itrNumber = (self.MetaModel.ExpDesign.X.shape[0] - initNSamples) - itrNumber //= self.MetaModel.ExpDesign.NrofNewSample + Returns + ------- + float + Score. - tau2 = -(maxNSamples-initNSamples-1) / np.log(1e-8) - weightExploration = signal.exponential(maxNSamples-initNSamples, 0, - tau2, False)[itrNumber] + """ - elif TradeOffScheme == 'adaptive': + # To avoid changes in original aPCE object + Model = self.Model + PCEModel = deepcopy(self.MetaModel) - # Extract itrNumber - initNSamples = self.MetaModel.ExpDesign.initNrSamples - maxNSamples = self.MetaModel.ExpDesign.MaxNSamples - itrNumber = (self.ExpDesign.X.shape[0] - initNSamples) - itrNumber //= self.ExpDesign.NrofNewSample + # Old Experimental design + oldExpDesignX = PCEModel.ExpDesign.X + oldExpDesignY = PCEModel.ExpDesign.Y - if itrNumber == 0: - weightExploration = 0.5 - else: - # # Extract the model errors from the last and next to last - # # iterations - # errorModel_i , errorModel_i_1 = self.errorModel[itrNumber], - # self.errorModel[itrNumber-1] + # Evaluate the PCE metamodel at the candidate location
+ Y_PC_can, _ = self.MetaModel.eval_metamodel(samples=np.array([X_can])) - # # Evaluate the error models for all selected samples so far - # eLCAllCands_i, _ = errorModel_i.eval_errormodel(OldExpDesign) - # eLCAllCands_i_1, _ = errorModel_i_1.eval_errormodel(OldExpDesign) + # Add all suggestions as new ExpDesign + NewExpDesignX = np.vstack((oldExpDesignX, X_can)) - # # Local improvement of LC error at last selected design - # sl_i = np.max(np.dstack(eLCAllCands_i.values())[-1]) - # sl_i_1 = np.max(np.dstack(eLCAllCands_i_1.values())[-1]) + NewExpDesignY = {} + for key in oldExpDesignY.keys(): + try: + NewExpDesignY[key] = np.vstack((oldExpDesignY[key], + Y_PC_can[key])) + except: + NewExpDesignY[key] = oldExpDesignY[key] - # p = sl_i**2 / sl_i_1**2 + PCEModel.ExpDesign.sampling_method = 'user' + PCEModel.ExpDesign.X = NewExpDesignX + PCEModel.ExpDesign.Y = NewExpDesignY - # # Global improvement of LC error at OldExpDesign - # sg_i = np.max(np.dstack(eLCAllCands_i.values()),axis=1) - # sg_i_1 = np.max(np.dstack(eLCAllCands_i_1.values()),axis=1) + # Create the SparseBayes-based PCE metamodel: + PCEModel.input_obj.polycoeffsFlag = False + univ_p_val = self.MetaModel.univ_basis_vals(X_can) + G_n_m_all = np.zeros((len(Model.Output.names), Model.nObs)) - # q = np.sum(np.square(sg_i)) / np.sum(np.square(sg_i_1)) + for idx, key in enumerate(Model.Output.names): + for i in range(Model.nObs): + BasisIndices = PCEModel.basis_dict[key]["y_"+str(i+1)] + clf_poly = PCEModel.clf_poly[key]["y_"+str(i+1)] + Mn = clf_poly.coef_ + Sn = clf_poly.sigma_ + beta = clf_poly.alpha_ + active = clf_poly.active_ + Psi = self.MetaModel.create_psi(BasisIndices, univ_p_val) - # weightExploration = min([0.5*p/q, 1]) + Sn_new_inv = np.linalg.inv(Sn) + Sn_new_inv += beta * np.dot(Psi[:, active].T, Psi[:, active]) + Sn_new = np.linalg.inv(Sn_new_inv) - # TODO: New adaptive trade-off according to Liu et al.
(2017) - # Mean squared error for last design point - last_EDX = OldExpDesign[-1].reshape(1, -1) - lastPCEY, _ = self.MetaModel.eval_metamodel(samples=last_EDX) - pce_y = np.array(list(lastPCEY.values()))[:, 0] - y = np.array(list(OutputDictY.values())[1:])[:, -1, :] - mseError = mean_squared_error(pce_y, y) + Mn_new = np.dot(Sn_new_inv, Mn[active]).reshape(-1, 1) + Mn_new += beta * np.dot(Psi[:, active].T, Y_PC_can[key][0, i]) + Mn_new = np.dot(Sn_new, Mn_new).flatten() - # Mean squared CV - error for last design point - error = [] - for V in self.MetaModel.LCerror.values(): - for v in V.values(): - error.append(v[-1]) - mseCVError = np.mean(np.square(error)) - weightExploration = 0.99 * min([0.5*mseError/mseCVError, 1]) + # Compute new moments + mean_old = Mn[0] + mean_new = Mn_new[0] + std_old = np.sqrt(np.sum(np.square(Mn[1:]))) + std_new = np.sqrt(np.sum(np.square(Mn_new[1:]))) - return weightExploration + G_n_m = np.log(std_old/std_new) - 1./2 + G_n_m += std_new**2 / (2*std_old**2) + G_n_m += (mean_new - mean_old)**2 / (2*std_old**2) - # ------------------------------------------------------------------------- - def opt_SeqDesign(self, sigma2Dict, NCandidate=5, var='DKL'): + G_n_m_all[idx, i] = G_n_m - # Initialization - PCEModel = self.MetaModel - Model = self.Model - Bounds = PCEModel.BoundTuples - NrNewSample = PCEModel.ExpDesign.NrofNewSample - ExploreMethod = PCEModel.ExpDesign.ExploreMethod - ExploitMethod = PCEModel.ExpDesign.ExploitMethod - NrofCandGroups = PCEModel.ExpDesign.NrofCandGroups - TradeOffScheme = PCEModel.ExpDesign.TradeOffScheme - - OldExpDesign = PCEModel.ExpDesign.X - OutputDictY = PCEModel.ExpDesign.Y.copy() - ndim = PCEModel.ExpDesign.X.shape[1] + clf_poly.coef_[active] = Mn_new + clf_poly.sigma_ = Sn_new + PCEModel.clf_poly[key]["y_"+str(i+1)] = clf_poly - # ----------------------------------------- - # ----------- CUSTOMIZED METHODS ---------- - # ----------------------------------------- - # Utility function ExploitMethod provided by user - if ExploitMethod.lower() == 'user': + # return np.sum(G_n_m_all) + # PCEModel.train_norm_design(Model, verbose=True) + PCE_SparseBayes_can = PCEModel - Xnew, filteredSamples = PCEModel.ExpDesign.ExploitFunction(self) + # Get the data + obs_data = self.MetaModel.Observations - print("\n") - print("\nXnew:\n", Xnew) + # ---------- Inner MC simulation for computing Utility Value ---------- + # Estimation of the integral via Monte Carlo integration + MCsize = X_MC.shape[0] + ESS = 0 - return Xnew, filteredSamples + while ((ESS > MCsize) or (ESS < 1)): - # ----------------------------------------- - # ---------- EXPLORATION METHODS ---------- - # ----------------------------------------- - if ExploreMethod == 'dual annealing': - # ------- EXPLORATION: OPTIMIZATION ------- - import time - start_time = time.time() + # Enriching Monte Carlo samples if need be + if ESS != 0: + X_MC = self.MetaModel.ExpDesign.generate_samples(MCsize, + 'random') - # Divide the domain to subdomains - args = [] - Subdomains = self.subdomain(Bounds, NrNewSample) - for i in range(NrNewSample): - args.append((ExploitMethod, Subdomains[i], sigma2Dict, var, i)) + # Evaluate the PCEModel at the given samples + Y_MC, std_MC = PCE_SparseBayes_can.eval_metamodel(samples=X_MC) - # Multiprocessing - pool = multiprocessing.Pool(multiprocessing.cpu_count()) + # Likelihood computation (Comparison of data and simulation + # results via PCE with candidate design) + likelihoods = self.__normpdf(Y_MC, std_MC, obs_data, sigma2Dict) - # With Pool.starmap_async() -
results = pool.starmap_async(self.dual_annealing, args).get() + # Check the Effective Sample Size (1<ESS<MCsize) + ESS = 1 / np.sum(np.square(likelihoods/np.sum(likelihoods))) - # Close the pool - pool.close() + # Enlarge sample size if it doesn't fulfill the criteria + if ((ESS > MCsize) or (ESS < 1)): + MCsize *= 10 + ESS = 0 - Xnew = np.array([results[NofE][1] for NofE in range(NrNewSample)]) + # Rejection Step + # Random numbers between 0 and 1 + unif = np.random.rand(1, MCsize)[0] - print("\nXnew:\n", Xnew) + # Reject the poorly performed prior + accepted = (likelihoods/np.max(likelihoods)) >= unif - elapsed_time = time.time() - start_time - print("\n") - print(f"elapsed_time: {round(elapsed_time,2)} sec.") - print('-'*20) + # -------------------- Utility functions -------------------- + # Utility function Eq.2 in Ref. (2) + # Kullback-Leibler Divergence (Sergey's paper) + if var == 'DKL': - elif ExploreMethod == 'LOOCV': - # ----------------------------------------------------------------- - # TODO: LOOCV model construnction based on Feng et al. (2020) - # 'LOOCV': - # Initilize the ExploitScore array - OutputNames = list(OutputDictY.keys())[1:] + # Prior-based estimation of BME + logBME = np.log(np.nanmean(likelihoods)) - # Generate random samples - allCandidates = PCEModel.ExpDesign.GetSample( - NCandidate, SamplingMethod='random' - ) + # Posterior-based expectation of likelihoods + postLikelihoods = likelihoods[accepted] + postExpLikelihoods = np.mean(np.log(postLikelihoods)) - # Construct error model based on LCerror - errorModel = PCEModel.create_ModelError(OldExpDesign, self.LCerror) - self.errorModel.append(copy(errorModel)) + # Haun et al implementation + U_J_d = np.mean(np.log(likelihoods[likelihoods != 0])- logBME) - # Evaluate the error models for allCandidates - eLCAllCands, _ = errorModel.eval_errormodel(allCandidates) - # Select the maximum as the representative error - eLCAllCands = np.dstack(eLCAllCands.values()) - eLCAllCandidates = np.max(eLCAllCands, axis=1)[:, 0] + U_J_d = np.sum(G_n_m_all) + # Ryan et al (2014) implementation + # importanceWeights = Likelihoods[Likelihoods!=0]/np.sum(Likelihoods[Likelihoods!=0]) + # U_J_d = np.mean(importanceWeights*np.log(Likelihoods[Likelihoods!=0])) - logBME - # Normalize the error w.r.t the maximum error - scoreExploration = eLCAllCandidates / np.sum(eLCAllCandidates) + # U_J_d = postExpLikelihoods - logBME - else: - # ------- EXPLORATION: SPACE-FILLING DESIGN ------- - # Generate candidate samples from Exploration class - explore = Exploration(PCEModel, NCandidate) - explore.w = 100 # * ndim #500 - # Select criterion (mc-intersite-proj-th, mc-intersite-proj) - explore.mcCriterion = 'mc-intersite-proj' - allCandidates, scoreExploration = explore.getExplorationSamples() + # Marginal likelihood + elif var == 'BME': - # Temp: ---- Plot all candidates ----- - if ndim == 2: - def plotter(points, allCandidates, Method, - scoreExploration=None): - if Method == 'Voronoi': - from scipy.spatial import Voronoi, voronoi_plot_2d - vor = Voronoi(points) - fig = voronoi_plot_2d(vor) - ax1 = fig.axes[0] - else: - fig = plt.figure() - ax1 = fig.add_subplot(111) - ax1.scatter(points[:, 0], points[:, 1], s=10, c='r', - marker="s", label='Old Design Points') - ax1.scatter(allCandidates[:, 0], allCandidates[:, 1], s=10, - c='b', marker="o", label='Design candidates') - for i in range(points.shape[0]): - txt = 'p'+str(i+1) - ax1.annotate(txt, (points[i, 0], points[i, 1])) - if scoreExploration is not None: - for i in range(allCandidates.shape[0]): 
- txt = str(round(scoreExploration[i], 5)) - ax1.annotate(txt, (allCandidates[i, 0], - allCandidates[i, 1])) + # Prior-based estimation of BME + logBME = np.log(np.nanmean(likelihoods)) + U_J_d = logBME - plt.xlim(self.BoundTuples[0]) - plt.ylim(self.BoundTuples[1]) - # plt.show() - plt.legend(loc='upper left') + # Bayes risk likelihood + elif var == 'BayesRisk': - # ----------------------------------------- - # --------- EXPLOITATION METHODS ---------- - # ----------------------------------------- - if ExploitMethod == 'BayesOptDesign' or\ - ExploitMethod == 'BayesActDesign': + U_J_d = -1 * np.var(likelihoods) - # ------- Calculate Exoploration weight ------- - # Compute exploration weight based on trade off scheme - weightExploration = self.tradoffWeights(TradeOffScheme, - OldExpDesign, - OutputDictY) - print(f"\nweightExploration={weightExploration:0.3f} " - f"weightExploitation={1- weightExploration:0.3f}") + # Entropy-based information gain + elif var == 'infEntropy': + # Prior-based estimation of BME + logBME = np.log(np.nanmean(likelihoods)) - # ------- EXPLOITATION: BayesOptDesign & ActiveLearning ------- - if weightExploration != 1.0: + # Posterior-based expectation of likelihoods + postLikelihoods = likelihoods[accepted] / np.nansum(likelihoods[accepted]) + postExpLikelihoods = np.mean(np.log(postLikelihoods)) - # Create a sample pool for rejection sampling - MCsize = 15000 - X_MC = PCEModel.ExpDesign.GetSample(MCsize, 'random') + # Posterior-based expectation of prior densities + postExpPrior = np.mean(logPriorLikelihoods[accepted]) - # Multiprocessing - pool = multiprocessing.Pool(multiprocessing.cpu_count()) + infEntropy = logBME - postExpPrior - postExpLikelihoods - # Split the candidates in groups for multiprocessing - split_cand = np.array_split(allCandidates, - NrofCandGroups, axis=0) - args = [] - for i in range(NrofCandGroups): - args.append((ExploitMethod, split_cand[i], i, sigma2Dict, - var, X_MC)) + U_J_d = infEntropy * -1 # -1 for minimization - # With Pool.starmap_async() - results = pool.starmap_async(self.Utility_runner, args).get() + # D-Posterior-precision + elif var == 'DPP': + X_Posterior = X_MC[accepted] + # covariance of the posterior parameters + U_J_d = -np.log(np.linalg.det(np.cov(X_Posterior))) - # Close the pool - pool.close() + # A-Posterior-precision + elif var == 'APP': + X_Posterior = X_MC[accepted] + # trace of the posterior parameters + U_J_d = -np.log(np.trace(np.cov(X_Posterior))) - # Retrieve the results and append them - U_J_d = np.concatenate([results[NofE][1] for NofE in - range(NrofCandGroups)]) + else: + print('The algorithm you requested has not been implemented yet!') - # Get the expected value (mean) of the Utility score - # for each cell - if ExploreMethod == 'Voronoi': - U_J_d = np.mean(U_J_d.reshape(-1, NCandidate), axis=1) + return -1 * U_J_d # -1 is for minimization instead of maximization - # Normalize U_J_d - norm_U_J_d = U_J_d / np.sum(U_J_d) - print("norm_U_J_d:\n", norm_U_J_d) - else: - norm_U_J_d = np.zeros((len(scoreExploration))) + # ------------------------------------------------------------------------- + def subdomain(self, Bounds, n_new_samples): + """ + Divides a domain defined by Bounds into sub domains. - # ------- Calculate Total score ------- - # ------- Trade off between EXPLORATION & EXPLOITATION ------- - # Total score - totalScore = (1 - weightExploration)*norm_U_J_d - totalScore += weightExploration*scoreExploration + Parameters + ---------- + Bounds : list of tuples + List of lower and upper bounds. 
+ n_new_samples : int + Number of new samples; the domain is divided into this many + subdomains. - # temp: Plot - # dim = self.ExpDesign.X.shape[1] - # if dim == 2: - # plotter(self.ExpDesign.X, allCandidates, ExploreMethod) + Returns + ------- + Subdomains : list of tuples + List of parameter bound tuples, one tuple per subdomain. - # ------- Select the best candidate ------- - # find an optimal point subset to add to the initial design by - # maximization of the utility score and taking care of NaN values - temp = totalScore.copy() - temp[np.isnan(totalScore)] = -np.inf - sorted_idxtotalScore = np.argsort(temp)[::-1] - bestIdx = sorted_idxtotalScore[:NrNewSample] + """ + n_params = self.MetaModel.n_params + n_subdomains = n_new_samples + 1 + LinSpace = np.zeros((n_params, n_subdomains)) - # select the requested number of samples - if ExploreMethod == 'Voronoi': - Xnew = np.zeros((NrNewSample, ndim)) - for i, idx in enumerate(bestIdx): - X_can = explore.closestPoints[idx] + for i in range(n_params): + LinSpace[i] = np.linspace(start=Bounds[i][0], stop=Bounds[i][1], + num=n_subdomains) + Subdomains = [] + for k in range(n_subdomains-1): + mylist = [] + for i in range(n_params): + mylist.append((LinSpace[i, k+0], LinSpace[i, k+1])) + Subdomains.append(tuple(mylist)) - # Calculate the maxmin score for the region of interest - newSamples, maxminScore = explore.getMCSamples(X_can) + return Subdomains - # select the requested number of samples - Xnew[i] = newSamples[np.argmax(maxminScore)] - else: - Xnew = allCandidates[sorted_idxtotalScore[:NrNewSample]] + # ------------------------------------------------------------------------- + def run_util_func(self, method, candidates, index, sigma2Dict=None, + var=None, X_MC=None): + """ + Runs the utility function based on the given method. - elif ExploitMethod == 'VarOptDesign': - # ------- EXPLOITATION: VarOptDesign ------- - UtilMethod = var + Parameters + ---------- + method : string + Exploitation method: `VarOptDesign`, `BayesActDesign` and + `BayesOptDesign`. + candidates : array of shape (n_samples, n_params) + All candidate parameter sets. + index : int + ExpDesign index. + sigma2Dict : dict, optional + A dictionary containing the measurement errors (sigma^2). The + default is None. + var : string, optional + Utility function. The default is None. + X_MC : array, optional + Monte Carlo parameter samples for `BayesOptDesign`. The default + is None. - # ------- Calculate Exoploration weight ------- - # Compute exploration weight based on trade off scheme - weightExploration = self.tradoffWeights(TradeOffScheme, - OldExpDesign, - OutputDictY) - print(f"\nweightExploration={weightExploration:0.3f} " - f"weightExploitation={1- weightExploration:0.3f}") + Returns + ------- + index : int + ExpDesign index. + array + Negated utility scores (for minimization).
- # Generate candidate samples from Exploration class - OutputNames = list(OutputDictY.keys())[1:] - nMeasurement = OutputDictY[OutputNames[0]].shape[1] + """ - # Find indices of the Vornoi cells with samples - goodSampleIdx = [] - for idx in range(len(explore.closestPoints)): - if len(explore.closestPoints[idx]) != 0: - goodSampleIdx.append(idx) + if method.lower() == 'varoptdesign': + U_J_d = self.util_VarBasedDesign(candidates, index, var) - # Find sensitive region - if UtilMethod == 'LOOCV': - LCerror = PCEModel.LCerror - allModifiedLOO = np.zeros((len(OldExpDesign), len(OutputNames), - nMeasurement)) - for y_idx, y_key in enumerate(OutputNames): - for idx, key in enumerate(LCerror[y_key].keys()): - allModifiedLOO[:, y_idx, idx] = abs( - LCerror[y_key][key]) + elif method.lower() == 'bayesactdesign': + NCandidate = candidates.shape[0] + U_J_d = np.zeros((NCandidate)) + for idx, X_can in tqdm(enumerate(candidates), ascii=True, + desc="OptBayesianDesign"): + U_J_d[idx] = self.util_BayesianActiveDesign(X_can, sigma2Dict, + var) + elif method.lower() == 'bayesoptdesign': + NCandidate = candidates.shape[0] + U_J_d = np.zeros((NCandidate)) + for idx, X_can in tqdm(enumerate(candidates), ascii=True, + desc="OptBayesianDesign"): + U_J_d[idx] = self.util_BayesianDesign(X_can, X_MC, sigma2Dict, + var) + return (index, -1 * U_J_d) - ExploitScore = np.max(np.max(allModifiedLOO, axis=1), axis=1) + # ------------------------------------------------------------------------- + def dual_annealing(self, method, Bounds, sigma2Dict, var, Run_No, + verbose=False): + """ + Exploration algorithm to find the optimum in the parameter space. - elif UtilMethod in ['EIGF', 'Entropy']: - # ----- All other in ['EIGF', 'Entropy', 'ALM'] ----- - # Initilize the ExploitScore array - ExploitScore = np.zeros((len(OldExpDesign), len(OutputNames))) + Parameters + ---------- + method : string + Exploitation method: `VarOptDesign`, `BayesActDesign` and + `BayesOptDesign`. + Bounds : list of tuples + List of lower and upper boundaries of parameters. + sigma2Dict : dict + A dictionary containing the measurement errors (sigma^2). + var : string + Utility function. + Run_No : int + Run number. + verbose : bool, optional + Print out a summary. The default is False. - # Split the candidates in groups for multiprocessing - if ExploreMethod != 'Voronoi': - split_cand = np.array_split(allCandidates, - NrofCandGroups, - axis=0) - goodSampleIdx = range(NrofCandGroups) - else: - split_cand = explore.closestPoints + Returns + ------- + Run_No : int + Run number. + array + Optimal candidate.
- # Split the candidates in groups for multiprocessing - args = [] - for index in goodSampleIdx: - args.append((ExploitMethod, split_cand[index], index, - sigma2Dict, var)) + """ - # Multiprocessing - pool = multiprocessing.Pool(multiprocessing.cpu_count()) - # With Pool.starmap_async() - results = pool.starmap_async(self.Utility_runner, args).get() + Model = self.Model + MaxFunItr = self.MetaModel.ExpDesign.MaxFunItr - # Close the pool - pool.close() + if method == 'VarOptDesign': + Res_Global = opt.dual_annealing(self.util_VarBasedDesign, + bounds=Bounds, + args=(Model, var), + maxfun=MaxFunItr) - # Retrieve the results and append them - ExploitScore = np.concatenate([results[NofE][1] for NofE in - range(len(goodSampleIdx))]) + elif method == 'BayesOptDesign': + Res_Global = opt.dual_annealing(self.util_BayesianDesign, + bounds=Bounds, + args=(Model, sigma2Dict, var), + maxfun=MaxFunItr) - else: - raise NameError('The requested utility function is not ' - 'available.') + if verbose: + print(f"global minimum: xmin = {Res_Global.x}, " + f"f(xmin) = {Res_Global.fun:.6f}, nfev = {Res_Global.nfev}") - # print("ExploitScore:\n", ExploitScore) + return (Run_No, Res_Global.x) - # find an optimal point subset to add to the initial design by - # maximization of the utility score and taking care of NaN values - # Total score - # Normalize U_J_d - ExploitScore = ExploitScore / np.sum(ExploitScore) - totalScore = (1 - weightExploration) * ExploitScore - totalScore += weightExploration * scoreExploration + # ------------------------------------------------------------------------- + def tradoff_weights(self, tradeoff_scheme, old_EDX, old_EDY): + """ + Calculates weights for exploration scores based on the requested + scheme: `None`, `equal`, `epsilon-decreasing` and `adaptive`. + + `None`: No exploration. + `equal`: Same weights for exploration and exploitation scores. + `epsilon-decreasing`: Start with more exploration and increase the + influence of exploitation along the way with an exponential decay + function. + `adaptive`: An adaptive method based on: + Liu, Haitao, Jianfei Cai, and Yew-Soon Ong. "An adaptive sampling + approach for Kriging metamodeling by maximizing expected prediction + error." Computers & Chemical Engineering 106 (2017): 171-182. - temp = totalScore.copy() - sorted_idxtotalScore = np.argsort(temp, axis=0)[::-1] - bestIdx = sorted_idxtotalScore[:NrNewSample] + Parameters + ---------- + tradeoff_scheme : string + Trade-off scheme for exploration and exploitation scores. + old_EDX : array (n_samples, n_params) + Old experimental design (training points). + old_EDY : dict + Old model responses (targets). - Xnew = np.zeros((NrNewSample, ndim)) - if ExploreMethod != 'Voronoi': - Xnew = allCandidates[bestIdx] - else: - for i, idx in enumerate(bestIdx.flatten()): - X_can = explore.closestPoints[idx] - # plotter(self.ExpDesign.X, X_can, ExploreMethod, - # scoreExploration=None) + Returns + ------- + exploration_weight : float + Exploration weight. + exploitation_weight : float + Exploitation weight.
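For intuition on the epsilon-decreasing scheme implemented below, the exploration weight decays exponentially from 1 toward roughly 1e-8 over the sampling budget. Here is a dependency-free sketch equivalent to the signal.exponential window used in the code, assuming the same tau; the helper name is illustrative, not part of the module.

import numpy as np

def epsilon_decreasing_weight(itr, n_init, n_max):
    """Exploration weight decaying from 1 to ~1e-8 over the design budget."""
    n_steps = n_max - n_init
    tau2 = -(n_steps - 1) / np.log(1e-8)
    # exponential window centered at 0: w[k] = exp(-k / tau2)
    weights = np.exp(-np.arange(n_steps) / tau2)
    return weights[itr]

print(epsilon_decreasing_weight(0, 10, 50))   # 1.0  (pure exploration)
print(epsilon_decreasing_weight(39, 10, 50))  # ~1e-8 (pure exploitation)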
- # Calculate the maxmin score for the region of interest - newSamples, maxminScore = explore.getMCSamples(X_can) + """ + if tradeoff_scheme is None: + exploration_weight = 0 - # select the requested number of samples - Xnew[i] = newSamples[np.argmax(maxminScore)] + elif tradeoff_scheme == 'equal': + exploration_weight = 0.5 - elif ExploitMethod == 'alphabetic': - # ------- EXPLOITATION: ALPHABETIC ------- - Xnew = self.util_AlphOptDesign(allCandidates, var) + elif tradeoff_scheme == 'epsilon-decreasing': + # epsilon-decreasing scheme + # Start with more exploration and increase the influence of + # exploitation along the way with an exponential decay function + initNSamples = self.MetaModel.ExpDesign.initNrSamples + n_max_samples = self.MetaModel.ExpDesign.n_max_samples - elif ExploitMethod == 'Space-filling': - # ------- EXPLOITATION: SPACE-FILLING ------- - totalScore = scoreExploration + itrNumber = (self.MetaModel.ExpDesign.X.shape[0] - initNSamples) + itrNumber //= self.MetaModel.ExpDesign.n_new_samples - # ------- Select the best candidate ------- - # find an optimal point subset to add to the initial design by - # maximization of the utility score and taking care of NaN values - temp = totalScore.copy() - temp[np.isnan(totalScore)] = -np.inf - sorted_idxtotalScore = np.argsort(temp)[::-1] + tau2 = -(n_max_samples-initNSamples-1) / np.log(1e-8) + exploration_weight = signal.exponential(n_max_samples-initNSamples, + 0, tau2, False)[itrNumber] - # select the requested number of samples - Xnew = allCandidates[sorted_idxtotalScore[:NrNewSample]] + elif tradeoff_scheme == 'adaptive': - else: - raise NameError('The requested design method is not available.') + # Extract itrNumber + initNSamples = self.MetaModel.ExpDesign.initNrSamples + n_max_samples = self.MetaModel.ExpDesign.n_max_samples + itrNumber = (self.MetaModel.ExpDesign.X.shape[0] - initNSamples) + itrNumber //= self.MetaModel.ExpDesign.n_new_samples - print("\n") - print("\nRun No. {}:".format(OldExpDesign.shape[0]+1)) - print("Xnew:\n", Xnew) + if itrNumber == 0: + exploration_weight = 0.5 + else: + # # Extract the model errors from the last and next to last + # # iterations + # errorModel_i , errorModel_i_1 = self.errorModel[itrNumber], + # self.errorModel[itrNumber-1] - return Xnew, None - # ------------------------------------------------------------------------- - def util_AlphOptDesign(self, allCandidates, var='D-Opt'): - """ - Enrich the Experimental design with the requested alphabetic criterion - based on exploring the space with number of sampling points. + # # Evaluate the error models for all selected samples so far + # eLCAllCands_i, _ = errorModel_i.eval_errormodel(OldExpDesign) + # eLCAllCands_i_1, _ = errorModel_i_1.eval_errormodel(OldExpDesign) - Ref: Hadigol, M., & Doostan, A. (2018). Least squares polynomial chaos - expansion: A review of sampling strategies., Computer Methods in - Applied Mechanics and Engineering, 332, 382-407.
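The adaptive branch below sets the exploration weight from the ratio of the metamodel's mean squared error at the last design point to its mean squared cross-validation error; a compact sketch of that rule with an illustrative helper name:

import numpy as np

def adaptive_exploration_weight(mse_last_point, mse_cv):
    """Adaptive trade-off in the spirit of Liu et al. (2017):
    more exploration when the local error exceeds the CV error."""
    return 0.99 * min(0.5 * mse_last_point / mse_cv, 1.0)

print(adaptive_exploration_weight(0.02, 0.04))  # 0.2475 -> mostly exploitation
print(adaptive_exploration_weight(0.40, 0.04))  # 0.99   -> mostly exploration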
+ # p = sl_i**2 / sl_i_1**2 - Arguments - --------- - NCandidate : int - Number of candidate points to be searched + # # Global improvement of LC error at OldExpDesign + # sg_i = np.max(np.dstack(eLCAllCands_i.values()),axis=1) + # sg_i_1 = np.max(np.dstack(eLCAllCands_i_1.values()),axis=1) - var : string - Alphabetic optimality criterion + # q = np.sum(np.square(sg_i)) / np.sum(np.square(sg_i_1)) - Returns - ------- - X_new : ndarray[1, n] - The new sampling location in the input space. - """ - PCEModelOrig = self.PCEModel - Model = self.ModelObj - NrofNewSample = PCEModelOrig.ExpDesign.NrofNewSample - index = PCEModelOrig.index - NCandidate = allCandidates.shape[0] + # weightExploration = min([0.5*p/q, 1]) - # TODO: Loop over outputs - OutputName = Model.Output.names[0] + # TODO: New adaptive trade-off according to Liu et al. (2017) + # Mean squared error for last design point + last_EDX = old_EDX[-1].reshape(1, -1) + lastPCEY, _ = self.MetaModel.eval_metamodel(samples=last_EDX) + pce_y = np.array(list(lastPCEY.values()))[:, 0] + y = np.array(list(old_EDY.values())[1:])[:, -1, :] + mseError = mean_squared_error(pce_y, y) - # To avoid changes ub original aPCE object - PCEModel = deepcopy(PCEModelOrig) + # Mean squared CV - error for last design point + error = [] + for V in self.MetaModel.LCerror.values(): + for v in V.values(): + error.append(v[-1]) + mseCVError = np.mean(np.square(error)) + exploration_weight = 0.99 * min([0.5*mseError/mseCVError, 1]) - # Old Experimental design - oldExpDesignX = PCEModel.ExpDesign.X + # Exploitation weight + exploitation_weight = 1 - exploration_weight - # TODO: Only one psi can be selected. - # Suggestion: Go for the one with the highest LOO error - Scores = list(PCEModel.ScoresDict[OutputName].values()) - ModifiedLOO = [1-score for score in Scores] - outIdx = np.argmax(ModifiedLOO[:index]) + return exploration_weight, exploitation_weight - # Initialize Phi to save the criterion's values - Phi = np.zeros((NCandidate)) + # ------------------------------------------------------------------------- + def opt_SeqDesign(self, sigma2, n_candidates=5, var='DKL'): + """ + Runs optimal sequential design. - BasisIndices = PCEModelOrig.BasisDict[OutputName]["y_"+str(outIdx+1)] - P = len(BasisIndices) + Parameters + ---------- + sigma2 : dict + A dictionary containing the measurement errors (sigma^2). + n_candidates : int, optional + Number of candidate samples. The default is 5. + var : string, optional + Utility function. The default is 'DKL'. + + Raises + ------ + NameError + Wrong utility function. - # ------ Old Psi ------------ - univ_p_val = PCEModelOrig.univ_basis_vals(oldExpDesignX) - Psi = PCEModelOrig.PCE_create_Psi(BasisIndices, univ_p_val) + Returns + ------- + Xnew : array (n_samples, n_params) + Selected new training point(s).
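opt_SeqDesign below ultimately blends the normalized exploitation scores with the exploration scores using these weights and picks the top-ranked candidates, pushing NaNs to the bottom of the ranking. A minimal sketch of that selection step with hypothetical score arrays:

import numpy as np

def select_candidates(explore_score, exploit_score, w_explore, n_new):
    """Blend normalized scores and return indices of the best candidates."""
    exploit_norm = exploit_score / np.sum(exploit_score)
    total = (1 - w_explore) * exploit_norm + w_explore * explore_score
    total = np.where(np.isnan(total), -np.inf, total)  # NaNs rank last
    return np.argsort(total)[::-1][:n_new]

rng = np.random.default_rng(1)
idx = select_candidates(rng.random(8), rng.random(8), 0.5, n_new=2)
print(idx)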
+ """ - # ------ New candidates (Psi_c) ------------ - # Assemble Psi_c - univ_p_val_c = self.univ_basis_vals(allCandidates) - Psi_c = self.PCE_create_Psi(BasisIndices, univ_p_val_c) + # Initialization + PCEModel = self.MetaModel + Bounds = PCEModel.BoundTuples + n_new_samples = PCEModel.ExpDesign.n_new_samples + explore_method = PCEModel.ExpDesign.explore_method + exploit_method = PCEModel.ExpDesign.exploit_method + n_cand_groups = PCEModel.ExpDesign.n_cand_groups + tradeoff_scheme = PCEModel.ExpDesign.tradeoff_scheme + + old_EDX = PCEModel.ExpDesign.X + old_EDY = PCEModel.ExpDesign.Y.copy() + ndim = PCEModel.ExpDesign.X.shape[1] + OutputNames = PCEModel.ModelObj.Output.names - for idx in range(NCandidate): + # ----------------------------------------- + # ----------- CUSTOMIZED METHODS ---------- + # ----------------------------------------- + # Utility function exploit_method provided by user + if exploit_method.lower() == 'user': - # Include the new row to the original Psi - Psi_cand = np.vstack((Psi, Psi_c[idx])) + Xnew, filteredSamples = PCEModel.ExpDesign.ExploitFunction(self) - # Information matrix - PsiTPsi = np.dot(Psi_cand.T, Psi_cand) - M = PsiTPsi / (len(oldExpDesignX)+1) + print("\n") + print("\nXnew:\n", Xnew) - if np.linalg.cond(PsiTPsi) > 1e-12 \ - and np.linalg.cond(PsiTPsi) < 1 / sys.float_info.epsilon: - # faster - invM = linalg.solve(M, sparse.eye(PsiTPsi.shape[0]).toarray()) - else: - # stabler - invM = np.linalg.pinv(M) + return Xnew, filteredSamples - # ---------- Calculate optimality criterion ---------- - # Optimality criteria according to Section 4.5.1 in Ref. + # ----------------------------------------- + # ---------- EXPLORATION METHODS ---------- + # ----------------------------------------- + if explore_method == 'dual annealing': + # ------- EXPLORATION: OPTIMIZATION ------- + import time + start_time = time.time() - # D-Opt - if var == 'D-Opt': - Phi[idx] = (np.linalg.det(invM)) ** (1/P) + # Divide the domain to subdomains + args = [] + subdomains = self.subdomain(Bounds, n_new_samples) + for i in range(n_new_samples): + args.append((exploit_method, subdomains[i], sigma2, var, i)) - # A-Opt - elif var == 'A-Opt': - Phi[idx] = np.trace(invM) + # Multiprocessing + pool = multiprocessing.Pool(multiprocessing.cpu_count()) - # K-Opt - elif var == 'K-Opt': - Phi[idx] = np.linalg.cond(M) + # With Pool.starmap_async() + results = pool.starmap_async(self.dual_annealing, args).get() - else: - raise Exception('The optimality criterion you requested has ' - 'not been implemented yet!') + # Close the pool + pool.close() - # find an optimal point subset to add to the initial design - # by minimization of the Phi - sorted_idxtotalScore = np.argsort(Phi) + Xnew = np.array([results[i][1] for i in range(n_new_samples)]) - # select the requested number of samples - Xnew = allCandidates[sorted_idxtotalScore[:NrofNewSample]] + print("\nXnew:\n", Xnew) - return Xnew + elapsed_time = time.time() - start_time + print("\n") + print(f"elapsed_time: {round(elapsed_time,2)} sec.") + print('-'*20) - # ------------------------------------------------------------------------- - def normpdf(self, PCEOutputs, std_PC_MC, obs_data, Sigma2s): + elif explore_method == 'LOOCV': + # ----------------------------------------------------------------- + # TODO: LOOCV model construnction based on Feng et al. 
(2020) + # 'LOOCV': + # Initilize the ExploitScore array - output_names = self.Model.Output.names + # Generate random samples + allCandidates = PCEModel.ExpDesign.generate_samples(n_candidates, + 'random') - SampleSize, index = PCEOutputs[output_names[0]].shape - if type(self.MetaModel.index) is list: - index = self.MetaModel.index - else: - index = [self.MetaModel.index]*len(output_names) + # Construct error model based on LCerror + errorModel = PCEModel.create_ModelError(old_EDX, self.LCerror) + self.errorModel.append(copy(errorModel)) - # Flatten the ObservationData - TotalData = obs_data[output_names].to_numpy().flatten('F') + # Evaluate the error models for allCandidates + eLCAllCands, _ = errorModel.eval_errormodel(allCandidates) + # Select the maximum as the representative error + eLCAllCands = np.dstack(eLCAllCands.values()) + eLCAllCandidates = np.max(eLCAllCands, axis=1)[:, 0] - # Remove NaN - TotalData = TotalData[~np.isnan(TotalData)] - Sigma2s = Sigma2s[~np.isnan(Sigma2s)] - - # Flatten the Output - TotalOutputs = np.empty((SampleSize, 0)) - for idx, key in enumerate(output_names): - TotalOutputs = np.hstack((TotalOutputs, - PCEOutputs[key][:, :index[idx]])) - - # Covariance Matrix - covMatrix = np.zeros((Sigma2s.shape[0], Sigma2s.shape[0]), float) - np.fill_diagonal(covMatrix, Sigma2s) + # Normalize the error w.r.t the maximum error + scoreExploration = eLCAllCandidates / np.sum(eLCAllCandidates) - # Add the std of the PCE. - covMatrix_PCE = np.zeros((Sigma2s.shape[0], Sigma2s.shape[0]), float) - stdPCE = np.empty((SampleSize, 0)) - for idx, key in enumerate(output_names): - stdPCE = np.hstack((stdPCE, std_PC_MC[key][:, :index[idx]])) + else: + # ------- EXPLORATION: SPACE-FILLING DESIGN ------- + # Generate candidate samples from Exploration class + explore = Exploration(PCEModel, n_candidates) + explore.w = 100 # * ndim #500 + # Select criterion (mc-intersite-proj-th, mc-intersite-proj) + explore.mcCriterion = 'mc-intersite-proj' + allCandidates, scoreExploration = explore.getExplorationSamples() - # Expected value of variance (Assump: i.i.d stds) - varPCE = np.mean(stdPCE**2, axis=0) - # # varPCE = np.var(stdPCE, axis=1) - np.fill_diagonal(covMatrix_PCE, varPCE) + # Temp: ---- Plot all candidates ----- + if ndim == 2: + def plotter(points, allCandidates, Method, + scoreExploration=None): + if Method == 'Voronoi': + from scipy.spatial import Voronoi, voronoi_plot_2d + vor = Voronoi(points) + fig = voronoi_plot_2d(vor) + ax1 = fig.axes[0] + else: + fig = plt.figure() + ax1 = fig.add_subplot(111) + ax1.scatter(points[:, 0], points[:, 1], s=10, c='r', + marker="s", label='Old Design Points') + ax1.scatter(allCandidates[:, 0], allCandidates[:, 1], s=10, + c='b', marker="o", label='Design candidates') + for i in range(points.shape[0]): + txt = 'p'+str(i+1) + ax1.annotate(txt, (points[i, 0], points[i, 1])) + if scoreExploration is not None: + for i in range(allCandidates.shape[0]): + txt = str(round(scoreExploration[i], 5)) + ax1.annotate(txt, (allCandidates[i, 0], + allCandidates[i, 1])) - # Aggregate the cov matrices - covMatrix += covMatrix_PCE + plt.xlim(self.BoundTuples[0]) + plt.ylim(self.BoundTuples[1]) + # plt.show() + plt.legend(loc='upper left') - # Compute likelihood - self.Likelihoods = stats.multivariate_normal.pdf(TotalOutputs, - mean=TotalData, - cov=covMatrix, - allow_singular=True) - return self.Likelihoods + # ----------------------------------------- + # --------- EXPLOITATION METHODS ---------- + # ----------------------------------------- + if exploit_method 
== 'BayesOptDesign' or\ + exploit_method == 'BayesActDesign': - # ------------------------------------------------------------------------- - def posteriorPlot(self, Posterior, MAP, parNames, key, figsize=(10, 10)): + # ------- Calculate Exoploration weight ------- + # Compute exploration weight based on trade off scheme + explore_w, exploit_w = self.tradoff_weights(tradeoff_scheme, + old_EDX, + old_EDY) + print(f"\nweightExploration={explore_w:0.3f} " + f"weightExploitation={exploit_w:0.3f}") - # Initialization - newpath = (r'Outputs_SeqPosteriorComparison') - if not os.path.exists(newpath): - os.makedirs(newpath) + # ------- EXPLOITATION: BayesOptDesign & ActiveLearning ------- + if explore_w != 1.0: - lw = 3. - BoundTuples = self.BoundTuples - NofPa = len(MAP) + # Create a sample pool for rejection sampling + MCsize = 15000 + X_MC = PCEModel.ExpDesign.generate_samples(MCsize, 'random') - # This is the true mean of the second mode that we used above: - value1 = MAP + # Multiprocessing + pool = multiprocessing.Pool(multiprocessing.cpu_count()) - # This is the empirical mean of the sample: - value2 = np.mean(Posterior, axis=0) + # Split the candidates in groups for multiprocessing + split_cand = np.array_split(allCandidates, + n_cand_groups, axis=0) + args = [] + for i in range(n_cand_groups): + args.append((exploit_method, split_cand[i], i, sigma2, var, + X_MC)) - if NofPa == 2: + # With Pool.starmap_async() + results = pool.starmap_async(self.run_util_func, args).get() - figPosterior, ax = plt.subplots() - plt.hist2d(Posterior[:, 0], Posterior[:, 1], bins=(200, 200), - range=np.array([BoundTuples[0], BoundTuples[1]]), - cmap=plt.cm.jet) + # Close the pool + pool.close() - plt.xlabel(parNames[0]) - plt.ylabel(parNames[1]) + # Retrieve the results and append them + U_J_d = np.concatenate([results[NofE][1] for NofE in + range(n_cand_groups)]) - plt.xlim(BoundTuples[0]) - plt.ylim(BoundTuples[1]) + # Get the expected value (mean) of the Utility score + # for each cell + if explore_method == 'Voronoi': + U_J_d = np.mean(U_J_d.reshape(-1, n_candidates), axis=1) - ax.axvline(value1[0], color="g", lw=lw) - ax.axhline(value1[1], color="g", lw=lw) - ax.plot(value1[0], value1[1], "sg", lw=lw+1) + # Normalize U_J_d + norm_U_J_d = U_J_d / np.sum(U_J_d) + print("norm_U_J_d:\n", norm_U_J_d) + else: + norm_U_J_d = np.zeros((len(scoreExploration))) - ax.axvline(value2[0], ls='--', color="r", lw=lw) - ax.axhline(value2[1], ls='--', color="r", lw=lw) - ax.plot(value2[0], value2[1], "sr", lw=lw+1) + # ------- Calculate Total score ------- + # ------- Trade off between EXPLORATION & EXPLOITATION ------- + # Total score + totalScore = exploit_w * norm_U_J_d + totalScore += explore_w * scoreExploration - else: - import corner - figPosterior = corner.corner(Posterior, labels=parNames, - title_fmt='.2e', show_titles=True, - title_kwargs={"fontsize": 12}) + # temp: Plot + # dim = self.ExpDesign.X.shape[1] + # if dim == 2: + # plotter(self.ExpDesign.X, allCandidates, explore_method) - # Extract the axes - axes = np.array(figPosterior.axes).reshape((NofPa, NofPa)) + # ------- Select the best candidate ------- + # find an optimal point subset to add to the initial design by + # maximization of the utility score and taking care of NaN values + temp = totalScore.copy() + temp[np.isnan(totalScore)] = -np.inf + sorted_idxtotalScore = np.argsort(temp)[::-1] + bestIdx = sorted_idxtotalScore[:n_new_samples] - # Loop over the diagonal - for i in range(NofPa): - ax = axes[i, i] - ax.axvline(value1[i], color="g") - 
ax.axvline(value2[i], ls='--', color="r") + # select the requested number of samples + if explore_method == 'Voronoi': + Xnew = np.zeros((n_new_samples, ndim)) + for i, idx in enumerate(bestIdx): + X_can = explore.closestPoints[idx] - # Loop over the histograms - for yi in range(NofPa): - for xi in range(yi): - ax = axes[yi, xi] - ax.axvline(value1[xi], color="g") - ax.axvline(value2[xi], ls='--', color="r") - ax.axhline(value1[yi], color="g") - ax.axhline(value2[yi], ls='--', color="r") - ax.plot(value1[xi], value1[yi], "sg") - ax.plot(value2[xi], value2[yi], "sr") + # Calculate the maxmin score for the region of interest + newSamples, maxminScore = explore.getMCSamples(X_can) - figPosterior.savefig(f'./{newpath}/{key}.svg', bbox_inches='tight') - plt.close() + # select the requested number of samples + Xnew[i] = newSamples[np.argmax(maxminScore)] + else: + Xnew = allCandidates[sorted_idxtotalScore[:n_new_samples]] - # Save the posterior as .npy - np.save(f'./{newpath}/{key}.npy', Posterior) + elif exploit_method == 'VarOptDesign': + # ------- EXPLOITATION: VarOptDesign ------- + UtilMethod = var - return figPosterior + # ------- Calculate Exoploration weight ------- + # Compute exploration weight based on trade off scheme + explore_w, exploit_w = self.tradoff_weights(tradeoff_scheme, + old_EDX, + old_EDY) + print(f"\nweightExploration={explore_w:0.3f} " + f"weightExploitation={exploit_w:0.3f}") - # ------------------------------------------------------------------------- - def hellinger_distance(self, P, Q): - """ - Hellinger distance between two continuous distributions. + # Generate candidate samples from Exploration class + nMeasurement = old_EDY[OutputNames[0]].shape[1] - The maximum distance 1 is achieved when P assigns probability zero to - every set to which Q assigns a positive probability, and vice versa. - 0 (identical) and 1 (maximally different) + # Find indices of the Vornoi cells with samples + goodSampleIdx = [] + for idx in range(len(explore.closestPoints)): + if len(explore.closestPoints[idx]) != 0: + goodSampleIdx.append(idx) - Parameters - ---------- - P : array - Reference likelihood. - Q : array - Estimated likelihood. + # Find sensitive region + if UtilMethod == 'LOOCV': + LCerror = PCEModel.LCerror + allModifiedLOO = np.zeros((len(old_EDX), len(OutputNames), + nMeasurement)) + for y_idx, y_key in enumerate(OutputNames): + for idx, key in enumerate(LCerror[y_key].keys()): + allModifiedLOO[:, y_idx, idx] = abs( + LCerror[y_key][key]) - Returns - ------- - float - Hellinger distance of two distributions. 
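For orientation, the candidate ranking assembled in opt_SeqDesign boils down to a convex combination of normalized exploitation and exploration scores, followed by a NaN-safe descending sort. A minimal, self-contained sketch of that selection rule; the array values and weights below are illustrative only, not taken from the code:

    import numpy as np

    # Illustrative scores for five candidates; NaNs mimic failed utility runs
    norm_U_J_d = np.array([0.10, 0.45, np.nan, 0.30, 0.15])      # exploitation
    scoreExploration = np.array([0.30, 0.10, 0.20, 0.25, 0.15])  # exploration
    explore_w, exploit_w = 0.4, 0.6  # stand-ins for the trade-off weights
    n_new_samples = 2

    # Total score: trade-off between exploitation and exploration
    totalScore = exploit_w * norm_U_J_d + explore_w * scoreExploration

    # NaN-safe selection of the best candidates (descending score)
    temp = totalScore.copy()
    temp[np.isnan(temp)] = -np.inf
    bestIdx = np.argsort(temp)[::-1][:n_new_samples]
    print(bestIdx)  # -> [1 3]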
+ ExploitScore = np.max(np.max(allModifiedLOO, axis=1), axis=1) - """ - mu1 = P.mean() - Sigma1 = np.std(P) + elif UtilMethod in ['EIGF', 'Entropy']: + # ----- All other in ['EIGF', 'Entropy', 'ALM'] ----- + # Initilize the ExploitScore array + ExploitScore = np.zeros((len(old_EDX), len(OutputNames))) - mu2 = Q.mean() - Sigma2 = np.std(Q) + # Split the candidates in groups for multiprocessing + if explore_method != 'Voronoi': + split_cand = np.array_split(allCandidates, + n_cand_groups, + axis=0) + goodSampleIdx = range(n_cand_groups) + else: + split_cand = explore.closestPoints - term1 = np.sqrt(2*Sigma1*Sigma2 / (Sigma1**2 + Sigma2**2)) + # Split the candidates in groups for multiprocessing + args = [] + for index in goodSampleIdx: + args.append((exploit_method, split_cand[index], index, + sigma2, var)) - term2 = np.exp(-.25 * (mu1 - mu2)**2 / (Sigma1**2 + Sigma2**2)) + # Multiprocessing + pool = multiprocessing.Pool(multiprocessing.cpu_count()) + # With Pool.starmap_async() + results = pool.starmap_async(self.run_util_func, args).get() - H_squared = 1 - term1 * term2 + # Close the pool + pool.close() - return np.sqrt(H_squared) + # Retrieve the results and append them + ExploitScore = np.concatenate([results[NofE][1] for NofE in + range(len(goodSampleIdx))]) - # ------------------------------------------------------------------------- - def BME_Calculator(self, PCEModel, obs_data, sigma2Dict): - """ - This function computes the Bayesian model evidence (BME) via Monte - Carlo integration. + else: + raise NameError('The requested utility function is not ' + 'available.') - """ - # Initializations - validLikelihoods = PCEModel.validLikelihoods - PostSnapshot = PCEModel.ExpDesign.PostSnapshot - if PostSnapshot or len(validLikelihoods) != 0: - newpath = (r'Outputs_SeqPosteriorComparison') - if not os.path.exists(newpath): - os.makedirs(newpath) + # print("ExploitScore:\n", ExploitScore) - SamplingMethod = 'random' - MCsize = 100000 - ESS = 0 + # find an optimal point subset to add to the initial design by + # maximization of the utility score and taking care of NaN values + # Total score + # Normalize U_J_d + ExploitScore = ExploitScore / np.sum(ExploitScore) + totalScore = exploit_w * ExploitScore + totalScore += explore_w * scoreExploration - # Estimation of the integral via Monte Varlo integration - while ((ESS > MCsize) or (ESS < 1)): + temp = totalScore.copy() + sorted_idxtotalScore = np.argsort(temp, axis=0)[::-1] + bestIdx = sorted_idxtotalScore[:n_new_samples] - # Generate samples for Monte Carlo simulation - if len(validLikelihoods) == 0: - X_MC = PCEModel.ExpDesign.GetSample(MCsize, SamplingMethod) + Xnew = np.zeros((n_new_samples, ndim)) + if explore_method != 'Voronoi': + Xnew = allCandidates[bestIdx] else: - X_MC = PCEModel.validSamples - MCsize = X_MC.shape[0] + for i, idx in enumerate(bestIdx.flatten()): + X_can = explore.closestPoints[idx] + # plotter(self.ExpDesign.X, X_can, explore_method, + # scoreExploration=None) - # Monte Carlo simulation for the candidate design - Y_MC, std_MC = PCEModel.eval_metamodel(samples=X_MC) + # Calculate the maxmin score for the region of interest + newSamples, maxminScore = explore.getMCSamples(X_can) - # Likelihood computation (Comparison of data and - # simulation results via PCE with candidate design) - Likelihoods = self.normpdf(Y_MC, std_MC, obs_data, sigma2Dict) + # select the requested number of samples + Xnew[i] = newSamples[np.argmax(maxminScore)] - # Check the Effective Sample Size (1000<ESS<MCsize) - ESS = 1 / 
np.sum(np.square(Likelihoods/np.sum(Likelihoods))) + elif exploit_method == 'alphabetic': + # ------- EXPLOITATION: ALPHABETIC ------- + Xnew = self.util_AlphOptDesign(allCandidates, var) - # Enlarge sample size if it doesn't fulfill the criteria - if ((ESS > MCsize) or (ESS < 1)): - print('ESS={0} MC size should be larger.'.format(ESS)) - MCsize = MCsize * 10 - ESS = 0 + elif exploit_method == 'Space-filling': + # ------- EXPLOITATION: SPACE-FILLING ------- + totalScore = scoreExploration - # Rejection Step - # Random numbers between 0 and 1 - unif = np.random.rand(1, MCsize)[0] + # ------- Select the best candidate ------- + # find an optimal point subset to add to the initial design by + # maximization of the utility score and taking care of NaN values + temp = totalScore.copy() + temp[np.isnan(totalScore)] = -np.inf + sorted_idxtotalScore = np.argsort(temp)[::-1] - # Reject the poorly performed prior - accepted = (Likelihoods/np.max(Likelihoods)) >= unif - X_Posterior = X_MC[accepted] + # select the requested number of samples + Xnew = allCandidates[sorted_idxtotalScore[:n_new_samples]] - # ------------------------------------------------------------ - # --- Kullback-Leibler Divergence & Information Entropy ------ - # ------------------------------------------------------------ - # Prior-based estimation of BME - logBME = np.log(np.nanmean(Likelihoods)) + else: + raise NameError('The requested design method is not available.') - # Posterior-based expectation of likelihoods - postExpLikelihoods = np.mean(np.log(Likelihoods[accepted])) + print("\n") + print("\nRun No. {}:".format(old_EDX.shape[0]+1)) + print("Xnew:\n", Xnew) - # Posterior-based expectation of prior densities - # postExpPrior = np.mean([log_prior(sample) for sample in X_Posterior]) + return Xnew, None - # Calculate Kullback-Leibler Divergence - # KLD = np.mean(np.log(Likelihoods[Likelihoods!=0])- logBME) - KLD = postExpLikelihoods - logBME + # ------------------------------------------------------------------------- + def util_AlphOptDesign(self, candidates, var='D-Opt'): + """ + Enriches the Experimental design with the requested alphabetic + criterion based on exploring the space with number of sampling points. - # Information Entropy based on Entropy paper Eq. 38 - # infEntropy = logBME - postExpPrior - postExpLikelihoods + Ref: Hadigol, M., & Doostan, A. (2018). Least squares polynomial chaos + expansion: A review of sampling strategies., Computer Methods in + Applied Mechanics and Engineering, 332, 382-407. - # If PostSnapshot is True, plot likelihood vs refrence - if PostSnapshot or len(validLikelihoods) != 0: - idx = len([name for name in os.listdir(newpath) if 'Likelihoods_' - in name and os.path.isfile(os.path.join(newpath, name))]) - fig, ax = plt.subplots() - sns.kdeplot(np.log(validLikelihoods[validLikelihoods > 0]), - shade=True, color="g", label='Ref. 
Likelihood') - sns.kdeplot(np.log(Likelihoods[Likelihoods > 0]), shade=True, - color="b", label='Likelihood with PCE') + Arguments + --------- + NCandidate : int + Number of candidate points to be searched - # Hellinger distance - ref_like = np.log(validLikelihoods[validLikelihoods > 0]) - est_like = np.log(Likelihoods[Likelihoods > 0]) - distHellinger = self.hellinger_distance(ref_like, est_like) - text = f"Hellinger Dist.={distHellinger:.3f}\n logBME={logBME:.3f}" - "\n DKL={KLD:.3f}" + var : string + Alphabetic optimality criterion - plt.text(0.05, 0.75, text, bbox=dict(facecolor='wheat', - edgecolor='black', - boxstyle='round,pad=1'), - transform=ax.transAxes) + Returns + ------- + X_new : array of shape (1, n_params) + The new sampling location in the input space. + """ + PCEModelOrig = self.PCEModel + Model = self.ModelObj + n_new_samples = PCEModelOrig.ExpDesign.n_new_samples + NCandidate = candidates.shape[0] - fig.savefig(f'./{newpath}/Likelihoods_{idx}.svg', - bbox_inches='tight') - plt.close() + # TODO: Loop over outputs + OutputName = Model.Output.names[0] - else: - distHellinger = 0.0 + # To avoid changes ub original aPCE object + PCEModel = deepcopy(PCEModelOrig) - return (logBME, KLD, X_Posterior, distHellinger) + # Old Experimental design + oldExpDesignX = PCEModel.ExpDesign.X - # ------------------------------------------------------------------------- - def validError_(self): + # TODO: Only one psi can be selected. + # Suggestion: Go for the one with the highest LOO error + Scores = list(PCEModel.score_dict[OutputName].values()) + ModifiedLOO = [1-score for score in Scores] + outIdx = np.argmax(ModifiedLOO) - PCEModel = self.MetaModel - Model = self.Model - OutputName = Model.Output.names + # Initialize Phi to save the criterion's values + Phi = np.zeros((NCandidate)) - # Generate random samples - Samples = PCEModel.validSamples + BasisIndices = PCEModelOrig.basis_dict[OutputName]["y_"+str(outIdx+1)] + P = len(BasisIndices) - # Extract the original model with the generated samples - ModelOutputs = PCEModel.validModelRuns + # ------ Old Psi ------------ + univ_p_val = PCEModelOrig.univ_basis_vals(oldExpDesignX) + Psi = PCEModelOrig.create_psi(BasisIndices, univ_p_val) - # Run the PCE model with the generated samples - PCEOutputs, PCEOutputs_std = PCEModel.eval_metamodel(samples=Samples) + # ------ New candidates (Psi_c) ------------ + # Assemble Psi_c + univ_p_val_c = self.univ_basis_vals(candidates) + Psi_c = self.create_psi(BasisIndices, univ_p_val_c) - validError_dict = {} - # Loop over the keys and compute RMSE error. 
-        for key in OutputName:
-            weight = np.mean(np.square(PCEOutputs_std[key]), axis=0)
-            if all(weight == 0):
-                weight = 'variance_weighted'
-            validError_dict[key] = mean_squared_error(ModelOutputs[key],
-                                                      PCEOutputs[key])
-            validError_dict[key] /= np.var(ModelOutputs[key], ddof=1)
+        for idx in range(NCandidate):
-        return validError_dict
+            # Include the new row to the original Psi
+            Psi_cand = np.vstack((Psi, Psi_c[idx]))
-    # -------------------------------------------------------------------------
-    def error_Mean_Std(self):
+            # Information matrix
+            PsiTPsi = np.dot(Psi_cand.T, Psi_cand)
+            M = PsiTPsi / (len(oldExpDesignX)+1)
-        PCEModel = self.MetaModel
-        # Extract the mean and std provided by user
-        df_MCReference = PCEModel.ModelObj.MCReference
+            if np.linalg.cond(PsiTPsi) > 1e-12 \
+               and np.linalg.cond(PsiTPsi) < 1 / sys.float_info.epsilon:
+                # faster
+                invM = linalg.solve(M, sparse.eye(PsiTPsi.shape[0]).toarray())
+            else:
+                # stabler
+                invM = np.linalg.pinv(M)
-        # Compute the mean and std based on the PCEModel
-        PCEMeans = dict()
-        PCEStds = dict()
-        for Outkey, ValuesDict in PCEModel.CoeffsDict.items():
-            PCEMean = np.zeros((len(ValuesDict)))
-            PCEStd = np.zeros((len(ValuesDict)))
+            # ---------- Calculate optimality criterion ----------
+            # Optimality criteria according to Section 4.5.1 in Ref.
-            for Inkey, InIdxValues in ValuesDict.items():
-                idx = int(Inkey.split('_')[1]) - 1
-                coeffs = PCEModel.CoeffsDict[Outkey][Inkey]
+            # D-Opt
+            if var == 'D-Opt':
+                Phi[idx] = (np.linalg.det(invM)) ** (1/P)
-                # Mean = c_0
-                if coeffs[0] != 0:
-                    PCEMean[idx] = coeffs[0]
-                else:
-                    PCEMean[idx] = PCEModel.clf_poly[Outkey][Inkey].intercept_
+            # A-Opt
+            elif var == 'A-Opt':
+                Phi[idx] = np.trace(invM)
-                # Std = sqrt(sum(coeffs[1:]**2))
-                PCEStd[idx] = np.sqrt(np.sum(np.square(coeffs[1:])))
+            # K-Opt
+            elif var == 'K-Opt':
+                Phi[idx] = np.linalg.cond(M)
-            if PCEModel.DimRedMethod.lower() == 'pca':
-                PCA = PCEModel.pca[Outkey]
-                PCEMean = PCA.mean_ + np.dot(PCEMean, PCA.components_)
-                PCEStd = np.dot(PCEStd, PCA.components_)
+            else:
+                raise Exception('The optimality criterion you requested has '
+                                'not been implemented yet!')
-            # Compute the error between mean and std of PCEModel and OrigModel
-            RMSE_Mean = mean_squared_error(df_MCReference['mean'], PCEMean,
-                                           squared=False)
-            RMSE_std = mean_squared_error(df_MCReference['std'], PCEStd,
-                                          squared=False)
+        # find an optimal point subset to add to the initial design
+        # by minimization of the Phi
+        sorted_idxtotalScore = np.argsort(Phi)
-            PCEMeans[Outkey] = PCEMean
-            PCEStds[Outkey] = PCEStd
+        # select the requested number of samples
+        Xnew = candidates[sorted_idxtotalScore[:n_new_samples]]
-        return RMSE_Mean, RMSE_std
+        return Xnew
     # -------------------------------------------------------------------------
-    def create_PCE_SeqDesign(self, Model):
+    def __normpdf(self, PCEOutputs, std_PC_MC, obs_data, Sigma2s):
-        # Initialization
-        PCEModel = self.MetaModel
-        errorIncreases = False
-        PCEModel.SeqModifiedLOO = {}
-        PCEModel.seqValidError = {}
-        PCEModel.SeqBME = {}
-        PCEModel.SeqKLD = {}
-        PCEModel.SeqDistHellinger = {}
-        PCEModel.seqRMSEMean = {}
-        PCEModel.seqRMSEStd = {}
-        PCEModel.seqMinDist = []
-        pce = True if PCEModel.metaModel.lower() != 'gpe' else False
-        mc_ref = True if hasattr(Model, 'MCReference') else False
-        if mc_ref:
-            Model.read_mc_reference()
+        output_names = self.Model.Output.names
-        # Get the parameters
-        max_n_samples = PCEModel.ExpDesign.MaxNSamples
-        mod_LOO_threshold = PCEModel.ExpDesign.ModifiedLOOThreshold
-        n_canddidate = PCEModel.ExpDesign.NCandidate
-        post_snapshot = PCEModel.ExpDesign.PostSnapshot
-        n_replication = PCEModel.ExpDesign.nReprications
+        SampleSize, index = PCEOutputs[output_names[0]].shape
-        # Handle if only one UtilityFunctions is provided
-        if not isinstance(PCEModel.ExpDesign.UtilityFunction, list):
-            util_func = [PCEModel.ExpDesign.UtilityFunction]
+        # Flatten the ObservationData
+        TotalData = obs_data[output_names].to_numpy().flatten('F')
-        # ---------- Initial PCEModel ----------
-        PCEModel = PCEModel.create_PCE_normDesign(Model)
-        initPCEModel = deepcopy(PCEModel)
+        # Remove NaN
+        TotalData = TotalData[~np.isnan(TotalData)]
+        Sigma2s = Sigma2s[~np.isnan(Sigma2s)]
-        # TODO: Loop over outputs
-        OutputName = Model.Output.names
+        # Flatten the Output
+        TotalOutputs = np.empty((SampleSize, 0))
+        for idx, key in enumerate(output_names):
+            TotalOutputs = np.hstack((TotalOutputs, PCEOutputs[key]))
-        # Estimation of the integral via Monte Varlo integration
-        obs_data = PCEModel.Observations
+        # Covariance Matrix
+        covMatrix = np.zeros((Sigma2s.shape[0], Sigma2s.shape[0]), float)
+        np.fill_diagonal(covMatrix, Sigma2s)
-        # Check if data is provided
-        TotalSigma2 = np.empty((0, 1))
-        if len(obs_data) != 0:
-            # ------ Prepare diagonal enteries for co-variance matrix ---------
-            for keyIdx, key in enumerate(Model.Output.names):
-                # optSigma = 'B'
-                sigma2 = np.array(PCEModel.Discrepancy.Parameters[key])
-                TotalSigma2 = np.append(TotalSigma2, sigma2)
+        # Add the std of the PCE.
+        covMatrix_PCE = np.zeros((Sigma2s.shape[0], Sigma2s.shape[0]), float)
+        stdPCE = np.empty((SampleSize, 0))
+        for idx, key in enumerate(output_names):
+            stdPCE = np.hstack((stdPCE, std_PC_MC[key]))
-            # Calculate the initial BME
-            out = self.BME_Calculator(initPCEModel, obs_data, TotalSigma2)
-            initBME, initKLD, initPosterior, initDistHellinger = out
-            print("\nInitial BME:", initBME)
-            print("Initial KLD:", initKLD)
+        # Expected value of variance (Assump: i.i.d stds)
+        varPCE = np.mean(stdPCE**2, axis=0)
+        # # varPCE = np.var(stdPCE, axis=1)
+        np.fill_diagonal(covMatrix_PCE, varPCE)
-            # Posterior snapshot (initial)
-            if post_snapshot:
-                MAP = PCEModel.ExpDesign.MAP
-                parNames = PCEModel.ExpDesign.parNames
-                print('Posterior snapshot (initial) is being plotted...')
-                self.posteriorPlot(initPosterior, MAP, parNames,
-                                   'SeqPosterior_init')
+        # Aggregate the cov matrices
+        covMatrix += covMatrix_PCE
-            # Check the convergence of the Mean & Std
-            if mc_ref and pce:
-                initRMSEMean, initRMSEStd = self.error_Mean_Std()
-                print(f"Initial Mean and Std error: {initRMSEMean}, {initRMSEStd}")
+        # Compute likelihood
+        self.Likelihoods = stats.multivariate_normal.pdf(TotalOutputs,
+                                                         mean=TotalData,
+                                                         cov=covMatrix,
+                                                         allow_singular=True)
+        return self.Likelihoods
-        # Read the initial experimental design
-        Xinit = initPCEModel.ExpDesign.X
-        initTotalNSamples = len(PCEModel.ExpDesign.X)
-        initYprev = initPCEModel.ModelOutputDict
-        initLCerror = initPCEModel.LCerror
+    # -------------------------------------------------------------------------
+    def __posteriorPlot(self, Posterior, MAP, parNames, key, figsize=(10, 10)):
-        # Read the initial ModifiedLOO
-        if pce:
-            Scores_all, varExpDesignY = [], []
-            for OutName in OutputName:
-                y = initPCEModel.ExpDesign.Y[OutName]
-                Scores_all.append(list(
-                    initPCEModel.ScoresDict[OutName].values()))
-                if PCEModel.DimRedMethod.lower() == 'pca':
-                    pca = PCEModel.pca[OutName]
-                    components = pca.transform(y)
-                    varExpDesignY.append(np.var(components, axis=0))
-                else:
-                    varExpDesignY.append(np.var(y, axis=0))
+        # Initialization
+        newpath = (r'Outputs_SeqPosteriorComparison')
+        if not os.path.exists(newpath):
+            os.makedirs(newpath)
-            Scores = [item for sublist in Scores_all for item in sublist]
-            weights = [item for sublist in varExpDesignY for item in sublist]
-            initModifiedLOO = [np.average([1-score for score in Scores],
-                                          weights=weights)]
+        lw = 3.
+        BoundTuples = self.BoundTuples
+        n_params = len(MAP)
-        if len(PCEModel.validModelRuns) != 0:
-            initValidError = self.validError_()
-            initValidError = list(initValidError.values())
-            print("\nInitial ValidError:", initValidError)
+        # This is the true mean of the second mode that we used above:
+        value1 = MAP
-        # Replicate the sequential design
-        for repIdx in range(n_replication):
-            print(f'>>>> Replication: {repIdx+1}<<<<')
+        # This is the empirical mean of the sample:
+        value2 = np.mean(Posterior, axis=0)
-            # To avoid changes ub original aPCE object
-            # PCEModel = copy.deepcopy(initPCEModel)
-            PCEModel.ExpDesign.X = Xinit
-            PCEModel.ExpDesign.Y = initYprev
-            PCEModel.LCerror = initLCerror
+        if n_params == 2:
-            for util_f in util_func:
-                print(f'>>>> UtilityFunction: {util_f} <<<<')
-                # To avoid changes ub original aPCE object
-                # PCEModel = copy.deepcopy(initPCEModel)
-                PCEModel.ExpDesign.X = Xinit
-                PCEModel.ExpDesign.Y = initYprev
-                PCEModel.LCerror = initLCerror
+            figPosterior, ax = plt.subplots()
+            plt.hist2d(Posterior[:, 0], Posterior[:, 1], bins=(200, 200),
+                       range=np.array([BoundTuples[0], BoundTuples[1]]),
+                       cmap=plt.cm.jet)
-                # Set the experimental design
-                Xprev = Xinit
-                total_n_samples = initTotalNSamples
-                Yprev = initYprev
+            plt.xlabel(parNames[0])
+            plt.ylabel(parNames[1])
-                Xfull = []
-                Yfull = []
+            plt.xlim(BoundTuples[0])
+            plt.ylim(BoundTuples[1])
-                # Store the initial ModifiedLOO
-                if pce:
-                    print("\nInitial ModifiedLOO:", initModifiedLOO)
-                    ModifiedLOO = initModifiedLOO
-                    SeqModifiedLOO = np.array(ModifiedLOO)
+            ax.axvline(value1[0], color="g", lw=lw)
+            ax.axhline(value1[1], color="g", lw=lw)
+            ax.plot(value1[0], value1[1], "sg", lw=lw+1)
-                if len(PCEModel.validModelRuns) != 0:
-                    ValidError = initValidError
-                    SeqValidError = np.array(ValidError)
+            ax.axvline(value2[0], ls='--', color="r", lw=lw)
+            ax.axhline(value2[1], ls='--', color="r", lw=lw)
+            ax.plot(value2[0], value2[1], "sr", lw=lw+1)
-                # Check if data is provided
-                if len(obs_data) != 0:
-                    SeqBME = np.array([initBME])
-                    SeqKLD = np.array([initKLD])
-                    SeqDistHellinger = np.array([initDistHellinger])
+        else:
+            import corner
+            figPosterior = corner.corner(Posterior, labels=parNames,
+                                         title_fmt='.2e', show_titles=True,
+                                         title_kwargs={"fontsize": 12})
-                if mc_ref and pce:
-                    seqRMSEMean = np.array([initRMSEMean])
-                    seqRMSEStd = np.array([initRMSEStd])
+            # Extract the axes
+            axes = np.array(figPosterior.axes).reshape((n_params, n_params))
-                # Start Sequential Experimental Design
-                postcnt = 1
-                itrNr = 1
-                while total_n_samples < max_n_samples:
+            # Loop over the diagonal
+            for i in range(n_params):
+                ax = axes[i, i]
+                ax.axvline(value1[i], color="g")
+                ax.axvline(value2[i], ls='--', color="r")
-                    # Optimal Bayesian Design
-                    PCEModel.ExpDesignFlag = 'sequential'
-                    Xnew, updatedPrior = self.opt_SeqDesign(TotalSigma2,
-                                                            n_canddidate,
-                                                            util_f)
+            # Loop over the histograms
+            for yi in range(n_params):
+                for xi in range(yi):
+                    ax = axes[yi, xi]
+                    ax.axvline(value1[xi], color="g")
+                    ax.axvline(value2[xi], ls='--', color="r")
+                    ax.axhline(value1[yi], color="g")
+                    ax.axhline(value2[yi], ls='--', color="r")
+                    ax.plot(value1[xi], value1[yi], "sg")
+                    ax.plot(value2[xi], value2[yi], "sr")
-                    S = np.min(distance.cdist(Xinit, Xnew, 'euclidean'))
-                    PCEModel.seqMinDist.append(S)
-                    print("\nmin Dist from OldExpDesign:", S)
-                    print("\n")
+        figPosterior.savefig(f'./{newpath}/{key}.svg', bbox_inches='tight')
+        plt.close()
-                    # Evaluate the full model response at the new sample:
-                    Ynew, _ = Model.run_model_parallel(
-                        Xnew, prevRun_No=total_n_samples
-                        )
-                    total_n_samples += Xnew.shape[0]
+        # Save the posterior as .npy
+        np.save(f'./{newpath}/{key}.npy', Posterior)
-                    # ------ Plot the surrogate model vs Origninal Model ------
-                    if PCEModel.adaptVerbose:
-                        from PostProcessing.adaptPlot import adaptPlot
-                        y_hat, std_hat = PCEModel.eval_metamodel(samples=Xnew)
-                        adaptPlot(PCEModel, Ynew, y_hat, std_hat, plotED=False)
+        return figPosterior
-                    # -------- Retrain the surrogate model -------
-                    # Extend new experimental design
-                    Xfull = np.vstack((Xprev, Xnew))
+    # -------------------------------------------------------------------------
+    def __hellinger_distance(self, P, Q):
+        """
+        Hellinger distance between two continuous distributions.
-                    # Updating existing key's value
-                    for OutIdx in range(len(OutputName)):
-                        OutName = OutputName[OutIdx]
-                        try:
-                            Yfull = np.vstack((Yprev[OutName], Ynew[OutName]))
-                        except:
-                            Yfull = np.vstack((Yprev[OutName], Ynew))
+        The maximum distance 1 is achieved when P assigns probability zero to
+        every set to which Q assigns a positive probability, and vice versa.
+        0 (identical) and 1 (maximally different)
-                        PCEModel.ModelOutputDict[OutName] = Yfull
+        Parameters
+        ----------
+        P : array
+            Reference likelihood.
+        Q : array
+            Estimated likelihood.
-                    PCEModel.ExpDesign.SamplingMethod = 'user'
-                    PCEModel.ExpDesign.X = Xfull
-                    PCEModel.ExpDesign.Y = PCEModel.ModelOutputDict
+        Returns
+        -------
+        float
+            Hellinger distance of two distributions.
-                    # save the Experimental Design for next iteration
-                    Xprev = Xfull
-                    Yprev = PCEModel.ModelOutputDict
+        """
+        mu1 = P.mean()
+        Sigma1 = np.std(P)
-                    # Pass the new prior as the input
-                    PCEModel.Inputs.polycoeffsFlag = False
-                    if updatedPrior is not None:
-                        PCEModel.Inputs.polycoeffsFlag = True
-                        print("updatedPrior:", updatedPrior.shape)
-                        # Arbitrary polynomial chaos
-                        for i in range(updatedPrior.shape[1]):
-                            PCEModel.Inputs.Marginals[i].DistType = None
-                            x = updatedPrior[:, i]
-                            PCEModel.Inputs.Marginals[i].InputValues = x
+        mu2 = Q.mean()
+        Sigma2 = np.std(Q)
-                    prevPCEModel = PCEModel
-                    PCEModel = PCEModel.create_PCE_normDesign(Model)
+        term1 = np.sqrt(2*Sigma1*Sigma2 / (Sigma1**2 + Sigma2**2))
-                    # -------- Evaluate the retrain surrogate model -------
-                    # Compute the validation error
-                    if len(PCEModel.validModelRuns) != 0:
-                        validError = self.validError_()
-                        ValidError = list(validError.values())
-                        print("\nUpdated ValidError:", ValidError)
+        term2 = np.exp(-.25 * (mu1 - mu2)**2 / (Sigma1**2 + Sigma2**2))
-                    # Extract Modified LOO from Output
-                    if pce:
-                        Scores_all, varExpDesignY = [], []
-                        for OutName in OutputName:
-                            y = initPCEModel.ExpDesign.Y[OutName]
-                            Scores_all.append(list(
-                                PCEModel.ScoresDict[OutName].values()))
-                            if PCEModel.DimRedMethod.lower() == 'pca':
-                                pca = PCEModel.pca[OutName]
-                                components = pca.transform(y)
-                                varExpDesignY.append(np.var(components,
-                                                            axis=0))
-                            else:
-                                varExpDesignY.append(np.var(y, axis=0))
-                        Scores = [item for sublist in Scores_all for item
-                                  in sublist]
-                        weights = [item for sublist in varExpDesignY for item
-                                   in sublist]
-                        ModifiedLOO = [np.average(
-                            [1-score for score in Scores], weights=weights)]
+        H_squared = 1 - term1 * term2
-                        print('\n')
-                        print(f"Updated ModifiedLOO {util_f}:\n", ModifiedLOO)
-                        print("Xfull:", Xfull.shape)
-                        print('\n')
+        return np.sqrt(H_squared)
-                    # check the direction of the error (on average):
-                    # if it increases consistently stop the iterations
-                    n_checks = 3
-                    if itrNr > n_checks * PCEModel.ExpDesign.NrofNewSample:
-                        # ss<0 error increasing
-                        ss = np.sign(SeqModifiedLOO - ModifiedLOO)
-                        errorIncreases = np.sum(np.mean(ss[-2:], axis=1)) <= \
-                            -1*n_checks
+    # -------------------------------------------------------------------------
+    def __BME_Calculator(self, PCEModel, obs_data, sigma2Dict):
+        """
+        This function computes the Bayesian model evidence (BME) via Monte
+        Carlo integration.
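The expression in __hellinger_distance is the closed-form Hellinger distance between two normal distributions fitted to P and Q, i.e. H^2 = 1 - sqrt(2*s1*s2/(s1^2+s2^2)) * exp(-0.25*(mu1-mu2)^2/(s1^2+s2^2)). A quick numerical check on synthetic samples:

    import numpy as np

    rng = np.random.default_rng(0)
    P = rng.normal(0.0, 1.0, 10000)   # reference log-likelihoods
    Q = rng.normal(0.5, 1.2, 10000)   # estimated log-likelihoods

    mu1, s1 = P.mean(), np.std(P)
    mu2, s2 = Q.mean(), np.std(Q)
    term1 = np.sqrt(2 * s1 * s2 / (s1**2 + s2**2))
    term2 = np.exp(-0.25 * (mu1 - mu2)**2 / (s1**2 + s2**2))
    H = np.sqrt(1 - term1 * term2)
    print(round(H, 2))  # ~0.18 here; 0 means identical, 1 maximally different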
-                    # If error is increasing in the last n_check iteration,
-                    # stop the search and return the previous PCEModel
-                    if errorIncreases:
-                        print("Warning: The modified error is increasing "
-                              "compared to the last {n_checks} iterations.")
-                        PCEModel = prevPCEModel
-                        break
-                    else:
-                        prevPCEModel = PCEModel
+        """
+        # Initializations
+        valid_likelihoods = PCEModel.valid_likelihoods
-                    # Store updated ModifiedLOO
-                    if pce:
-                        SeqModifiedLOO = np.vstack(
-                            (SeqModifiedLOO, ModifiedLOO))
-                        if len(PCEModel.validModelRuns) != 0:
-                            SeqValidError = np.vstack(
-                                (SeqValidError, ValidError))
+        post_snapshot = PCEModel.ExpDesign.post_snapshot
+        if post_snapshot or len(valid_likelihoods) != 0:
+            newpath = (r'Outputs_SeqPosteriorComparison')
+            if not os.path.exists(newpath):
+                os.makedirs(newpath)
-                    # -------- Caclulation of BME as accuracy metric -------
-                    # Check if data is provided
-                    if len(obs_data) != 0:
-                        # Calculate the initial BME
-                        out = self.BME_Calculator(PCEModel, obs_data,
-                                                  TotalSigma2)
-                        BME, KLD, Posterior, DistHellinger = out
-                        print('\n')
-                        print("Updated BME:", BME)
-                        print("Updated KLD:", KLD)
-                        print('\n')
+        SamplingMethod = 'random'
+        MCsize = 100000
+        ESS = 0
-                        # Plot some snapshots of the posterior
-                        stepSnapshot = PCEModel.ExpDesign.stepSnapshot
-                        if post_snapshot and postcnt % stepSnapshot == 0:
-                            MAP = PCEModel.ExpDesign.MAP
-                            parNames = PCEModel.ExpDesign.parNames
-                            print('Posterior snapshot is being plotted...')
-                            self.posteriorPlot(Posterior, MAP, parNames,
-                                               'SeqPosterior_{postcnt}')
-                            postcnt += 1
+        # Estimation of the integral via Monte Carlo integration
+        while ((ESS > MCsize) or (ESS < 1)):
-                    # Check the convergence of the Mean&Std
-                    if mc_ref and pce:
-                        print('\n')
-                        RMSE_Mean, RMSE_std = self.error_Mean_Std()
-                        print(f"Updated Mean and Std error: {RMSE_Mean}, "
-                              f"{RMSE_std}")
-                        print('\n')
+            # Generate samples for Monte Carlo simulation
+            if len(valid_likelihoods) == 0:
+                X_MC = PCEModel.ExpDesign.generate_samples(MCsize,
+                                                           SamplingMethod)
+            else:
+                X_MC = PCEModel.valid_samples
+                MCsize = X_MC.shape[0]
-                    # Store the updated BME & KLD
-                    # Check if data is provided
-                    if len(obs_data) != 0:
-                        SeqBME = np.vstack((SeqBME, BME))
-                        SeqKLD = np.vstack((SeqKLD, KLD))
-                        SeqDistHellinger = np.vstack((SeqDistHellinger,
-                                                      DistHellinger))
-                    if mc_ref and pce:
-                        seqRMSEMean = np.vstack((seqRMSEMean, RMSE_Mean))
-                        seqRMSEStd = np.vstack((seqRMSEStd, RMSE_std))
+            # Monte Carlo simulation for the candidate design
+            Y_MC, std_MC = PCEModel.eval_metamodel(samples=X_MC)
-                    if pce and any(LOO < mod_LOO_threshold
-                                   for LOO in ModifiedLOO):
-                        break
-                    itrNr += 1
-                # Store updated ModifiedLOO and BME in dictonary
-                strKey = f'{util_f}_rep_{repIdx+1}'
-                if pce:
-                    PCEModel.SeqModifiedLOO[strKey] = SeqModifiedLOO
-                if len(PCEModel.validModelRuns) != 0:
-                    PCEModel.seqValidError[strKey] = SeqValidError
+            # Likelihood computation (Comparison of data and
+            # simulation results via PCE with candidate design)
+            Likelihoods = self.__normpdf(Y_MC, std_MC, obs_data, sigma2Dict)
-                # Check if data is provided
-                if len(obs_data) != 0:
-                    PCEModel.SeqBME[strKey] = SeqBME
-                    PCEModel.SeqKLD[strKey] = SeqKLD
-                    if len(PCEModel.validLikelihoods) != 0:
-                        PCEModel.SeqDistHellinger[strKey] = SeqDistHellinger
-                if mc_ref and pce:
-                    PCEModel.seqRMSEMean[strKey] = seqRMSEMean
-                    PCEModel.seqRMSEStd[strKey] = seqRMSEStd
+            # Check the Effective Sample Size (1000<ESS<MCsize)
+            ESS = 1 / np.sum(np.square(Likelihoods/np.sum(Likelihoods)))
-        return PCEModel
+            # Enlarge sample size if it doesn't fulfill the criteria
+            if ((ESS > MCsize) or (ESS < 1)):
+                print('ESS={0} MC size should be larger.'.format(ESS))
+                MCsize = MCsize * 10
+                ESS = 0
+
+        # Rejection Step
+        # Random numbers between 0 and 1
+        unif = np.random.rand(1, MCsize)[0]
+
+        # Reject the poorly performed prior
+        accepted = (Likelihoods/np.max(Likelihoods)) >= unif
+        X_Posterior = X_MC[accepted]
+
+        # ------------------------------------------------------------
+        # --- Kullback-Leibler Divergence & Information Entropy ------
+        # ------------------------------------------------------------
+        # Prior-based estimation of BME
+        logBME = np.log(np.nanmean(Likelihoods))
+
+        # Posterior-based expectation of likelihoods
+        postExpLikelihoods = np.mean(np.log(Likelihoods[accepted]))
+
+        # Posterior-based expectation of prior densities
+        # postExpPrior = np.mean([log_prior(sample) for sample in X_Posterior])
+
+        # Calculate Kullback-Leibler Divergence
+        # KLD = np.mean(np.log(Likelihoods[Likelihoods!=0])- logBME)
+        KLD = postExpLikelihoods - logBME
+
+        # Information Entropy based on Entropy paper Eq. 38
+        # infEntropy = logBME - postExpPrior - postExpLikelihoods
+
+        # If post_snapshot is True, plot likelihood vs reference
+        if post_snapshot or len(valid_likelihoods) != 0:
+            idx = len([name for name in os.listdir(newpath) if 'Likelihoods_'
+                       in name and os.path.isfile(os.path.join(newpath, name))])
+            fig, ax = plt.subplots()
+            sns.kdeplot(np.log(valid_likelihoods[valid_likelihoods > 0]),
+                        shade=True, color="g", label='Ref. Likelihood')
+            sns.kdeplot(np.log(Likelihoods[Likelihoods > 0]), shade=True,
+                        color="b", label='Likelihood with PCE')
+
+            # Hellinger distance
+            ref_like = np.log(valid_likelihoods[valid_likelihoods > 0])
+            est_like = np.log(Likelihoods[Likelihoods > 0])
+            distHellinger = self.__hellinger_distance(ref_like, est_like)
+            text = f"Hellinger Dist.={distHellinger:.3f}\n logBME={logBME:.3f}" \
+                   f"\n DKL={KLD:.3f}"
+
+            plt.text(0.05, 0.75, text, bbox=dict(facecolor='wheat',
+                                                 edgecolor='black',
+                                                 boxstyle='round,pad=1'),
+                     transform=ax.transAxes)
+
+            fig.savefig(f'./{newpath}/Likelihoods_{idx}.svg',
+                        bbox_inches='tight')
+            plt.close()
+
+        else:
+            distHellinger = 0.0
+
+        return (logBME, KLD, X_Posterior, distHellinger)
+
+    # -------------------------------------------------------------------------
+    def __validError(self):
+
+        PCEModel = self.MetaModel
+        Model = self.Model
+        OutputName = Model.Output.names
+
+        # Generate random samples
+        Samples = PCEModel.valid_samples
+
+        # Extract the original model with the generated samples
+        ModelOutputs = PCEModel.valid_model_runs
+
+        # Run the PCE model with the generated samples
+        PCEOutputs, PCEOutputs_std = PCEModel.eval_metamodel(samples=Samples)
+
+        validError_dict = {}
+        # Loop over the keys and compute RMSE error.
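Numerically, __BME_Calculator estimates the evidence as the prior mean of the likelihoods and the KL divergence as the posterior expectation of the log-likelihood minus log-BME, with posterior samples obtained by the same rejection step. A compact sketch of the estimator on toy likelihood values:

    import numpy as np

    rng = np.random.default_rng(1)
    likelihoods = rng.lognormal(mean=-2.0, sigma=1.0, size=100000)  # toy

    # Prior-based Monte Carlo estimate of the Bayesian model evidence
    logBME = np.log(np.nanmean(likelihoods))

    # Rejection step: accept proportionally to the likelihood
    unif = rng.random(likelihoods.size)
    accepted = likelihoods / np.max(likelihoods) >= unif

    # Posterior expectation of the log-likelihood and the KL divergence
    postExpLikelihoods = np.mean(np.log(likelihoods[accepted]))
    KLD = postExpLikelihoods - logBME
    print(f"logBME={logBME:.3f}, KLD={KLD:.3f}")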
+        for key in OutputName:
+            weight = np.mean(np.square(PCEOutputs_std[key]), axis=0)
+            if all(weight == 0):
+                weight = 'variance_weighted'
+            validError_dict[key] = mean_squared_error(ModelOutputs[key],
+                                                      PCEOutputs[key])
+            validError_dict[key] /= np.var(ModelOutputs[key], ddof=1)
+
+        return validError_dict
+
+    # -------------------------------------------------------------------------
+    def __error_Mean_Std(self):
+
+        PCEModel = self.MetaModel
+        # Extract the mean and std provided by user
+        df_MCReference = PCEModel.ModelObj.MCReference
+
+        # Compute the mean and std based on the PCEModel
+        PCEMeans = dict()
+        PCEStds = dict()
+        for Outkey, ValuesDict in PCEModel.coeffs_dict.items():
+            PCEMean = np.zeros((len(ValuesDict)))
+            PCEStd = np.zeros((len(ValuesDict)))
+
+            for Inkey, InIdxValues in ValuesDict.items():
+                idx = int(Inkey.split('_')[1]) - 1
+                coeffs = PCEModel.coeffs_dict[Outkey][Inkey]
+
+                # Mean = c_0
+                if coeffs[0] != 0:
+                    PCEMean[idx] = coeffs[0]
+                else:
+                    PCEMean[idx] = PCEModel.clf_poly[Outkey][Inkey].intercept_
+
+                # Std = sqrt(sum(coeffs[1:]**2))
+                PCEStd[idx] = np.sqrt(np.sum(np.square(coeffs[1:])))
+
+            if PCEModel.dim_red_method.lower() == 'pca':
+                PCA = PCEModel.pca[Outkey]
+                PCEMean = PCA.mean_ + np.dot(PCEMean, PCA.components_)
+                PCEStd = np.dot(PCEStd, PCA.components_)
+
+            # Compute the error between mean and std of PCEModel and OrigModel
+            RMSE_Mean = mean_squared_error(df_MCReference['mean'], PCEMean,
+                                           squared=False)
+            RMSE_std = mean_squared_error(df_MCReference['std'], PCEStd,
+                                          squared=False)
+
+            PCEMeans[Outkey] = PCEMean
+            PCEStds[Outkey] = PCEStd
+
+        return RMSE_Mean, RMSE_std
diff --git a/BayesValidRox/surrogate_models/surrogate_models.py b/BayesValidRox/surrogate_models/surrogate_models.py
index 1451f505a7857ef7ce1ca44f3284d5a3136c85d8..c1edfda71635be1fab352d59a7b3b7c0c54723d8 100644
--- a/BayesValidRox/surrogate_models/surrogate_models.py
+++ b/BayesValidRox/surrogate_models/surrogate_models.py
@@ -1,9 +1,15 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """
-Created on Sat Aug 24 13:07:07 2019
-
-@author: farid
+Author: Farid Mohammadi, M.Sc.
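The quantity returned per output key by __validError is the mean squared error normalized by the sample variance of the reference model runs, i.e. the complement of an R^2-type score: close to 0 for a perfect surrogate, around 1 for a surrogate no better than predicting the output mean. A one-output illustration:

    import numpy as np
    from sklearn.metrics import mean_squared_error

    y_model = np.array([1.0, 2.0, 3.0, 4.0])   # reference model runs
    y_pce = y_model + 0.1                      # near-perfect surrogate

    valid_error = mean_squared_error(y_model, y_pce)
    valid_error /= np.var(y_model, ddof=1)
    print(valid_error)  # 0.006, roughly 1 - R^2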
+E-Mail: farid.mohammadi@iws.uni-stuttgart.de
+Department of Hydromechanics and Modelling of Hydrosystems (LH2)
+Institute for Modelling Hydraulic and Environmental Systems (IWS), University
+of Stuttgart www.iws.uni-stuttgart.de/lh2/
+Pfaffenwaldring 61
+70569 Stuttgart
+
+Created on Sat Aug 24 2019
 """
 import warnings
 import numpy as np
@@ -23,6 +29,7 @@ from joblib import Parallel, delayed
 from .ExpDesigns import ExpDesigns
 from .glexindex import glexindex
+from .eval_rec_rule import eval_univ_basis
 from .RegressionFastARD import RegressionFastARD
 from .RegressionFastLaplace import RegressionFastLaplace
 from .bayes_linear import VBLinearRegression, EBLinearRegression
@@ -30,215 +37,417 @@ warnings.filterwarnings("ignore")
 plt.style.use('../../bayesvalidrox.mplstyle')
 
-class Metamodel:
-
-    def __init__(self, InputClass):
-
-        self.Inputs = InputClass
-        self.metaModel = 'PCE'
-        self.DisplayFlag = False
-        self.MaxPceDegree = None
-        self.MinPceDegree = 1
-        self.q = 1.0
-        self.P = None
-        self.RegMethod = None
-        self.DimRedMethod = 'no'
-        self.Discrepancy = None
-        self.ExpDesign = None
-        self.ExpDesignFlag = 'normal'
-        self.OptDesignFlag = False
-        self.ModelOutputDict = None
-        self.qNormDict = {}
-        self.Observations = []
-        self.validModelRuns = []
-        self.validSamples = []
-        self.validLikelihoods = []
-        self.index = None
-        self.adaptVerbose = False
+class MetaModel:
+    """ Meta (surrogate) model
+
+    This class trains a surrogate model. It accepts an input object
+    (input_obj) containing the distribution specification of uncertain
+    parameters and a model object with instructions on how to run the
+    computational model.
+    Two surrogate model types are supported: polynomial chaos expansion
+    (`PCE`) and Gaussian process regression (`GPE`).
+    Additionally, two training modes are supported: one-shot (`normal`) and
+    adaptive sequential (`sequential`) experimental design.
+    """
+
+    def __init__(self, input_obj, meta_model_type='PCE', pce_reg_method='OLS',
+                 pce_deg=1, pce_q_norm=1.0, dim_red_method='no',
+                 verbose=False):
+
+        self.input_obj = input_obj
+        self.meta_model_type = meta_model_type
+        self.pce_reg_method = pce_reg_method
+        self.pce_deg = pce_deg
+        self.pce_q_norm = pce_q_norm
+        self.dim_red_method = dim_red_method
+        self.verbose = verbose
 
     # -------------------------------------------------------------------------
     def create_metamodel(self, Model):
+        """
+        Starts the training of the meta-model for the model objects containing
+        the given computational model.
+
+        Parameters
+        ----------
+        Model : object
+            Model object.
+
+        Returns
+        -------
+        metamodel : object
+            The meta model object.
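With the refactored signature, surrogate configuration moves from post-hoc attribute assignment into the constructor. A schematic usage sketch; the import assumes the repository root is on sys.path, and the inputs object is a stand-in that only mimics the Marginals attribute used by the class:

    from BayesValidRox.surrogate_models.surrogate_models import MetaModel
    from types import SimpleNamespace

    # Stand-in for the package's input object (real code builds this
    # with the package's input machinery)
    inputs = SimpleNamespace(Marginals=[SimpleNamespace(Name='x1'),
                                        SimpleNamespace(Name='x2')])

    meta_model = MetaModel(input_obj=inputs,
                           meta_model_type='PCE',     # or 'GPE'
                           pce_reg_method='FastARD',  # sparse Bayesian fit
                           pce_deg=[1, 2, 3, 4, 5],   # candidate degrees
                           pce_q_norm=0.75,           # hyperbolic truncation
                           dim_red_method='PCA',
                           verbose=True)
    meta_model.add_ExpDesign()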
+ """ self.ModelObj = Model - self.ExpDesign.initNrSamples = self.ExpDesign.NrSamples - self.NofPa = len(self.Inputs.Marginals) + self.n_params = len(self.input_obj.Marginals) + self.ExpDesignFlag = 'normal' + # --- Prepare pce degree --- + if self.meta_model_type.lower() == 'pce': + if type(self.pce_deg) is not np.ndarray: + self.pce_deg = np.array(self.pce_deg) if self.ExpDesign.Method == 'sequential': from .sequential_design import SeqDesign seq_design = SeqDesign(self) - metamodel = seq_design.create_PCE_SeqDesign(Model) + metamodel = seq_design.train_seq_design(Model) elif self.ExpDesign.Method == 'normal': - metamodel = self.create_PCE_normDesign(Model) + self.ExpDesignFlag = 'normal' + metamodel = self.train_norm_design(Model) else: raise Exception("The method for experimental design you requested" " has not been implemented yet.") - # Cleanup - # Zip the subdirectories - try: - dir_name = Model.name - key = f'{Model.name}_' - Model.zip_subdirs(dir_name, key) - except: - pass + # Zip the model run directories + if self.ModelObj.link_type.lower() == 'pylink': + Model.zip_subdirs(Model.name, f'{Model.name}_') return metamodel # ------------------------------------------------------------------------- - class AutoVivification(dict): - """Implementation of perl's AutoVivification feature.""" + def train_norm_design(self, Model, verbose=False): + """ + This function loops over the outputs and each time step/point and fits + the meta model. - def __getitem__(self, item): - try: - return dict.__getitem__(self, item) - except KeyError: - value = self[item] = type(self)() - return value + Parameters + ---------- + Model : object + Model object. + verbose : bool, optional + Flag for a sequential design in silent mode. The default is False. + + Returns + ------- + self: object + Meta-model object. + + """ + + # Get the collocation points to run the forward model + CollocationPoints, OutputDict = self.generate_ExpDesign(Model) + + # Initialize the nested dictionaries + self.deg_dict = self.auto_vivification() + self.q_norm_dict = self.auto_vivification() + self.coeffs_dict = self.auto_vivification() + self.basis_dict = self.auto_vivification() + self.score_dict = self.auto_vivification() + self.clf_poly = self.auto_vivification() + self.gp_poly = self.auto_vivification() + self.pca = self.auto_vivification() + self.LCerror = self.auto_vivification() + self.x_scaler = {} + + # Read observations or MCReference + if self.ExpDesignFlag != 'sequential': + if len(Model.observations) != 0: + self.Observations = Model.read_observation() + + # Define the DegreeArray + nSamples, ndim = CollocationPoints.shape + self.DegreeArray = self.__select_degree(ndim, nSamples) + + # Generate all basis indices + self.allBasisIndices = self.auto_vivification() + for deg in self.DegreeArray: + keys = self.allBasisIndices.keys() + if deg not in np.fromiter(keys, dtype=float): + # Generate the polynomial basis indices + for qidx, q in enumerate(self.pce_q_norm): + basis_indices = self.create_basis_indices(degree=deg, + q_norm=q) + self.allBasisIndices[str(deg)][str(q)] = basis_indices + + # Evaluate the univariate polynomials on ExpDesign + if self.meta_model_type.lower() != 'gpe': + self.univ_p_val = self.univ_basis_vals(CollocationPoints) + + if 'x_values' in OutputDict: + self.ExpDesign.x_values = OutputDict['x_values'] + del OutputDict['x_values'] + + # --- Loop through data points and fit the surrogate --- + if not verbose: + print(f"\n>>>> Training the {self.meta_model_type} metamodel " + "started. 
<<<<<<\n") + items = tqdm(OutputDict.items(), desc="Fitting regression") + else: + items = OutputDict.items() + + # For loop over the components/outputs + for key, Output in items: + + # Dimensionality reduction with PCA, if specified + if self.dim_red_method.lower() == 'pca': + self.pca[key], target = self.pca_transformation(Output) + else: + target = Output + + # Parallel fit regression + if self.meta_model_type.lower() == 'gpe': + # Prepare the input matrix + scaler = MinMaxScaler() + X_S = scaler.fit_transform(CollocationPoints) + + self.x_scaler[key] = scaler + + out = Parallel(n_jobs=-1, prefer='threads')( + delayed(self.gaussian_process_emulator)(X_S, target[:, idx]) + for idx in range(target.shape[1])) + + for idx in range(target.shape[1]): + self.gp_poly[key][f"y_{idx+1}"] = out[idx] + + else: + out = Parallel(n_jobs=-1, prefer='threads')( + delayed(self.adaptive_regression)(CollocationPoints, + target[:, idx], idx) + for idx in range(target.shape[1])) + + for i in range(target.shape[1]): + # Create a dict to pass the variables + self.deg_dict[key][f"y_{i+1}"] = out[i]['degree'] + self.q_norm_dict[key][f"y_{i+1}"] = out[i]['qnorm'] + self.coeffs_dict[key][f"y_{i+1}"] = out[i]['coeffs'] + self.basis_dict[key][f"y_{i+1}"] = out[i]['multi_indices'] + self.score_dict[key][f"y_{i+1}"] = out[i]['LOOCVScore'] + self.clf_poly[key][f"y_{i+1}"] = out[i]['clf_poly'] + self.LCerror[key][f"y_{i+1}"] = out[i]['LCerror'] + + if not verbose: + print(f"\n>>>> Training the {self.meta_model_type} metamodel" + " sucessfully completed. <<<<<<\n") + + return self + + # ------------------------------------------------------------------------- + def create_basis_indices(self, degree, q_norm): + """ + Creates set of selected multi-indices of multivariate polynomials for + certain parameter numbers, polynomial degree, hyperbolic (or q-norm) + truncation scheme. + + Parameters + ---------- + degree : int + Polynomial degree. + q_norm : float + hyperbolic (or q-norm) truncation. + + Returns + ------- + basis_indices : array (n_terms, n_params) + Multi-indices of multivariate polynomials. + + """ + basis_indices = glexindex(start=0, stop=degree+1, + dimensions=self.n_params, + cross_truncation=q_norm, + reverse=False, graded=True) + return basis_indices # ------------------------------------------------------------------------- - def PolyBasisIndices(self, degree, q): + def add_ExpDesign(self): + """ + Instanciates experimental design object. - self.PolynomialDegrees = glexindex(start=0, stop=degree+1, - dimensions=self.NofPa, - cross_truncation=q, - reverse=False, graded=True) - return self.PolynomialDegrees + Returns + ------- + None. + + """ + self.ExpDesign = ExpDesigns(self.input_obj, + meta_Model=self.meta_model_type) # ------------------------------------------------------------------------- - def addExpDesign(self): - self.ExpDesign = ExpDesigns(self.Inputs, metaModel=self.metaModel) + def generate_ExpDesign(self, Model): + """ + Prepares the experimental design either by reading from the prescribed + data or running simulations. + + Parameters + ---------- + Model : object + Model object. - def Exe_ExpDesign(self, Model): + Raises + ------ + Exception + If model sumulations are not provided properly. + Returns + ------- + ED_X_tr: array (n_samples, n_params) + Training samples transformed by an isoprobabilistic transformation. + ED_Y: dict + Model simulations (target) for all outputs. 
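The q-norm handed to create_basis_indices controls how aggressively mixed high-order terms are pruned: a multi-index alpha is kept when its q-(quasi-)norm does not exceed the degree. A brute-force stand-in for what glexindex produces with cross_truncation=q_norm; this is only an illustration of the truncation rule, not the library routine itself:

    import itertools
    import numpy as np

    def hyperbolic_index_set(n_params, degree, q_norm):
        """All multi-indices alpha with ||alpha||_q <= degree (brute force)."""
        keep = []
        for alpha in itertools.product(range(degree + 1), repeat=n_params):
            norm_q = np.sum(np.array(alpha, float)**q_norm)**(1.0/q_norm)
            if norm_q <= degree:
                keep.append(alpha)
        return np.array(keep)

    full = hyperbolic_index_set(n_params=2, degree=4, q_norm=1.0)
    sparse = hyperbolic_index_set(n_params=2, degree=4, q_norm=0.5)
    print(len(full), len(sparse))  # 15 10 -- q < 1 drops mixed terms first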
+ """ + ExpDesign = self.ExpDesign if self.ExpDesignFlag != 'sequential': - # Read ExpDesign from the provided hdf5 - if self.ExpDesign.hdf5File is not None: + # Read ExpDesign (training and targets) from the provided hdf5 + if ExpDesign.hdf5_file is not None: # Read hdf5 file - f = h5py.File(self.ExpDesign.hdf5File, 'r+') + f = h5py.File(ExpDesign.hdf5_file, 'r+') # Read EDX and pass it to ExpDesign object try: - self.ExpDesign.X = np.array(f["EDX/New_init_"]) - except: - self.ExpDesign.X = np.array(f["EDX/init_"]) + ExpDesign.X = np.array(f["EDX/New_init_"]) + except KeyError: + ExpDesign.X = np.array(f["EDX/init_"]) - self.ExpDesign.NrSamples = self.ExpDesign.X.shape[0] + # Update number of initial samples + ExpDesign.n_init_samples = ExpDesign.X.shape[0] # Read EDX and pass it to ExpDesign object - OutputNames = self.ModelObj.Output.Names - self.ExpDesign.Y = {} + out_names = self.ModelObj.Output.Names + ExpDesign.Y = {} # Extract x values try: - self.ExpDesign.Y["x_values"] = dict() - for varIdx, var in enumerate(OutputNames): - self.ExpDesign.Y["x_values"][var] = np.array(f[f"x_values/{var}"]) - except: - self.ExpDesign.Y["x_values"] = np.array(f["x_values"]) - - for varIdx, var in enumerate(OutputNames): + ExpDesign.Y["x_values"] = dict() + for varIdx, var in enumerate(out_names): + x = np.array(f[f"x_values/{var}"]) + ExpDesign.Y["x_values"][var] = x + except KeyError: + ExpDesign.Y["x_values"] = np.array(f["x_values"]) + + # Store the output + for varIdx, var in enumerate(out_names): try: - self.ExpDesign.Y[var] = np.array(f[f"EDY/{var}/New_init_"]) - except: - self.ExpDesign.Y[var] = np.array(f[f"EDY/{var}/init_"]) + y = np.array(f[f"EDY/{var}/New_init_"]) + except KeyError: + y = np.array(f[f"EDY/{var}/init_"]) + ExpDesign.Y[var] = y f.close() else: # Check if an old hdf5 file exists: if yes, rename it - hdf5file = 'ExpDesign'+'_'+self.ModelObj.name+'.hdf5' + hdf5file = f'ExpDesign_{self.ModelObj.name}.hdf5' if os.path.exists(hdf5file): os.rename(hdf5file, 'old_'+hdf5file) # ---- Prepare X samples ---- - ED_X, ED_X_tr = self.ExpDesign.GetSample(self.ExpDesign.NrSamples, - self.ExpDesign.SamplingMethod, - transform=True, - MaxPceDegree=self.MaxPceDegree) - self.ExpDesign.X = ED_X - self.ExpDesign.collocationPoints = ED_X_tr - self.BoundTuples = self.ExpDesign.BoundTuples + ED_X, ED_X_tr = ExpDesign.generate_ED(ExpDesign.n_init_samples, + ExpDesign.sampling_method, + transform=True, + max_pce_deg=np.max(self.pce_deg)) + ExpDesign.X = ED_X + ExpDesign.collocationPoints = ED_X_tr + self.BoundTuples = ExpDesign.BoundTuples # ---- Run simulations at X ---- - if self.ExpDesign.Y is None: + if not hasattr(ExpDesign, 'Y'): print('\n Now the forward model needs to be run!\n') ED_Y, up_ED_X = Model.run_model_parallel(ED_X) - self.ExpDesign.X = up_ED_X + ExpDesign.X = up_ED_X self.ModelOutputDict = ED_Y - self.ExpDesign.Y = ED_Y + ExpDesign.Y = ED_Y else: # Check if a dict has been passed. - if type(self.ExpDesign.Y) is dict: - self.ModelOutputDict = self.ExpDesign.Y + if type(ExpDesign.Y) is dict: + self.ModelOutputDict = ExpDesign.Y else: raise Exception('Please provide either a dictionary or a hdf5' - 'file to ExpDesign.hdf5File argument.') + 'file to ExpDesign.hdf5_file argument.') - return self.ExpDesign.collocationPoints + return ED_X_tr, self.ModelOutputDict # ------------------------------------------------------------------------- - def univ_basis_vals(self, ED_X, n_max=None): - """ - A function to evaluate univariate regressors along input directions. 
+    def univ_basis_vals(self, samples, n_max=None):
+        """
+        Evaluates univariate regressors along input directions.
+
+        Parameters
+        ----------
+        samples : array of shape (n_samples, n_params)
+            Samples.
+        n_max : int, optional
+            Maximum polynomial degree. The default is None.
+
+        Returns
+        -------
+        univ_basis: array of (n_samples, n_params, n_max+1)
+            All univariate regressors up to n_max.
+
+        """
         # Extract information
-        polyTypes = self.ExpDesign.Polytypes
-        if ED_X.ndim != 2:
-            ED_X = ED_X.reshape(1, len(ED_X))
-        n_max = self.MaxPceDegree if n_max is None else n_max
+        poly_types = self.ExpDesign.poly_types
+        if samples.ndim != 2:
+            samples = samples.reshape(1, len(samples))
+        n_max = np.max(self.pce_deg) if n_max is None else n_max
 
-        from .eval_rec_rule import eval_univ_basis
+        # Extract poly coeffs
         if self.ExpDesign.arbitrary:
             apolycoeffs = self.ExpDesign.polycoeffs
         else:
             apolycoeffs = None
-        return eval_univ_basis(ED_X, n_max, polyTypes, apolycoeffs)
+
+        # Evaluate univariate basis
+        univ_basis = eval_univ_basis(samples, n_max, poly_types, apolycoeffs)
+
+        return univ_basis
 
     # -------------------------------------------------------------------------
-    def PCE_create_Psi(self, BasisIndices, univ_p_val):
+    def create_psi(self, basis_indices, univ_p_val):
         """
         This function assemble the design matrix Psi from the given basis index
         set INDICES and the univariate polynomial evaluations univ_p_val.
-        """
 
+        Parameters
+        ----------
+        basis_indices : array of shape (n_terms, n_params)
+            Multi-indices of multivariate polynomials.
+        univ_p_val : array of (n_samples, n_params, n_max+1)
+            All univariate regressors up to n_max.
+
+        Raises
+        ------
+        ValueError
+            n_terms in arguments do not match.
+
+        Returns
+        -------
+        psi : array of shape (n_samples, n_terms)
+            Multivariate regressors.
+
+        """
         # Check if BasisIndices is a sparse matrix
-        sparsity = sp.sparse.issparse(BasisIndices)
+        sparsity = sp.sparse.issparse(basis_indices)
         if sparsity:
-            BasisIndices = BasisIndices.toarray()
+            basis_indices = basis_indices.toarray()
 
         # Initialization and consistency checks
         # number of input variables
-        NofPa = univ_p_val.shape[1]
+        n_params = univ_p_val.shape[1]
 
         # Size of the experimental design
-        NofSamples = univ_p_val.shape[0]
+        n_samples = univ_p_val.shape[0]
 
-        # number of basis elements
-        P = BasisIndices.shape[0]
+        # number of basis terms
+        n_terms = basis_indices.shape[0]
 
         # check that the variables have consistent sizes
-        if NofPa != BasisIndices.shape[1]:
-            raise ValueError("The shapes of BasisIndices and univ_p_val don't "
-                             "match!!")
+        if n_params != basis_indices.shape[1]:
+            raise ValueError("The shapes of basis_indices and univ_p_val don't"
+                             " match!!")
 
         # Preallocate the Psi matrix for performance
-        Psi = np.ones((NofSamples, P))
+        psi = np.ones((n_samples, n_terms))
 
         # Assemble the Psi matrix
-        for m in range(BasisIndices.shape[1]):
-            aa = np.where(BasisIndices[:, m] > 0)[0]
+        for m in range(basis_indices.shape[1]):
+            aa = np.where(basis_indices[:, m] > 0)[0]
             try:
-                basisIdx = BasisIndices[aa, m]
-                bb = np.reshape(univ_p_val[:, m, basisIdx], Psi[:, aa].shape)
-                Psi[:, aa] = np.multiply(Psi[:, aa], bb)
-            except:
-                raise ValueError
+                basisIdx = basis_indices[aa, m]
+                bb = np.reshape(univ_p_val[:, m, basisIdx], psi[:, aa].shape)
+                psi[:, aa] = np.multiply(psi[:, aa], bb)
+            except ValueError as err:
+                raise err
 
-        return Psi
+        return psi
 
     # -------------------------------------------------------------------------
-    def RS_Builder(self, X, y, BasisIndices, RegMethod=None):
+    def fit(self, X, y, basis_indices, reg_method=None):
         """
         Fit regression using the regression method provided.
 
@@ -249,30 +458,25 @@ class Metamodel:
             n_features is the number of features.
         y : array-like of shape (n_samples,)
             Target values.
-        BasisIndices : TYPE
-            DESCRIPTION.
-        RegMethod : string, optional
+        basis_indices : array-like of shape (n_terms, n_params)
+            Multi-indices of multivariate polynomials.
+        reg_method : string, optional
             DESCRIPTION. The default is None.
 
-        Raises
-        ------
-        ValueError
-            DESCRIPTION.
-
         Returns
         -------
         returnOuts : Dict
             Fitted estimator, spareMulti-Index, sparseX and coefficients.
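The loop in create_psi multiplies, column by column, the univariate evaluations selected by each multi-index. A standalone check of that assembly, with plain monomials standing in for the orthonormal polynomials that eval_univ_basis would return:

    import numpy as np

    samples = np.array([[0.5, 2.0], [1.0, 3.0]])   # (n_samples, n_params)
    n_max = 2

    # univ_p_val[i, m, d] = degree-d basis of parameter m at sample i
    univ_p_val = samples[:, :, None] ** np.arange(n_max + 1)

    basis_indices = np.array([[0, 0], [1, 0], [0, 1], [1, 1]])
    psi = np.ones((samples.shape[0], basis_indices.shape[0]))
    for m in range(basis_indices.shape[1]):
        aa = np.where(basis_indices[:, m] > 0)[0]
        psi[:, aa] *= univ_p_val[:, m, basis_indices[aa, m]]

    print(psi)  # columns: 1, x1, x2, x1*x2 at each sample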
""" - if RegMethod is None: - RegMethod = self.RegMethod + if reg_method is None: + reg_method = self.pce_reg_method # Check if BasisIndices is a sparse matrix - sparsity = sp.sparse.issparse(BasisIndices) + sparsity = sp.sparse.issparse(basis_indices) clf_poly = [] - compute_score = True if self.DisplayFlag else False + compute_score = True if self.verbose else False # inverse of the observed variance of the data if np.var(y) != 0: @@ -281,8 +485,8 @@ class Metamodel: Lambda = 1e-6 # Bayes sparse adaptive aPCE - if RegMethod.lower() != 'ols': - if RegMethod.lower() == 'brr' or np.all(y == 0): + if reg_method.lower() != 'ols': + if reg_method.lower() == 'brr' or np.all(y == 0): clf_poly = lm.BayesianRidge(n_iter=1000, tol=1e-7, fit_intercept=True, normalize=True, @@ -291,7 +495,7 @@ class Metamodel: lambda_1=Lambda, lambda_2=Lambda) clf_poly.converged = True - elif RegMethod.lower() == 'ard': + elif reg_method.lower() == 'ard': clf_poly = lm.ARDRegression(fit_intercept=True, normalize=True, compute_score=compute_score, @@ -299,31 +503,31 @@ class Metamodel: alpha_1=1e-3, alpha_2=1e-3, lambda_1=Lambda, lambda_2=Lambda) - elif RegMethod.lower() == 'fastard': + elif reg_method.lower() == 'fastard': clf_poly = RegressionFastARD(fit_intercept=True, normalize=True, compute_score=compute_score, n_iter=300, tol=1e-10) - elif RegMethod.lower() == 'bcs': + elif reg_method.lower() == 'bcs': clf_poly = RegressionFastLaplace(fit_intercept=False, n_iter=1000, tol=1e-7) - elif RegMethod.lower() == 'lars': + elif reg_method.lower() == 'lars': clf_poly = lm.LassoLarsCV(fit_intercept=False) - elif RegMethod.lower() == 'sgdr': + elif reg_method.lower() == 'sgdr': clf_poly = lm.SGDRegressor(fit_intercept=False, max_iter=5000, tol=1e-7) - elif RegMethod.lower() == 'omp': + elif reg_method.lower() == 'omp': clf_poly = lm.OrthogonalMatchingPursuitCV(fit_intercept=False, max_iter=10) - elif RegMethod.lower() == 'vbl': + elif reg_method.lower() == 'vbl': clf_poly = VBLinearRegression(fit_intercept=False) - elif RegMethod.lower() == 'ebl': + elif reg_method.lower() == 'ebl': clf_poly = EBLinearRegression(optimizer='em') # Fit @@ -337,9 +541,9 @@ class Metamodel: nnz_idx = np.insert(np.nonzero(clf_poly.coef_)[0], 0, 0) # Remove the zero entries for Bases and PSI if need be if sparsity: - sparse_basis_indices = BasisIndices.toarray()[nnz_idx] + sparse_basis_indices = basis_indices.toarray()[nnz_idx] else: - sparse_basis_indices = BasisIndices[nnz_idx] + sparse_basis_indices = basis_indices[nnz_idx] sparse_X = X[:, nnz_idx] # Store the coefficients of the regression model @@ -349,18 +553,18 @@ class Metamodel: # This is for the case where all outputs are zero, thereby # all coefficients are zero if sparsity: - sparse_basis_indices = BasisIndices.toarray() + sparse_basis_indices = basis_indices.toarray() else: - sparse_basis_indices = BasisIndices + sparse_basis_indices = basis_indices sparse_X = X coeffs = clf_poly.coef_ # Ordinary least square method (OLS) else: if sparsity: - sparse_basis_indices = BasisIndices.toarray() + sparse_basis_indices = basis_indices.toarray() else: - sparse_basis_indices = BasisIndices + sparse_basis_indices = basis_indices sparse_X = X X_T_X = np.dot(sparse_X.T, sparse_X) @@ -382,7 +586,7 @@ class Metamodel: return returnOuts # -------------------------------------------------------------------------------------------------------- - def adaptiveSparseaPCE(self, ED_X, ED_Y, varIdx, verbose=False): + def adaptive_regression(self, ED_X, ED_Y, varIdx, verbose=False): """ Adaptively fits the 
PCE model by comparing the scores of different degrees and q-norm. @@ -406,7 +610,7 @@ class Metamodel: """ - NrSamples, NofPa = ED_X.shape + NrSamples, n_params = ED_X.shape # Initialization qAllCoeffs, AllCoeffs = {}, {} qAllIndices_Sparse, AllIndices_Sparse = {}, {} @@ -446,19 +650,19 @@ class Metamodel: BasisIndices = self.allBasisIndices[str(deg)][str(q)] # Assemble the Psi matrix - Psi = self.PCE_create_Psi(BasisIndices, self.univ_p_val) + Psi = self.create_psi(BasisIndices, self.univ_p_val) # Calulate the cofficients of the meta model - outs = self.RS_Builder(Psi, ED_Y, BasisIndices) + outs = self.fit(Psi, ED_Y, BasisIndices) # Calculate and save the score of LOOCV - score, LCerror = self.CorrectedLeaveoutError(outs['clf_poly'], - outs['sparePsi'], - outs['coeffs'], - ED_Y) + score, LCerror = self.corr_loocv_error(outs['clf_poly'], + outs['sparePsi'], + outs['coeffs'], + ED_Y) # Check the convergence of noise for FastARD - if self.RegMethod == 'FastARD' and \ + if self.pce_reg_method == 'FastARD' and \ outs['clf_poly'].alpha_ < np.finfo(np.float32).eps: score = -np.inf @@ -512,7 +716,7 @@ class Metamodel: # Select the one with the best score and save the necessary outputs best_deg = np.nanargmax(scores)+1 coeffs = AllCoeffs[str(best_deg)] - PolynomialDegrees = AllIndices_Sparse[str(best_deg)] + basis_indices = AllIndices_Sparse[str(best_deg)] clf_poly = Allclf_poly[str(best_deg)] LOOCVScore = np.nanmax(scores) P = AllnTerms[str(best_deg)] @@ -521,10 +725,10 @@ class Metamodel: qnorm = float(qnorm[best_q]) # ------------------ Print out Summary of results ------------------ - if self.DisplayFlag: + if self.verbose: # Create PSI_Sparse by removing redundent terms nnz_idx = np.nonzero(coeffs)[0] - BasisIndices_Sparse = PolynomialDegrees[nnz_idx] + BasisIndices_Sparse = basis_indices[nnz_idx] print(f'Output variable {varIdx+1}:') print('The estimation of PCE coefficients converged at polynomial ' @@ -542,17 +746,17 @@ class Metamodel: print("scores:\n", scores) print("Best score's degree:", self.DegreeArray[best_deg-1]) - print("NO. of terms:", len(PolynomialDegrees)) - print("Sparsity index:", round(len(PolynomialDegrees)/P, 3)) - print("Best Indices:\n", PolynomialDegrees) + print("NO. of terms:", len(basis_indices)) + print("Sparsity index:", round(len(basis_indices)/P, 3)) + print("Best Indices:\n", basis_indices) - if self.RegMethod in ['BRR', 'ARD']: + if self.pce_reg_method in ['BRR', 'ARD']: fig, ax = plt.subplots(figsize=(12, 10)) plt.title("Marginal log-likelihood") plt.plot(clf_poly.scores_, color='navy', linewidth=2) plt.ylabel("Score") plt.xlabel("Iterations") - if self.RegMethod.lower() == 'bbr': + if self.pce_reg_method.lower() == 'bbr': text = f"$\\alpha={clf_poly.alpha_:.1f}$\n" f"$\\lambda={clf_poly.lambda_:.3f}$\n" f"$L={clf_poly.scores_[-1]:.1f}$" @@ -570,14 +774,14 @@ class Metamodel: returnVars['degree'] = degree returnVars['qnorm'] = qnorm returnVars['coeffs'] = coeffs - returnVars['multi_indices'] = PolynomialDegrees + returnVars['multi_indices'] = basis_indices returnVars['LOOCVScore'] = LOOCVScore returnVars['LCerror'] = LCerror return returnVars # ------------------------------------------------------------------------- - def CorrectedLeaveoutError(self, clf, psi, coeffs, y): + def corr_loocv_error(self, clf, psi, coeffs, y): """ Calculates the corrected LOO error for the OLS regression on regressor matrix PSI that generated the coefficients. 
@@ -613,7 +817,7 @@ class Metamodel: nnz_idx = np.nonzero(coeffs)[0] if len(nnz_idx) == 0: nnz_idx = [0] - PSI_Sparse = psi[:, nnz_idx] + psi_sparse = psi[:, nnz_idx] # NrCoeffs of aPCEs P = len(nnz_idx) @@ -621,7 +825,7 @@ class Metamodel: N = psi.shape[0] # Build the projection matrix - PsiTPsi = np.dot(PSI_Sparse.T, PSI_Sparse) + PsiTPsi = np.dot(psi_sparse.T, psi_sparse) if np.linalg.cond(PsiTPsi) > 1e-12 and \ np.linalg.cond(PsiTPsi) < 1/sys.float_info.epsilon: @@ -634,9 +838,9 @@ class Metamodel: # h factor (the full matrix is not calculated explicitly, # only the trace is, to save memory) - PsiM = np.dot(PSI_Sparse, M) + PsiM = np.dot(psi_sparse, M) - h = np.sum(np.multiply(PsiM, PSI_Sparse), axis=1, dtype=np.float128) + h = np.sum(np.multiply(PsiM, psi_sparse), axis=1, dtype=np.float128) # ------ Calculate Error Loocv for each measurement point ---- # Residuals @@ -683,7 +887,7 @@ class Metamodel: return Q_2, residual # ------------------------------------------------------------------------- - def PCATransformation(self, Output): + def pca_transformation(self, Output): """ Transforms the targets (outputs) via Principal Component Analysis @@ -728,8 +932,8 @@ class Metamodel: return pca, OutputMatrix # ------------------------------------------------------------------------- - def GaussianProcessEmulator(self, X, y, nugTerm=None, autoSelect=False, - varIdx=None): + def gaussian_process_emulator(self, X, y, nugTerm=None, autoSelect=False, + varIdx=None): """ Fits a Gaussian Process Emulator to the target given the training points. @@ -799,158 +1003,111 @@ class Metamodel: return gp # ------------------------------------------------------------------------- - def create_PCE_normDesign(self, Model, OptDesignFlag=False): + def eval_metamodel(self, samples=None, nsamples=None, + sampling_method='random', return_samples=False): """ - This function loops over the outputs and each time step/point and fits - the meta model. + Evaluates meta-model at the requested samples. One can also generate + nsamples. Parameters ---------- - Model : object - Model object. - OptDesignFlag : bool, optional - Flag for a sequential design in silent mode. The default is False. + samples : array of shape (n_samples, n_params), optional + Samples to evaluate meta-model at. The default is None. + nsamples : int, optional + Number of samples to generate, if no `samples` is provided. The + default is None. + sampling_method : string, optional + Type of sampling, if no `samples` is provided. The default is + 'random'. + return_samples : bool, optional + Retun samples, if no `samples` is provided. The default is False. Returns ------- - self: object - Meta-model object. + TYPE + DESCRIPTION. 
""" - - # Get the collocation points to run the forward model - CollocationPoints = self.Exe_ExpDesign(Model) - OutputDict = self.ModelOutputDict.copy() - - # Initialize the nested dictionaries - self.DegreeDict = self.AutoVivification() - self.qNormDict = self.AutoVivification() - self.CoeffsDict = self.AutoVivification() - self.BasisDict = self.AutoVivification() - self.ScoresDict = self.AutoVivification() - self.clf_poly = self.AutoVivification() - self.gp_poly = self.AutoVivification() - self.pca = self.AutoVivification() - self.LCerror = self.AutoVivification() - self.x_scaler = {} - - # Read observations or MCReference - if self.ExpDesignFlag != 'sequential': - if len(Model.observations) != 0: - self.Observations = Model.read_observation() - - # Define the DegreeArray - nSamples, ndim = CollocationPoints.shape - self.DegreeArray = self.select_degree(ndim, nSamples) - - # Generate all basis indices - self.allBasisIndices = self.AutoVivification() - for deg in self.DegreeArray: - keys = self.allBasisIndices.keys() - if deg not in np.fromiter(keys, dtype=float): - # Generate the polynomial basis indices - for qidx, q in enumerate(self.q): - basis_indices = self.PolyBasisIndices(degree=deg, q=q) - self.allBasisIndices[str(deg)][str(q)] = basis_indices - - # Evaluate the univariate polynomials on ExpDesign - if self.metaModel.lower() != 'gpe': - self.univ_p_val = self.univ_basis_vals(CollocationPoints) - - if 'x_values' in OutputDict: - del OutputDict['x_values'] - - # --- Loop through data points and fit the surrogate --- - if not OptDesignFlag: - print(f"\n>>>> Training the {self.metaModel} metamodel " - "started. <<<<<<\n") - items = tqdm(OutputDict.items(), desc="Fitting regression") + if self.meta_model_type.lower() == 'gpe': + model_dict = self.gp_poly else: - items = OutputDict.items() + model_dict = self.coeffs_dict - # For loop over the components/outputs - for key, Output in items: - - # Dimensionality reduction with PCA, if specified - if self.DimRedMethod.lower() == 'pca': - self.pca[key], target = self.PCATransformation(Output) + if samples is None: + if nsamples is None: + self.n_samples = 100000 else: - target = Output + self.n_samples = nsamples - # Parallel fit regression - if self.metaModel.lower() == 'gpe': - # Prepare the input matrix - scaler = MinMaxScaler() - X_S = scaler.fit_transform(CollocationPoints) + samples = self.ExpDesign.generate_samples(self.n_samples, + sampling_method) + else: + self.samples = samples + self.n_samples = len(samples) - self.x_scaler[key] = scaler + # Transform samples + samples = self.ExpDesign.transform(samples) - out = Parallel(n_jobs=-1, prefer='threads')( - delayed(self.GaussianProcessEmulator)(X_S, target[:, idx]) - for idx in range(target.shape[1])) + if self.meta_model_type.lower() != 'gpe': + univ_p_val = self.univ_basis_vals(samples, + n_max=np.max(self.pce_deg)) - for idx in range(target.shape[1]): - self.gp_poly[key][f"y_{idx+1}"] = out[idx] - - else: - out = Parallel(n_jobs=-1, prefer='threads')( - delayed(self.adaptiveSparseaPCE)(CollocationPoints, - target[:, idx], idx) - for idx in range(target.shape[1])) + mean_pred = {} + std_pred = {} - for i in range(target.shape[1]): - # Create a dict to pass the variables - self.DegreeDict[key][f"y_{i+1}"] = out[i]['degree'] - self.qNormDict[key][f"y_{i+1}"] = out[i]['qnorm'] - self.CoeffsDict[key][f"y_{i+1}"] = out[i]['coeffs'] - self.BasisDict[key][f"y_{i+1}"] = out[i]['multi_indices'] - self.ScoresDict[key][f"y_{i+1}"] = out[i]['LOOCVScore'] - self.clf_poly[key][f"y_{i+1}"] = 
out[i]['clf_poly'] - self.LCerror[key][f"y_{i+1}"] = out[i]['LCerror'] + # Loop over outputs + for ouput, values in model_dict.items(): - if not OptDesignFlag: - print(f"\n>>>> Training the {self.metaModel} metamodel" - " sucessfully completed. <<<<<<\n") + mean = np.zeros((len(samples), len(values))) + std = np.zeros((len(samples), len(values))) + idx = 0 + for in_key, InIdxValues in values.items(): - return self + # Perdiction with GPE + if self.meta_model_type.lower() == 'gpe': + X_T = self.x_scaler[ouput].transform(samples) + gp = self.gp_poly[ouput][in_key] + y_mean, y_std = gp.predict(X_T, return_std=True) - # ------------------------------------------------------------------------- - def select_degree(self, ndim, nSamples): - # Define the DegreeArray - maxDeg = self.MaxPceDegree - nitr = nSamples - self.ExpDesign.initNrSamples + else: + # Perdiction with PCE or pcekriging + # Assemble Psi matrix + psi = self.create_psi(self.basis_dict[ouput][in_key], + univ_p_val) + # Perdiction + try: + # with error bar + clf_poly = self.clf_poly[ouput][in_key] + y_mean, y_std = clf_poly.predict(psi, return_std=True) - # Check q-norm - if not np.isscalar(self.q): - self.q = np.array(self.q) - else: - self.q = np.array([self.q]) + except: + # without error bar + coeffs = self.coeffs_dict[ouput][in_key] + y_mean = np.dot(psi, coeffs) + y_std = np.zeros_like(y_mean) - def M_uptoMax(maxDeg): - n_combo = np.zeros(maxDeg) - for i, d in enumerate(range(1, maxDeg+1)): - n_combo[i] = math.factorial(ndim+d) - n_combo[i] /= math.factorial(ndim) * math.factorial(d) - return n_combo + mean[:, idx] = y_mean + std[:, idx] = y_std + idx += 1 - if self.ExpDesignFlag != 'sequential': - degNew = self.MaxPceDegree - else: - d = nitr if nitr != 0 and self.NofPa > 5 else 1 - min_index = np.argmin(abs(M_uptoMax(maxDeg)-ndim*nSamples*d)) - degNew = range(1, maxDeg+1)[min_index] + if self.dim_red_method.lower() == 'pca': + PCA = self.pca[ouput] + mean_pred[ouput] = PCA.mean_ + std_pred[ouput] += np.dot(mean, PCA.components_) + std_pred[ouput] = np.sqrt(np.dot(std**2, PCA.components_**2)) + else: + mean_pred[ouput] = mean + std_pred[ouput] = std - if degNew > self.MinPceDegree and self.RegMethod.lower() != 'fastard': - DegreeArray = np.arange(self.MinPceDegree, degNew+1) + if return_samples: + return mean_pred, std_pred, samples else: - DegreeArray = np.array([degNew]) - - return DegreeArray + return mean_pred, std_pred # ------------------------------------------------------------------------- - def create_ModelError(self, X, y, Name='Calib'): + def create_model_error(self, X, y, name='Calib'): """ - This function fits a GPE-based model error. + Fits a GPE-based model error. Parameters ---------- @@ -959,23 +1116,23 @@ class Metamodel: extracted data. y : array-like of shape (n_outputs,) The model response for the MAP parameter set. - Name : string, optional + name : string, optional Calibration or validation. The default is 'Calib'. Returns ------- - TYPE - DESCRIPTION. + self: object + Self object. 
""" Model = self.ModelObj outputNames = Model.Output.Names self.errorRegMethod = 'GPE' - self.errorclf_poly = self.AutoVivification() - self.errorScale = self.AutoVivification() + self.errorclf_poly = self.auto_vivification() + self.errorScale = self.auto_vivification() # Read data - MeasuredData = Model.read_observation(case=Name) + MeasuredData = Model.read_observation(case=name) # Fitting GPR based bias model for out in outputNames: @@ -983,7 +1140,7 @@ class Metamodel: # Select data try: data = MeasuredData[out].values[nan_idx] - except: + except AttributeError: data = MeasuredData[out][nan_idx] # Prepare the input matrix @@ -991,7 +1148,7 @@ class Metamodel: delta = data # - y[out][0] BiasInputs = np.hstack((X[out], y[out].reshape(-1, 1))) X_S = scaler.fit_transform(BiasInputs) - gp = self.GaussianProcessEmulator(X_S, delta) + gp = self.gaussian_process_emulator(X_S, delta) self.errorScale[out]["y_1"] = scaler self.errorclf_poly[out]["y_1"] = gp @@ -999,14 +1156,32 @@ class Metamodel: return self # ------------------------------------------------------------------------- - def eval_errormodel(self, X, y_pred): - meanPCEOutputs = {} - stdPCEOutputs = {} + def eval_model_error(self, X, y_pred): + """ + Evaluates the error model. + + Parameters + ---------- + X : array + Inputs. + y_pred : dict + Predictions. + + Returns + ------- + mean_pred : dict + Mean predition of the GPE-based error model. + std_pred : dict + standard deviation of the GPE-based error model. + + """ + mean_pred = {} + std_pred = {} for Outkey, ValuesDict in self.errorclf_poly.items(): - PCEOutputs_mean = np.zeros_like(y_pred[Outkey]) - PCEOutputs_std = np.zeros_like(y_pred[Outkey]) + pred_mean = np.zeros_like(y_pred[Outkey]) + pred_std = np.zeros_like(y_pred[Outkey]) for Inkey, InIdxValues in ValuesDict.items(): @@ -1018,121 +1193,73 @@ class Metamodel: BiasInputs = np.hstack((X[Outkey], pred.reshape(-1, 1))) Samples_S = scaler.transform(BiasInputs) y_hat, y_std = gp.predict(Samples_S, return_std=True) - PCEOutputs_mean[j] = y_hat - PCEOutputs_std[j] = y_std - # PCEOutputs_mean[j] += pred + pred_mean[j] = y_hat + pred_std[j] = y_std + # pred_mean[j] += pred - meanPCEOutputs[Outkey] = PCEOutputs_mean - stdPCEOutputs[Outkey] = PCEOutputs_std + mean_pred[Outkey] = pred_mean + std_pred[Outkey] = pred_std - return meanPCEOutputs, stdPCEOutputs + return mean_pred, std_pred # ------------------------------------------------------------------------- - def eval_metamodel(self, samples=None, nsamples=None, Y=None, - discrepModel=False, samplingMethod='random', - name='Calib', return_samples=False): - if self.metaModel.lower() == 'gpe': - ModelDict = self.gp_poly - else: - ModelDict = self.CoeffsDict - - if samples is None: - if nsamples is None: - self.NrofSamples = 100000 - else: - self.NrofSamples = nsamples - - samples = self.ExpDesign.GetSample(self.NrofSamples, - samplingMethod, - transform=True) - else: - self.Samples = samples - self.NrofSamples = len(samples) - - # Check if samples need Transformation - samples = self.ExpDesign.transform(samples) - - if self.metaModel.lower() != 'gpe': - univ_p_val = self.univ_basis_vals(samples, n_max=self.MaxPceDegree) - - meanPCEOutputs = {} - stdPCEOutputs = {} + class auto_vivification(dict): + """Implementation of perl's AutoVivification feature.""" - # Loop over outputs - cnt = 0 - for Outkey, ValuesDict in ModelDict.items(): + def __getitem__(self, item): + try: + return dict.__getitem__(self, item) + except KeyError: + value = self[item] = type(self)() + return value - 
PCEOutputs_mean = np.zeros((len(samples), len(ValuesDict))) - PCEOutputs_std = np.zeros((len(samples), len(ValuesDict))) - idx = 0 - for Inkey, InIdxValues in ValuesDict.items(): + # ------------------------------------------------------------------------- + def __select_degree(self, ndim, nSamples): + """ + Selects degree based on the number of samples and parameters in the + sequential design. - # Perdiction with GPE - if self.metaModel.lower() == 'gpe': - X_T = self.x_scaler[Outkey].transform(samples) - gp = self.gp_poly[Outkey][Inkey] - y_mean, y_std = gp.predict(X_T, return_std=True) + Parameters + ---------- + ndim : TYPE + DESCRIPTION. + nSamples : TYPE + DESCRIPTION. - else: - # Perdiction with PCE or pcekriging - # Assemble Psi matrix - psi = self.PCE_create_Psi(self.BasisDict[Outkey][Inkey], - univ_p_val) - # Perdiction - try: - # with error bar - clf_poly = self.clf_poly[Outkey][Inkey] - y_mean, y_std = clf_poly.predict(psi, return_std=True) + Returns + ------- + TYPE + DESCRIPTION. - except: - # without error bar - coeffs = self.CoeffsDict[Outkey][Inkey] - y_mean = np.dot(psi, coeffs) - y_std = np.zeros_like(y_mean) + """ + # Define the DegreeArray + max_deg = np.max(self.pce_deg) + min_Deg = np.min(self.pce_deg) + nitr = nSamples - self.ExpDesign.n_init_samples - PCEOutputs_mean[:, idx] = y_mean - PCEOutputs_std[:, idx] = y_std - idx += 1 + # Check q-norm + if not np.isscalar(self.pce_q_norm): + self.pce_q_norm = np.array(self.pce_q_norm) + else: + self.pce_q_norm = np.array([self.pce_q_norm]) - if self.index is None: - if self.DimRedMethod.lower() == 'pca': - PCA = self.pca[Outkey] - meanPCEOutputs[Outkey] = PCA.mean_ - meanPCEOutputs[Outkey] += np.dot(PCEOutputs_mean, - PCA.components_) - stdPCEOutputs[Outkey] = np.sqrt(np.dot(PCEOutputs_std**2, - PCA.components_**2)) - else: - meanPCEOutputs[Outkey] = PCEOutputs_mean - stdPCEOutputs[Outkey] = PCEOutputs_std + def M_uptoMax(maxDeg): + n_combo = np.zeros(maxDeg) + for i, d in enumerate(range(1, maxDeg+1)): + n_combo[i] = math.factorial(ndim+d) + n_combo[i] /= math.factorial(ndim) * math.factorial(d) + return n_combo - else: - index = self.index[cnt] - if name.lower() == 'calib': - if self.DimRedMethod.lower() == 'pca': - PCA = self.pca[Outkey] - meanPCEOutputs[Outkey] = PCA.mean_[:index] - meanPCEOutputs[Outkey] += np.dot(PCEOutputs_mean, - PCA.components_)[:, :index] - stdPCEOutputs[Outkey] = np.sqrt(np.dot( - PCEOutputs_std**2, PCA.components_**2))[:, :index] - else: - meanPCEOutputs[Outkey] = PCEOutputs_mean[:, :index] - stdPCEOutputs[Outkey] = PCEOutputs_std[:, :index] - else: - if self.DimRedMethod.lower() == 'pca': - PCA = self.pca[Outkey] - meanPCEOutputs[Outkey] = PCA.mean_[index:] - meanPCEOutputs[Outkey] += np.dot(PCEOutputs_mean, - PCA.components_)[:, index:] - stdPCEOutputs[Outkey] = np.sqrt(np.dot( - PCEOutputs_std**2, PCA.components_**2))[:, index:] - else: - meanPCEOutputs[Outkey] = PCEOutputs_mean[:, index:] - stdPCEOutputs[Outkey] = PCEOutputs_std[:, index:] - cnt += 1 + if self.ExpDesignFlag != 'sequential': + degNew = max_deg + else: + d = nitr if nitr != 0 and self.n_params > 5 else 1 + min_index = np.argmin(abs(M_uptoMax(max_deg)-ndim*nSamples*d)) + degNew = range(1, max_deg+1)[min_index] - if return_samples: - return meanPCEOutputs, stdPCEOutputs, samples + if degNew > min_Deg and self.pce_reg_method.lower() != 'fastard': + DegreeArray = np.arange(min_Deg, degNew+1) else: - return meanPCEOutputs, stdPCEOutputs + DegreeArray = np.array([degNew]) + + return DegreeArray diff --git 
a/BayesValidRox/tests/AnalyticalFunction/AnalyticalFunction.py b/BayesValidRox/tests/AnalyticalFunction/AnalyticalFunction.py index c4ed8e05c3ecd841f16d6b2d4757b12d5ecad325..0dbd4a97a4e000182e2f20afc8a535b79171e36d 100644 --- a/BayesValidRox/tests/AnalyticalFunction/AnalyticalFunction.py +++ b/BayesValidRox/tests/AnalyticalFunction/AnalyticalFunction.py @@ -7,74 +7,60 @@ Created on Wed Nov 20 14:48:43 2019 """ import numpy as np import scipy.stats as stats -import matplotlib.pyplot as plt import scipy.stats as st import seaborn as sns + def AnalyticalFunction(xx, t=None): """ - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - % - % Analytical Non-Gaussian Function - % - % Authors: Farid Mohammadi, University of Stuttgart - % Sergey Oladyshkin, University of Stuttgart - % Questions/Comments: Please email Farid Mohammadi at: - % farid.mohammadi@iws.uni-stuttgart.de - % - % Copyright 2019. Farid Mohammadi, University of Stuttgart. - % - % THERE IS NO WARRANTY, EXPRESS OR IMPLIED. WE DO NOT ASSUME ANY LIABILITY - % FOR THE USE OF THIS SOFTWARE. If software is modified to produce - % derivative works, such modified software should be clearly marked. - % Additionally, this program is free software; you can redistribute it - % and/or modify it under the terms of the GNU General Public License as - % published by the Free Software Foundation; version 2.0 of the License. - % Accordingly, this program is distributed in the hope that it will be - % useful, but WITHOUT ANY WARRANTY; without even the implied warranty - % of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - % General Public License for more details. - % - % For function details and reference information, see: - % https://doi.org/10.3390/e21111081 - % - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - % - % OUTPUT AND INPUTS: - % - % Output = row vector of time vectors (s, t) - % Its structure is: - % y(t_1), y(t_2), ..., y(t_dt) - % xx = [x1, x2, ..., xn] - % xn ~ Uinform(-5, 5) - % t = vector of times (optional), with default value - % ( k − 1 ) /9 and k = 1,..., 10 - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + Analytical Non-Gaussian Function + + Authors: Farid Mohammadi, University of Stuttgart + Sergey Oladyshkin, University of Stuttgart + Questions/Comments: Please email Farid Mohammadi at: + farid.mohammadi@iws.uni-stuttgart.de + + For function details and reference information, see: + https://doi.org/10.3390/e21111081 + + Parameters + ---------- + xx : array + [x1, x2, ..., xn] where xn ~ Uinform(-5, 5). + t : array, optional + vector of times. The default is None. ( k − 1 ) /9 and k = 1,..., 10 + + Returns + ------- + array + row vector of time vectors (s, t). + """ nParamSets, nParams = xx.shape - - if t is None: t = np.arange(0, 10, 1.) / 9 - - term1 = (xx[:,0]**2 + xx[:,1] - 1)**2 - - term2 = xx[:,0]**2 - - term3 = 0.1 * xx[:,0] * np.exp(xx[:,1]) - + + if t is None: + t = np.arange(0, 10, 1.) 
/ 9 + + term1 = (xx[:, 0]**2 + xx[:, 1] - 1)**2 + + term2 = xx[:, 0]**2 + + term3 = 0.1 * xx[:, 0] * np.exp(xx[:, 1]) + term5 = 0 if nParams > 2: for i in range(2, nParams): - term5 = term5 + xx[:,i]**3/i - + term5 = term5 + xx[:, i]**3/i + const = term1 + term2 + term3 + 1 + term5 - + # Compute time dependent term - term4 = np.zeros((nParamSets,len(t))) + term4 = np.zeros((nParamSets, len(t))) for idx in range(nParamSets): - term4[idx] = -2 * xx[idx,0] * np.sqrt(0.5*t) - - Output = term4 + np.repeat(const[:,None], len(t), axis=1) - + term4[idx] = -2 * xx[idx, 0] * np.sqrt(0.5*t) + + Output = term4 + np.repeat(const[:, None], len(t), axis=1) + return np.vstack((t, Output)) if __name__ == "__main__": diff --git a/BayesValidRox/tests/AnalyticalFunction/Test_AnalyticalFunction.py b/BayesValidRox/tests/AnalyticalFunction/Test_AnalyticalFunction.py index 18bdb95592a3025234a60d11a46bae978680f067..44c47281fad98e0d587c59ae11ebc11194991756 100755 --- a/BayesValidRox/tests/AnalyticalFunction/Test_AnalyticalFunction.py +++ b/BayesValidRox/tests/AnalyticalFunction/Test_AnalyticalFunction.py @@ -27,7 +27,7 @@ sys.path.insert(0, './../../../BayesValidRox') from PyLink.PyLinkForwardModel import PyLinkForwardModel from surrogate_models.Input import Input -from surrogate_models.surrogate_models import Metamodel +from surrogate_models.surrogate_models import MetaModel from PostProcessing.PostProcessing import PostProcessing from BayesInference.BayesInference import BayesInference, Discrepancy @@ -39,7 +39,7 @@ if __name__ == "__main__": # ===================================================== # ============= COMPUTATIONAL MODEL ================ # ===================================================== - Model = PyLinkForwardModel() # FuncForwardModel() + Model = PyLinkForwardModel() Model.link_type = 'Function' Model.py_file = 'AnalyticalFunction' @@ -70,29 +70,29 @@ if __name__ == "__main__": # for i in range(ndim): # Inputs.addMarginals() - # Inputs.Marginals[i].Name = '$X_{%s}$'%(i+1) - # Inputs.Marginals[i].DistType = 'unif' - # Inputs.Marginals[i].Parameters = [-5, 5] + # Inputs.Marginals[i].name = '$X_{%s}$'%(i+1) + # Inputs.Marginals[i].dist_type = 'unif' + # Inputs.Marginals[i].parameters = [-5, 5] # arbitrary polynomial chaos inputParams = np.load('data/InputParameters_{}.npy'.format(ndim)) for i in range(ndim): Inputs.addMarginals() - Inputs.Marginals[i].Name = f'$X_{i+1}$' - Inputs.Marginals[i].InputValues = inputParams[:, i] + Inputs.Marginals[i].name = f'$X_{i+1}$' + Inputs.Marginals[i].input_data = inputParams[:, i] # ===================================================== # ========== DEFINITION OF THE METAMODEL ============ # ===================================================== - MetaModelOpts = Metamodel(Inputs) + MetaModelOpts = MetaModel(Inputs) # Select if you want to preserve the spatial/temporal depencencies - # MetaModelOpts.DimRedMethod = 'PCA' + # MetaModelOpts.dim_red_method = 'PCA' # MetaModelOpts.varPCAThreshold = 100 #99.999 # Select your metamodel method # 1) PCE (Polynomial Chaos Expansion) 2) GPE (Gaussian Process Emulator) - MetaModelOpts.metaModel = 'PCE' + MetaModelOpts.meta_model_type = 'PCE' # ------------------------------------------------ # ------------- PCE Specification ---------------- @@ -104,43 +104,43 @@ if __name__ == "__main__": # 5)FastARD: Fast Bayesian ARD Regression # 6)VBL: Variational Bayesian Learning # 7)EBL: Emperical Bayesian Learning - MetaModelOpts.RegMethod = 'FastARD' + MetaModelOpts.pce_reg_method = 'FastARD' # Specify the max degree to be compared 
by the adaptive algorithm: # The degree with the lowest Leave-One-Out cross-validation (LOO) # error (or the highest score=1-LOO)estimator is chosen as the final - # metamodel. - MetaModelOpts.MinPceDegree = 6 # 12 - MetaModelOpts.MaxPceDegree = 6 # 12 + # metamodel. pce_deg accepts degree as a scalar or a range. + MetaModelOpts.pce_deg = np.arange(9) + # q-quasi-norm 0<q<1 (default=1) - MetaModelOpts.q = 0.75 if ndim < 5 else 0.5 - # MetaModelOpts.q = np.linspace(0.25,0.75, 3) + MetaModelOpts.pce_q_norm = 0.75 if ndim < 5 else 0.5 + # MetaModelOpts.pce_q_norm = np.linspace(0.25,0.75, 3) # Print summary of the regression results - # MetaModelOpts.DisplayFlag = True + # MetaModelOpts.verbose = True # ------------------------------------------------ # ------ Experimental Design Configuration ------- # ------------------------------------------------ - MetaModelOpts.addExpDesign() + MetaModelOpts.add_ExpDesign() # One-shot (normal) or Sequential Adaptive (sequential) Design - MetaModelOpts.ExpDesign.Method = 'sequential' + MetaModelOpts.ExpDesign.Method = 'normal' NrofInitSamples = 5*ndim # 50 5*ndim - MetaModelOpts.ExpDesign.NrSamples = NrofInitSamples + MetaModelOpts.ExpDesign.n_init_samples = NrofInitSamples # Sampling methods # 1) random 2) latin_hypercube 3) sobol 4) halton 5) hammersley 6) korobov # 7) chebyshev(FT) 8) grid(FT) 9) nested_grid(FT) 10)user - MetaModelOpts.ExpDesign.SamplingMethod = 'latin_hypercube' + MetaModelOpts.ExpDesign.sampling_method = 'latin_hypercube' # Provide the experimental design object with a hdf5 file - # MetaModelOpts.ExpDesign.hdf5File = 'ExpDesign_AnalyticFunc.hdf5' + # MetaModelOpts.ExpDesign.hdf5_file = 'ExpDesign_AnalyticFunc.hdf5' # Sequential experimental design (needed only for sequential ExpDesign) - MetaModelOpts.ExpDesign.NrofNewSample = 1 - MetaModelOpts.ExpDesign.MaxNSamples = 20 # 150 - MetaModelOpts.ExpDesign.ModifiedLOOThreshold = 1e-16 + MetaModelOpts.ExpDesign.n_new_samples = 1 + MetaModelOpts.ExpDesign.n_max_samples = 15 # 150 + MetaModelOpts.ExpDesign.mod_LOO_threshold = 1e-16 # Defining the measurement error, if it's known a priori obsData = pd.DataFrame(Model.observations, columns=Model.Output.names) @@ -150,56 +150,55 @@ if __name__ == "__main__": MetaModelOpts.Discrepancy = DiscrepancyOpts # Plot the posterior snapshots for SeqDesign - MetaModelOpts.ExpDesign.PostSnapshot = False - MetaModelOpts.ExpDesign.stepSnapshot = 1 - MetaModelOpts.ExpDesign.MAP = [0] * ndim - MetaModelOpts.ExpDesign.parNames = [f'$X_{i+1}$' for i in range(ndim)] + MetaModelOpts.ExpDesign.post_snapshot = False + MetaModelOpts.ExpDesign.step_snapshot = 1 + MetaModelOpts.ExpDesign.max_a_post = [0] * ndim # ------------------------------------------------ # ------- Sequential Design configuration -------- # ------------------------------------------------ - MetaModelOpts.adaptVerbose = True - # 1) 'None' 2) 'equal' 3)'epsilon-decreasing' 4) 'adaptive' - MetaModelOpts.ExpDesign.TradeOffScheme = 'None' + MetaModelOpts.adapt_verbose = True + # 1) None 2) 'equal' 3)'epsilon-decreasing' 4) 'adaptive' + MetaModelOpts.ExpDesign.tradeoff_scheme = None # MetaModelOpts.ExpDesign.nReprications = 20 # -------- Exploration ------ # 1)'Voronoi' 2)'random' 3)'latin_hypercube' 4)'LOOCV' 5)'dual annealing' - MetaModelOpts.ExpDesign.ExploreMethod = 'latin_hypercube' + MetaModelOpts.ExpDesign.explore_method = 'latin_hypercube' # Use when 'dual annealing' chosen MetaModelOpts.ExpDesign.MaxFunItr = 200 # Use when 'Voronoi' or 'random' or 'latin_hypercube' chosen - 
MetaModelOpts.ExpDesign.NCandidate = 5000 - MetaModelOpts.ExpDesign.NrofCandGroups = 4 # 8 + MetaModelOpts.ExpDesign.n_canddidate = 5000 + MetaModelOpts.ExpDesign.n_cand_groups = 4 # 8 # -------- Exploitation ------ # 1)'BayesOptDesign' 2)'BayesActDesign' 3)'VarOptDesign' 4)'alphabetic' # 5)'Space-filling' - MetaModelOpts.ExpDesign.ExploitMethod = 'VarOptDesign' + MetaModelOpts.ExpDesign.exploit_method = 'VarOptDesign' # BayesOptDesign -> when data is available # 1)DKL (Kullback-Leibler Divergence) 2)DPP (D-Posterior-percision) # 3)APP (A-Posterior-percision) # ['DKL', 'BME', 'infEntropy'] - # MetaModelOpts.ExpDesign.UtilityFunction = 'DKL' + # MetaModelOpts.ExpDesign.util_func = 'DKL' # VarBasedOptDesign -> when data is not available # Only with Vornoi >>> 1)Entropy 2)EIGF, 3)LOOCV # or a combination as a list - MetaModelOpts.ExpDesign.UtilityFunction = 'Entropy' + MetaModelOpts.ExpDesign.util_func = 'Entropy' # alphabetic # 1)D-Opt (D-Optimality) 2)A-Opt (A-Optimality) # 3)K-Opt (K-Optimality) or a combination as a list - # MetaModelOpts.ExpDesign.UtilityFunction = 'D-Opt' + # MetaModelOpts.ExpDesign.util_func = 'D-Opt' - # For colculation of validation error for SeqDesign + # For calculation of validation error for SeqDesign prior = np.load(f"data/Prior_{ndim}.npy") prior_outputs = np.load(f"data/origModelOutput_{ndim}.npy") likelihood = np.load(f"data/validLikelihoods_{ndim}.npy") - MetaModelOpts.validSamples = prior - MetaModelOpts.validModelRuns = {'Z': prior_outputs} - MetaModelOpts.validLikelihoods = likelihood + MetaModelOpts.valid_samples = prior + MetaModelOpts.valid_model_runs = {'Z': prior_outputs} + MetaModelOpts.valid_likelihoods = likelihood # >>>>>>>>>>>>>>>>>>>>>> Build Surrogate <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # Train the meta model @@ -222,7 +221,7 @@ if __name__ == "__main__": PostPCE.validMetamodel(nValidSamples=3) # Compute the moments and compare with the Monte-Carlo reference - if MetaModelOpts.metaModel != 'GPE': + if MetaModelOpts.meta_model_type != 'GPE': PostPCE.plotMoments() # Plot the evolution of the KLD,BME, and Modified LOOCV error @@ -231,7 +230,7 @@ if __name__ == "__main__": PostPCE.seqDesignDiagnosticPlots(refBME_KLD) # Plot the sobol indices - if MetaModelOpts.metaModel != 'GPE': + if MetaModelOpts.meta_model_type != 'GPE': sobol_cell, total_sobol = PostPCE.sobolIndicesPCE() # Compute and print RMSE error diff --git a/BayesValidRox/tests/BeamTest/Test_Beam.py b/BayesValidRox/tests/BeamTest/Test_Beam.py index 0d7d1a141d6ea07f1f5d61f6a5c8405974532020..a64fa99bdb2039b067d9b2fe9372e6f78e497a95 100644 --- a/BayesValidRox/tests/BeamTest/Test_Beam.py +++ b/BayesValidRox/tests/BeamTest/Test_Beam.py @@ -21,7 +21,7 @@ import pandas as pd import joblib from PyLink.PyLinkForwardModel import PyLinkForwardModel from surrogate_models.Input import Input -from surrogate_models.surrogate_models import Metamodel +from surrogate_models.surrogate_models import MetaModel from PostProcessing.PostProcessing import PostProcessing from BayesInference.BayesInference import BayesInference, Discrepancy @@ -81,7 +81,7 @@ if __name__ == "__main__": # ===================================================== # ========== DEFINITION OF THE METAMODEL ============ # ===================================================== - MetaModelOpts = Metamodel(Inputs) + MetaModelOpts = MetaModel(Inputs) # Select if you want to preserve the spatial/temporal depencencies # MetaModelOpts.DimRedMethod = 'PCA'