diff --git a/BayesValidRox/BayesInference/BayesInference.py b/BayesValidRox/BayesInference/BayesInference.py
index 831392b0e7acb5d9b7805629ac01da5b145d23f1..a5ac810d0536e40327390ad6e490032200a1ad93 100644
--- a/BayesValidRox/BayesInference/BayesInference.py
+++ b/BayesValidRox/BayesInference/BayesInference.py
@@ -179,10 +179,10 @@ class BayesInference:
         """
         from sklearn.gaussian_process.kernels import RBF
-
+        formatting_function = np.vectorize(lambda f: format(f, '6.2E'))
         min_max_scaler = preprocessing.MinMaxScaler()
         X_minmax = min_max_scaler.fit_transform(X)
-
+
         nparams = len(Sigma2)
         # characteristic length (0,1]
         Lambda = Sigma2[0]
@@ -192,7 +192,7 @@ class BayesInference:
         # cov_matrix = sigma2_f*rbf_kernel(X_minmax, gamma = 1/Lambda**2)
         rbf = RBF(length_scale=Lambda)
-        cov_matrix = sigma2_f*rbf(X_minmax)
+        cov_matrix = sigma2_f * rbf(X_minmax)
 
         if nparams > 2:
             # (unresolvable error) nugget term that is interpreted as random
@@ -209,13 +209,13 @@
         Model = self.PCEModel.ModelObj
         logLik = 0.0
-        formatting_function = np.vectorize(lambda f: format(f, '6.2E'))
+        formatting_function = np.vectorize(lambda f: format(f, '6.2e'))
 
         # Extract the requested model outputs for likelihood calulation
         if self.ReqOutputType is None:
             OutputType = Model.Output.Names
         else:
             OutputType = list(self.ReqOutputType)
-
+
         # Loop over the outputs and calculate logLikelihood
         for idx, out in enumerate(OutputType):
@@ -235,12 +235,12 @@
 
             if Sigma2 is not None:
                 # Check the type error term
-                if hasattr(self, 'BiasInputs'):
+                if hasattr(self, 'BiasInputs') and not hasattr(self, 'discrepancy_GP'):
                     # TODO: Infer a Bias model usig Gaussian Process Regression
-                    # BiasInputs = np.hstack((self.BiasInputs[out],TotalOutputs.T))
-                    EDY = self.PCEModel.ExpDesign.Y[out]
-                    BiasInputs = np.hstack((self.BiasInputs[out],EDY.T))
-
+                    BiasInputs = np.hstack((self.BiasInputs[out],TotalOutputs.T))
+                    # EDY = self.PCEModel.ExpDesign.Y[out]
+                    # BiasInputs = np.hstack((self.BiasInputs[out],EDY.T))
+
                     biasSigma2s = self.RBF_kernel(BiasInputs, Sigma2)
                     # biasSigma2s = self.RBF_kernel(self.BiasInputs[out], Sigma2)
                 else:
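The hunks above tidy the RBF-based error covariance used in the likelihood: inputs are min-max scaled, an `RBF` kernel with length scale `Lambda` is evaluated, and the result is scaled by the signal variance `sigma2_f`. As a reading aid, here is a minimal standalone sketch of what `RBF_kernel` assembles; the helper name `rbf_cov` and the assumed `Sigma2` layout `[Lambda, sigma2_f, sigma2_0]` are inferred from the surrounding code, not part of the patch:

```python
import numpy as np
from sklearn import preprocessing
from sklearn.gaussian_process.kernels import RBF

def rbf_cov(X, Sigma2):
    """Sketch of the covariance assembled by RBF_kernel (illustrative)."""
    # Scale inputs to [0, 1] so a single length scale fits all columns
    X_minmax = preprocessing.MinMaxScaler().fit_transform(X)

    Lambda, sigma2_f = Sigma2[0], Sigma2[1]  # characteristic length, signal variance
    cov_matrix = sigma2_f * RBF(length_scale=Lambda)(X_minmax)

    # Optional nugget: unresolvable error interpreted as random, on the diagonal
    if len(Sigma2) > 2:
        cov_matrix += Sigma2[2] * np.eye(X.shape[0])
    return cov_matrix
```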
@@ -251,23 +251,19 @@
                         sigma2 = 0.0
 
                 # Convert biasSigma2s to a covMatrix
-                # biasSigma2s = np.diag( sigma2 * np.ones(shape=nout) )
+                # biasSigma2s = sigma2 * np.eye(nout)
                 biasSigma2s = np.diag( sigma2 * totalSigma2s)
 
                 # Add Sigma2 to covMatrix
                 # covMatrix += biasSigma2s
                 covMatrix = biasSigma2s
 
-            # TODO: Infer a Bias model usig Gaussian Process Regression
-            if hasattr(self, 'BiasInputs'):
-                delta,covMatrix = self.discrepancy_GP.predict(theta, out)
-
             # Add the std of the PCE is chosen as emulator.
             if self.emulator:
                 stdPCE = std[out] if std is not None else np.mean(self._stdPCEPriorPred[out], axis=0)
                 # Expected value of variance (Assump: i.i.d stds)
                 covMatrix += np.diag(stdPCE**3)
-                # print(formatting_function(stdPCE**3))
+                # print(formatting_function(covMatrix))
 
             # Select the data points to compare
             if self.selectedIndices is not None:
                 TotalOutputs = TotalOutputs[:,self.selectedIndices[out]]
@@ -558,7 +554,11 @@ class BayesInference:
         PCEModel = self.PCEModel
         Model = PCEModel.ModelObj
-
+        if hasattr(PCEModel, 'errorModel') and PCEModel.errorModel:
+            discrepancy = True
+        else:
+            discrepancy = False
+
         # Make a directory to save the prior/posterior predictive
         OutputDir = (r'Outputs_Bayes_' + Model.Name + '_' + self.Name)
         if not os.path.exists(OutputDir): os.makedirs(OutputDir)
@@ -595,7 +595,8 @@
         if self.emulator:
             if self.SamplingMethod == 'rejection':
                 PriorPred = self.__meanPCEPriorPred
-            PosteriorPred, _ = PCEModel.eval_metamodel(samples=Posterior_df.to_numpy(),name=self.Name)
+            PosteriorPred, _ = PCEModel.eval_metamodel(samples=Posterior_df.to_numpy(),
+                                                       name=self.Name,discrepModel=discrepancy)
         else:
             if self.SamplingMethod == 'rejection':
                 PriorPred = self.__ModelPriorPred
@@ -614,13 +615,13 @@
                 # Check the type error term
                 if Sigma2Prior is not None:
                     # Inferred sigma2s
-                    if hasattr(self, 'BiasInputs'):
+                    if hasattr(self, 'BiasInputs') and not hasattr(self, 'discrepancy_GP'):
                         # TODO: Infer a Bias model usig Gaussian Process Regression
                         EDY = self.PCEModel.ExpDesign.Y[var]
-                        BiasInputs = np.hstack((self.BiasInputs[var],EDY.T))
-                        # BiasInputs = np.hstack((self.BiasInputs[var],pred.reshape(-1,1)))
-                        biasSigma2s = self.RBF_kernel(BiasInputs,sigma2s[i])
-                        # biasSigma2s = self.RBF_kernel(self.BiasInputs[var], sigma2s[i])
+                        # BiasInputs = np.hstack((self.BiasInputs[var],EDY.T))
+                        BiasInputs = np.hstack((self.BiasInputs[var],pred.reshape(-1,1)))
+                        cov = self.RBF_kernel(BiasInputs,sigma2s[i])
+                        # cov = self.RBF_kernel(self.BiasInputs[var], sigma2s[i])
                     else:
                         # Infer equal sigma2s
                         try:
@@ -629,24 +630,11 @@
                             sigma2 = 0.0
 
                         # Convert biasSigma2s to a covMatrix
-                        # biasSigma2s = np.diag(sigma2 * np.ones(len(pred)))
-                        biasSigma2s = np.diag( sigma2 * totalSigma2)
-
-                        # cov += biasSigma2s
-                        cov = biasSigma2s
-
-                    # TODO: Infer a Bias model usig Gaussian Process Regression
-                    if hasattr(self, 'BiasInputs'):
-                        pred,cov = self.discrepancy_GP.predict_SigmaBias(pred, var)
-
-                    if self.selectedIndices is not None:
-                        newcov = np.diag(totalSigma2)
-                        newcov[self.selectedIndices[var],self.selectedIndices[var]] = cov[self.selectedIndices[var],self.selectedIndices[var]]
-                    else:
-                        newcov = cov
+                        # cov = np.diag(sigma2 * np.ones(len(pred)))
+                        cov = np.diag( sigma2 * totalSigma2)
 
                     # Sample a multi-variate normal distribution with mean of prediction and variance of cov
-                    PosteriorPred[var][i] = np.random.multivariate_normal(pred, newcov, 1)
+                    PosteriorPred[var][i] = np.random.multivariate_normal(pred, cov, 1)
 
             # ----- Prior Predictive -----
             if self.SamplingMethod == 'rejection':
@@ -731,6 +719,15 @@
             self.MeasuredData = pd.DataFrame(self.MeasuredData)
         ObservationData = self.MeasuredData[OutputNames].to_numpy()
 
+        # TODO: Create a GPR based Bias model
+        if hasattr(self, 'BiasInputs'):
+            from .discrepancy_GP import Bias
+            self.discrepancy_GP = Bias()
+            self.discrepancy_GP.fit_bias(self.PCEModel.ExpDesign.X,
+                                         self.PCEModel.ExpDesign.Y,
+                                         self.MeasuredData)
+
+
         nMeasurement, nOutputs = ObservationData.shape
         self.ntotMeasurement = ObservationData[~np.isnan(ObservationData)].shape[0]
         BootstrapItrNr = self.BootstrapItrNr if not self.BayesLOOCV else self.ntotMeasurement
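The `@@ -629` hunk above collapses the posterior-predictive covariance logic to a plain diagonal matrix (the `newcov`/`selectedIndices` shuffling and the `predict_SigmaBias` branch are gone). A sketch of the resulting draw for a single posterior sample; the function wrapper and `rng` argument are illustrative, and `totalSigma2` is assumed to hold the reference variances from the Discrepancy object:

```python
import numpy as np

def draw_posterior_predictive(pred, sigma2, totalSigma2, rng=None):
    """One draw of PosteriorPred[var][i] after this patch (illustrative)."""
    rng = np.random.default_rng() if rng is None else rng
    # Diagonal covariance, as in `cov = np.diag(sigma2 * totalSigma2)`
    cov = np.diag(sigma2 * totalSigma2)
    # Multivariate normal around the (surrogate) mean prediction
    return rng.multivariate_normal(pred, cov)
```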
@@ -826,7 +823,13 @@
         # Evaluate the PCEModel
         if self.emulator:
-            self.__meanPCEPriorPred, self._stdPCEPriorPred = PCEModel.eval_metamodel(samples=self.Samples,name=self.Name)
+            if hasattr(PCEModel, 'errorModel') and PCEModel.errorModel:
+                discrepancy = True
+            else:
+                discrepancy = False
+
+            self.__meanPCEPriorPred, self._stdPCEPriorPred = PCEModel.eval_metamodel(samples=self.Samples,
+                                                                                     name=self.Name,discrepModel=discrepancy)
             # Surrogate model's error using RMSE of test data
             surrError = PCEModel.RMSE if hasattr(PCEModel,'RMSE') else None
         else:
@@ -909,14 +912,6 @@
 
         # ---------------- Parameter Bayesian inference ----------------
         if self.SamplingMethod.lower() == 'mcmc':
-            # TODO: Create a GPR based Bias model
-            if hasattr(self, 'BiasInputs'):
-                from .discrepancy_GP import Bias
-                self.discrepancy_GP = Bias()
-                self.discrepancy_GP.fit_bias(self.PCEModel.ExpDesign.X,
-                                             self.PCEModel.ExpDesign.Y,
-                                             self.MeasuredData)
-
             # Convert the pandas data frame to numpy, if it's applicable.
             if self.MCMCinitSamples is not None and isinstance(self.MCMCinitSamples, pd.DataFrame):
                 self.MCMCinitSamples = self.MCMCinitSamples.to_numpy()
@@ -961,7 +956,7 @@
         figPosterior = corner.corner(self.Posterior_df.to_numpy(),
                                      labels=parNames,
                                      quantiles=[0.15, 0.5, 0.85],show_titles=True,
                                      title_fmt=self.Corner_title_fmt,
-                                     labelpad=0.2,
+                                     labelpad=0.15,
                                      use_math_text = True,
                                      title_kwargs={"fontsize": 28})
diff --git a/BayesValidRox/BayesInference/MCMC.py b/BayesValidRox/BayesInference/MCMC.py
index 0db750ab88e192fcbf334bc5f9d206243ac84757..e241c614aba5904fea5320c5785d20b60ef47eeb 100755
--- a/BayesValidRox/BayesInference/MCMC.py
+++ b/BayesValidRox/BayesInference/MCMC.py
@@ -168,7 +168,7 @@ class MCMC():
             if self.verbose:
                 print("\nStep: {}".format(sampler.iteration))
                 print("Mean acceptance fraction: {0:.3f}".format(np.mean(sampler.acceptance_fraction)))
-
+
             # compute the autocorrelation time so far
             # using tol=0 means that we'll always get an estimate even if it isn't trustworthy
             tau = sampler.get_autocorr_time(tol=0)
@@ -409,10 +409,15 @@
         BayesOpts = self.BayesOpts
 
         PCEModel = BayesOpts.PCEModel
+        if hasattr(PCEModel, 'errorModel') and PCEModel.errorModel:
+            discrepancy = True
+        else:
+            discrepancy = False
         Model = PCEModel.ModelObj
 
         if BayesOpts.emulator:
             # Evaluate the PCEModel
-            meanPred, stdPred = PCEModel.eval_metamodel(samples=theta, name=BayesOpts.Name)
+            meanPred, stdPred = PCEModel.eval_metamodel(samples=theta, name=BayesOpts.Name,
+                                                        discrepModel=discrepancy)
         else:
             # Evaluate the origModel
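The `hasattr(PCEModel, 'errorModel') and PCEModel.errorModel` check is now repeated verbatim at three call sites (twice in BayesInference.py, once in MCMC.py above). A possible follow-up, not part of this patch, is the equivalent one-liner:

```python
# Equivalent to the repeated four-line hasattr/else blocks: getattr with a
# default covers both a missing attribute and a falsy one (suggestion only).
discrepancy = bool(getattr(PCEModel, 'errorModel', False))

meanPred, stdPred = PCEModel.eval_metamodel(samples=theta, name=BayesOpts.Name,
                                            discrepModel=discrepancy)
```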
diff --git a/BayesValidRox/surrogate_models/surrogate_models.py b/BayesValidRox/surrogate_models/surrogate_models.py
index 0dd8dd2a813ad0aa5e810bd75c6213d97c2258a2..77fcf3495a108441512e0b407a3f2375e93e066e 100644
--- a/BayesValidRox/surrogate_models/surrogate_models.py
+++ b/BayesValidRox/surrogate_models/surrogate_models.py
@@ -85,7 +85,7 @@ class Metamodel:
         self.ModelOutputDict= None
         self.qNormDict = {}
         self.OptimalCollocationPointsBase = []
-        self.errorModel = []
+        # self.errorModel = []
         self.SeqModifiedLOO = {}
         self.seqValidError = {}
         self.SeqBME = {}
@@ -716,18 +716,19 @@ class Metamodel:
         return pca, OutputMatrix
 
     #--------------------------------------------------------------------------------------------------------
-    def GaussianProcessEmulator(self, X, y, autoSelect=False, varIdx=None):
+    def GaussianProcessEmulator(self, X, y, nugTerm=None,autoSelect=False, varIdx=None):
 
         from sklearn.gaussian_process import GaussianProcessRegressor
         from sklearn.gaussian_process.kernels import (RBF, Matern, RationalQuadratic,
                                                       ExpSineSquared, DotProduct,
                                                       ConstantKernel)
+
+        nugTerm = nugTerm if nugTerm else np.var(y)
-
-        kernels = [np.var(y) * RBF(length_scale=1.0, length_scale_bounds=(1e-5, 1e5)),
-                   np.var(y) * RationalQuadratic(length_scale=0.2, alpha=1.0),
+        kernels = [nugTerm * RBF(length_scale=1.0, length_scale_bounds=(1e-15, 1e5)),
+                   nugTerm * RationalQuadratic(length_scale=0.2, alpha=1.0),
                    # 1.0 * ExpSineSquared(length_scale=1.0, length_scale_bounds=(1e-05, 1e05)),
-                   np.var(y) * Matern(length_scale=1.0, length_scale_bounds=(1e-1, 1e1),
+                   nugTerm * Matern(length_scale=1.0, length_scale_bounds=(1e-1, 1e1),
                                      nu=1.5)]
 
         if autoSelect:# Automatic selection of the kernel
@@ -746,9 +747,9 @@
         else:
             gp = GaussianProcessRegressor(kernel=kernels[0], n_restarts_optimizer=5,
-                                          normalize_y=False)
+                                          alpha = 1e-15, normalize_y=False)
             gp.fit(X, y)
-
+
         # Compute score
         if varIdx is not None:
             Score = gp.score(X, y)
@@ -872,9 +873,10 @@
                 # self.gp_poly[key]["y_"+str(idx+1)] = self.GaussianProcessEmulator(CollocationPoints, self.LCerror[key]["y_"+str(idx+1)])
                 # self.gp_poly[key]["y_"+str(idx+1)] = outDict[idx]
 
-        # Construct error model based on LCerror
-        if self.metaModel.lower() == 'pcekriging':
-            errorModel = self.create_ModelError(CollocationPoints)
+        # Construct Discrepancy model
+        if hasattr(self, 'errorModel'):
+            name='valid' if 'valid' in self.ModelObj.Name else 'calib'
+            errorModel = self.create_ModelError(name)
 
         if not OptDesignFlag:
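A note on `GaussianProcessEmulator`: despite its name, `nugTerm` is a signal-variance prefactor on the kernels (defaulting to `np.var(y)`), while the actual nugget is the new `alpha=1e-15` passed to the regressor, which adds a tiny diagonal jitter for numerical stability. A condensed sketch of the patched defaults, RBF branch only (the helper name is illustrative):

```python
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF

def fit_gp(X, y, nugTerm=None):
    """RBF branch of the patched GaussianProcessEmulator (illustrative)."""
    # Scale the kernel by the data variance unless a value is supplied,
    # mirroring `nugTerm = nugTerm if nugTerm else np.var(y)`
    nugTerm = nugTerm if nugTerm else np.var(y)
    kernel = nugTerm * RBF(length_scale=1.0, length_scale_bounds=(1e-15, 1e5))
    # alpha adds a tiny diagonal jitter for a stable Cholesky factorization
    gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=5,
                                  alpha=1e-15, normalize_y=False)
    return gp.fit(X, y)
```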
""" - outputNames = self.ModelObj.Output.Names + Model = self.ModelObj + outputNames = Model.Output.Names self.errorRegMethod = 'GPE' self.errorCoeffsDict= self.AutoVivification() self.errorBasisDict = self.AutoVivification() self.errorScoresDict = self.AutoVivification() self.errorclf_poly = self.AutoVivification() self.errorLCerror = self.AutoVivification() - - # Metamodel's prediction at EDX - EDY_hat,_ = self.eval_metamodel(self.ExpDesign.X) + self.errorScale = {} # Original EDY EDY = self.ExpDesign.Y - # Evaluate the univariate polynomials on ExpDesign - if self.errorRegMethod != 'GPE': - self.erroruniv_p_val = self.univ_basis_vals(CollocationPoints) + # Prepare the input matrix + from sklearn.preprocessing import MinMaxScaler + self.errorScale = {} - for key in outputNames: - - for idx in range(EDY[key].shape[1]): - - Y_ED_hat = EDY_hat[key][:,idx] - Y_ED = EDY[key][:,idx] - ModifiedLOO = abs(Y_ED-Y_ED_hat) + # Read data + if Name.lower() == 'calib': + MeasuredData = Model.read_Observation() + else: + MeasuredData = Model.read_ObservationValid() - if self.errorRegMethod == 'GPE': - self.errorclf_poly[key]["y_"+str(idx+1)] = self.GaussianProcessEmulator(CollocationPoints, ModifiedLOO) - else: - outDict = self.adaptiveSparseaPCE(CollocationPoints, ModifiedLOO,idx) - - self.errorCoeffsDict[key]["y_"+str(idx+1)] = outDict['coeffs'] - self.errorBasisDict[key]["y_"+str(idx+1)] = outDict['PolynomialDegrees'] - self.errorScoresDict[key]["y_"+str(idx+1)] = outDict['LOOCVScore'] - self.errorclf_poly[key]["y_"+str(idx+1)] = outDict['clf_poly'] - self.errorLCerror[key]["y_"+str(idx+1)] = outDict['LCerror'] + for out in tqdm(outputNames, ascii=True, desc ="Fitting {} based bias model".format(self.errorRegMethod)): + # Select data + try: + data = MeasuredData[out].to_numpy()[~np.isnan(MeasuredData[out])] + except: + data = MeasuredData[out][~np.isnan(MeasuredData[out])] + + # Prepare the input matrix + self.errorScale[out] = MinMaxScaler() + Expected_Y = np.mean(EDY[out],axis=0).reshape(-1,1) + BiasInputs = np.hstack((self.BiasInputs[out],Expected_Y)) + X_S = self.errorScale[out].fit_transform(BiasInputs) + + self.errorclf_poly[out]["y_1"] = self.GaussianProcessEmulator(X_S, data,nugTerm=np.var(data)) + return self #-------------------------------------------------------------------------------------------------------- - def eval_errormodel(self,Samples): - + def eval_errormodel(self,predictions): meanPCEOutputs = {} stdPCEOutputs = {} - univ_p_val = self.univ_basis_vals(Samples) - + for Outkey, ValuesDict in self.errorclf_poly.items(): - PCEOutputs_mean = np.zeros((len(Samples), len(ValuesDict))) - PCEOutputs_std = np.zeros((len(Samples), len(ValuesDict))) + PCEOutputs_mean = np.zeros_like(predictions[Outkey]) + PCEOutputs_std = np.zeros_like(predictions[Outkey]) for Inkey, InIdxValues in ValuesDict.items(): - idx = int(Inkey.split('_')[1]) - 1 - if self.errorRegMethod == 'GPE': - gp = self.errorclf_poly[Outkey][Inkey] - PCEOutputs_mean[:, idx], PCEOutputs_std[:, idx] = gp.predict(Samples, return_std=True) + gp = self.errorclf_poly[Outkey][Inkey] - else: - PolynomialDegrees = self.errorBasisDict[Outkey][Inkey] - PSI_Val = self.PCE_create_Psi(PolynomialDegrees, univ_p_val) + # Transform Samples using scaler + for j,pred in enumerate(predictions[Outkey]): + BiasInputs = np.hstack((self.BiasInputs[Outkey],pred.reshape(-1,1))) + Samples_S = self.errorScale[Outkey].transform(BiasInputs) + + PCEOutputs_mean[j], PCEOutputs_std[j] = gp.predict(Samples_S, return_std=True) - # Perdiction - try: # with 
@@ -1964,7 +1954,7 @@
         return
 
     #--------------------------------------------------------------------------------------------------------
-    def eval_metamodel(self,samples=None, nsamples=None,Y=None, samplingMethod='random', name='Calib', return_samples=False):
+    def eval_metamodel(self,samples=None, nsamples=None,Y=None, discrepModel=False,samplingMethod='random', name='Calib', return_samples=False):
 
         ModelDict = self.gp_poly if self.metaModel.lower() == 'gpe' else self.CoeffsDict
         rosenblatt = self.Inputs.Rosenblatt
@@ -1994,9 +1984,6 @@
         meanPCEOutputs = {}
         stdPCEOutputs = {}
 
-        # TODO: Evaluate error model
-        if self.metaModel.lower() == 'pcekriging':
-            y_error, std_error = self.eval_errormodel(samples)
 
         # Loop over outputs
         cnt = 0
@@ -2053,8 +2040,9 @@
                 meanPCEOutputs[Outkey] = PCEOutputs_mean
                 stdPCEOutputs[Outkey] = PCEOutputs_std
-                if self.metaModel.lower() == 'pcekriging' and bool(y_error):
-                    stdPCEOutputs[Outkey] += y_error[Outkey]
+                # if discrepModel and bool(y_error):
+                #     meanPCEOutputs[Outkey] += y_error[Outkey]
+                #     stdPCEOutputs[Outkey] += std_error[Outkey]
             else:
                 index = self.index[cnt]
                 if name.lower() == 'calib':
@@ -2065,8 +2053,8 @@
                     else:
                         meanPCEOutputs[Outkey] = PCEOutputs_mean[:,:index]
                         stdPCEOutputs[Outkey] = PCEOutputs_std[:,:index]
-                        if self.metaModel.lower() == 'pcekriging' and bool(y_error):
-                            stdPCEOutputs[Outkey] += y_error[Outkey][:,:index]
+                        # if discrepModel and bool(y_error):
+                        #     stdPCEOutputs[Outkey] += y_error[Outkey][:,:index]
                 else:
                     if self.DimRedMethod.lower() == 'pca':
                         PCA = self.pca[Outkey]
@@ -2075,10 +2063,14 @@
                     else:
                         meanPCEOutputs[Outkey] = PCEOutputs_mean[:,index:]
                         stdPCEOutputs[Outkey] = PCEOutputs_std[:,index:]
-                        if self.metaModel.lower() == 'pcekriging' and bool(y_error):
-                            stdPCEOutputs[Outkey] += y_error[Outkey][:,index:]
+                        # if discrepModel and bool(y_error):
+                        #     stdPCEOutputs[Outkey] += y_error[Outkey][:,index:]
             cnt += 1
-
+
+        # Evaluate error model
+        if discrepModel:
+            meanPCEOutputs, stdPCEOutputs = self.eval_errormodel(meanPCEOutputs)
+
         if return_samples:
             return meanPCEOutputs, stdPCEOutputs, samples
         else:
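Net effect of the `eval_metamodel` changes: the inline `y_error` correction tied to `metaModel == 'pcekriging'` is retired (left commented out), and when `discrepModel=True` the assembled means are passed through `eval_errormodel`, whose GP predictions replace both mean and std. A hypothetical call site, assuming `PCEModel` is a trained `Metamodel` whose error model was fitted:

```python
# Hypothetical usage; `theta` is an (nsamples, nparams) array of parameter sets.
mean, std = PCEModel.eval_metamodel(samples=theta, name='calib',
                                    discrepModel=True)
# mean/std are the bias GP's predictions at the PCE means, because the
# method now ends with:
#     meanPCEOutputs, stdPCEOutputs = self.eval_errormodel(meanPCEOutputs)
```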