diff --git a/examples/analytical-function/example_analytical_function.py b/examples/analytical-function/example_analytical_function.py
index 52e7731b576c42cdd29a705865f0a0389f812654..e052b57d0e0efcdaea0bffecdbc8590efc4b31c3 100644
--- a/examples/analytical-function/example_analytical_function.py
+++ b/examples/analytical-function/example_analytical_function.py
@@ -25,7 +25,7 @@ matplotlib.use('agg')
 # import bayesvalidrox
 # Add BayesValidRox path
-sys.path.append("src/")
+sys.path.append("../../src/")
 
 from bayesvalidrox.pylink.pylink import PyLinkForwardModel
 from bayesvalidrox.surrogate_models.inputs import Input
diff --git a/src/bayesvalidrox/bayes_inference/bayes_inference.py b/src/bayesvalidrox/bayes_inference/bayes_inference.py
index 00971b92af1bf574d1c61a470cfdde7506cffecb..b1208f573187135db46e47226ac2c102e2c9c2cc 100644
--- a/src/bayesvalidrox/bayes_inference/bayes_inference.py
+++ b/src/bayesvalidrox/bayes_inference/bayes_inference.py
@@ -158,9 +158,40 @@ class BayesInference:
         self.plot_map_pred = plot_map_pred
         self.max_a_posteriori = max_a_posteriori
         self.corner_title_fmt = corner_title_fmt
+<<<<<<< HEAD
 
     # -------------------------------------------------------------------------
     def create_inference(self):
+=======
+        self.out_dir = out_dir
+        self.bmc = bmc  # Set to True to skip straight to model comparison
+
+        # Other properties and parameters (found in code, but never set)
+        self.error_model = False  # TODO: no example or use case for this!
+        self.bias_inputs = None
+        self.measurement_error = None  # TODO: what is this?
+        self.sigma2s = None
+        self.log_likes = None
+        self.n_tot_measurement = None
+        self.Discrepancy = None
+        self.posterior_df = None
+        self.error_MetaModel = None
+        self._mean_pce_prior_pred = None
+        self._std_pce_prior_pred = None
+        self.__model_prior_pred = None
+        self.MCMC_Obj = None
+
+        # System settings
+        if os.name == 'nt':
+            print('')
+            print('WARNING: Performing the inference on Windows can lead to reduced accuracy!')
+            print('')
+            self.dtype = np.longdouble
+        else:
+            self.dtype = np.float128
+
+    def setup_inference(self):
+>>>>>>> 2306e76d (Decoupled MCMC from BayesInference and improved performance)
         """
         Starts the inference.
 
@@ -178,10 +209,23 @@ class BayesInference:
         output_names = Model.Output.names
         par_names = self.engine.ExpDesign.par_names
 
+<<<<<<< HEAD
         # If the prior is set by the user, take it.
         if self.samples is None:
             self.samples = self.engine.ExpDesign.generate_samples(
                 self.n_samples, 'random')
+=======
+        # Create output directory
+        if self.out_dir == '':
+            self.out_dir = f'Outputs_Bayes_{self.engine.Model.name}_{self.name}'
+        os.makedirs(self.out_dir, exist_ok=True)
+
+        # If the prior is set by the user, take it; else generate it from the ExpDesign
+        if self.prior_samples is None:
+            print('Generating prior samples from the Experimental Design.')
+            self.prior_samples = self.engine.ExpDesign.generate_samples(
+                self.n_prior_samples, 'random')
+>>>>>>> 2306e76d (Decoupled MCMC from BayesInference and improved performance)
         else:
             try:
                 samples = self.samples.values
@@ -197,6 +241,7 @@ class BayesInference:
 
         # ---------- Preparation of observation data ----------
         # Read observation data and perturb it if requested.
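+        # NOTE: 'perturb' refers to the bootstrap step further below, where
+        # zero-mean noise is added to the data via _perturb_data; reading the
+        # data itself happens in Model.read_observation just after this.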
         if self.measured_data is None:
+            print('Reading the observation data.')
             self.measured_data = Model.read_observation(case=self.name)
 
         # Convert measured_data to a data frame
         if not isinstance(self.measured_data, pd.DataFrame):
@@ -255,8 +300,13 @@ class BayesInference:
 
                 total_sigma2[key] = sigma2
 
+<<<<<<< HEAD
             self.Discrepancy.opt_sigma = opt_sigma
             self.Discrepancy.total_sigma2 = total_sigma2
+=======
+            # TODO: can use this to debug!
+            self.Discrepancy.total_sigma2 = total_sigma2
+>>>>>>> 2306e76d (Decoupled MCMC from BayesInference and improved performance)
 
         # If inferred sigma2s obtained from e.g. calibration are given
         try:
@@ -451,10 +501,29 @@ class BayesInference:
         return self
 
         # ---------------- Parameter Bayesian inference ----------------
+<<<<<<< HEAD
         if self.inference_method.lower() == 'mcmc':
             # Instantiate the MCMC object
             MCMC_Obj = MCMC(self)
             self.posterior_df = MCMC_Obj.run_sampler(
+=======
+        # Convert to a dataframe if samples are provided after calibration.
+        if self.name.lower() == 'valid':
+            self.posterior_df = pd.DataFrame(self.prior_samples, columns=self.engine.ExpDesign.par_names)
+        # Instantiate the MCMC object
+        elif self.inference_method.lower() == 'mcmc':
+            #MCMC_Obj = MCMC(self)
+            # Decoupled MCMC from BayesInf
+            self.MCMC_Obj = MCMC(self.engine, self.mcmc_params,
+                                 self.Discrepancy,
+                                 # TODO: add this in when the error_MetaModel/error_model works
+                                 # self.BiasInputs,
+                                 self.bias_inputs,
+                                 self.error_model, self.req_outputs,
+                                 self.selected_indices, self.emulator,
+                                 self.out_dir, self.name)
+            self.posterior_df = self.MCMC_Obj.run_sampler(
+>>>>>>> 2306e76d (Decoupled MCMC from BayesInference and improved performance)
                 self.measured_data, total_sigma2
             )
 
@@ -475,8 +544,76 @@ class BayesInference:
             print('-'*50)
 
         # -------- Model Discrepancy -----------
+<<<<<<< HEAD
         if hasattr(self, 'error_model') and self.error_model \
                 and self.name.lower() == 'calib':
+=======
+        if self.error_model and self.name.lower() == 'calib' and self.MCMC_Obj is not None:  # TODO: where is this used
+            # and what does it actually do there?
+            self.create_error_model(opt_sigma=opt_sigma,
+                                    type_='posterior', sampler=self.MCMC_Obj)
+
+        # -------- Posterior predictive -----------
+        self._posterior_predictive()
+
+        # ------------------ Visualization --------------------
+        # Posterior parameters
+        self.plot_post_params(opt_sigma)
+
+        # Plot MAP
+        if self.plot_map_pred:
+            self._plot_max_a_posteriori()
+
+        # Plot log_BME dist
+        if self.bootstrap:
+            self.plot_log_BME()
+
+        # Plot posterior predictive
+        if self.plot_post_pred:
+            self._plot_post_predictive()
+
+        return self
+
+    def create_error_model(self, type_='posterior', opt_sigma='B', sampler=None):
+        """
+        Creates an error model in the engine.MetaModel based on input
+        distribution samples of the chosen type.
+
+        Parameters
+        ----------
+        type_ : string, optional
+            Type of parameter samples to use, either 'prior' or 'posterior'.
+            The default is 'posterior'.
+        opt_sigma : string, optional
+            Type of uncertainty description, only used if type_=='posterior'.
+            The default is 'B'.
+        sampler : MCMC, optional
+            Should be an MCMC object if type_=='posterior' and MCMC is used in
+            the inference. In all other cases this parameter is not needed.
+
+        Returns
+        -------
+        None.
+
+        """
+        n_params = self.engine.MetaModel.n_params
+
+        # Get MAP estimate from prior samples
+        if type_ == 'prior':
+            # Select prior mean as MAP (TODO: is the mean a good MAP proxy?)
+            MAP_theta = self.prior_samples.mean(axis=0).reshape((1, n_params))
+
+            # Evaluate the (meta-)model at the MAP
+            y_MAP, y_std_MAP = self.engine.MetaModel.eval_metamodel(samples=MAP_theta)
+
+            # Train a GPR meta-model using MAP
+            self.error_MetaModel = self.engine.MetaModel.create_model_error(
+                self.bias_inputs, y_MAP, self.measured_data, name=self.name
+            )
+
+        # Get MAP estimate from posterior samples
+        if type_ == 'posterior':
+>>>>>>> 2306e76d (Decoupled MCMC from BayesInference and improved performance)
             if self.inference_method.lower() == 'mcmc':
                 self.error_MetaModel = MCMC_Obj.error_MetaModel
             else:
@@ -546,10 +683,65 @@ class BayesInference:
         for ax in figPosterior.axes:
             ax.grid(False)
 
+<<<<<<< HEAD
         if self.emulator:
             plotname = f'/Posterior_Dist_{Model.name}_emulator'
         else:
             plotname = f'/Posterior_Dist_{Model.name}'
+=======
+        # Adding some zero-mean noise to the observation data
+        if len(self.perturbed_data) == 0:
+            self.perturbed_data = self._perturb_data(
+                self.measured_data, output_names
+            )
+        else:
+            self.n_bootstrap_itrs = len(self.perturbed_data)
+
+        # -------- Model Discrepancy -----------
+        if self.error_model and self.name.lower() == 'valid':  # TODO: what should be set so that this is tested?
+            print('Creating the error model.')
+            self.create_error_model(type_='prior')
+
+        # -----------------------------------------------------
+        # ----- Loop over the perturbed observation data ------
+        # -----------------------------------------------------
+        # Initialize arrays
+        logLikelihoods = np.zeros((self.n_prior_samples, self.n_bootstrap_itrs),
+                                  dtype=np.float16)
+        BME_Corr = np.zeros(self.n_bootstrap_itrs)
+        log_BME = np.zeros(self.n_bootstrap_itrs)
+        KLD = np.zeros(self.n_bootstrap_itrs)
+        inf_entropy = np.zeros(self.n_bootstrap_itrs)
+
+        # Compute the prior predictions
+        # Evaluate the MetaModel
+        if self.emulator:
+            print('Evaluating the metamodel on the prior samples.')
+            y_hat, y_std = MetaModel.eval_metamodel(samples=self.prior_samples)
+            self.__mean_pce_prior_pred = y_hat
+            self._std_pce_prior_pred = y_std
+
+            # Correct the predictions with the model discrepancy
+            if self.error_model:  # TODO: this does not check for 'calib'?
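+                # Hedged note: eval_model_error is assumed to take the bias
+                # inputs plus the uncorrected mean prediction and to return a
+                # bias-corrected mean and an updated std, i.e.
+                #     y_corr, y_std = err_mm.eval_model_error(bias, y_hat)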
+                y_hat_corr, y_std = self.error_MetaModel.eval_model_error(
+                    self.bias_inputs, self.__mean_pce_prior_pred)
+                self.__mean_pce_prior_pred = y_hat_corr
+                self._std_pce_prior_pred = y_std
+
+            # Surrogate model's error using RMSE of test data
+            if MetaModel.rmse is not None:
+                surrError = MetaModel.rmse
+            else:
+                surrError = None
+            model_evals = self.__mean_pce_prior_pred
+
+        # Evaluate the model
+        else:
+            print('Evaluating the model on the prior samples.')
+            self.__model_prior_pred = self._eval_model(
+                samples=self.prior_samples, key='PriorPred')
+            model_evals = self.__model_prior_pred
+            surrError = None
+>>>>>>> 2306e76d (Decoupled MCMC from BayesInference and improved performance)
 
         figPosterior.set_size_inches((24, 16))
 
         figPosterior.savefig(f'./{out_dir}{plotname}.pdf',
diff --git a/src/bayesvalidrox/bayes_inference/mcmc.py b/src/bayesvalidrox/bayes_inference/mcmc.py
index fdc70939aa1ab813a9440a23b0d96270fc8ff750..56bcdea0344cbeca45ea638866d24a371772e11b 100755
--- a/src/bayesvalidrox/bayes_inference/mcmc.py
+++ b/src/bayesvalidrox/bayes_inference/mcmc.py
@@ -9,12 +9,181 @@ import matplotlib.pyplot as plt
 from matplotlib.backends.backend_pdf import PdfPages
 import multiprocessing
 import scipy.stats as st
-from scipy.linalg import cholesky as chol
-import warnings
+from scipy import stats
 import shutil
+import scipy.linalg as spla
+from sklearn import preprocessing
 
 os.environ["OMP_NUM_THREADS"] = "1"
 
+<<<<<<< HEAD
+=======
+# -------------------------------------------------------------------------
+def _check_ranges(theta, ranges):  # TODO: this is a replica of exp_designs.check_ranges
+    """
+    This function checks whether theta lies in the given ranges.
+
+    Parameters
+    ----------
+    theta : array
+        Proposed parameter set.
+    ranges : nested list
+        List of the parameter ranges.
+
+    Returns
+    -------
+    c : bool
+        True if theta lies in the given ranges, else False.
+
+    """
+    c = True
+    # traverse in the list1
+    for i, bounds in enumerate(ranges):
+        x = theta[i]
+        # condition check
+        if x < bounds[0] or x > bounds[1]:
+            c = False
+            return c
+    return c
+
+# -------------------------------------------------------------------------
+def gelman_rubin(chain, return_var=False):
+    """
+    The potential scale reduction factor (PSRF) combines the variance
+    within one chain, W, with the variance between chains, B, in a
+    weighted sum to obtain an estimate of the variance of a parameter
+    \\( \\theta \\). The square root of the ratio of this estimated
+    variance to the within-chain variance is called the potential scale
+    reduction.
+    For a well-converged chain it should approach 1. Values greater than
+    1.1 typically indicate that the chains have not yet fully converged.
+
+    Source: http://joergdietrich.github.io/emcee-convergence.html
+
+    https://github.com/jwalton3141/jwalton3141.github.io/blob/master/assets/posts/ESS/rwmh.py
+
+    Parameters
+    ----------
+    chain : array (n_walkers, n_steps, n_params)
+        The emcee ensemble samples.
+
+    Returns
+    -------
+    R_hat : float
+        The Gelman-Rubin value.
+
+    """
+    chain = np.array(chain)
+    m_chains, n_iters = chain.shape[:2]
+
+    # Calculate between-chain variance
+    θb = np.mean(chain, axis=1)
+    θbb = np.mean(θb, axis=0)
+    B_over_n = ((θbb - θb)**2).sum(axis=0)
+    B_over_n /= (m_chains - 1)
+
+    # Calculate within-chain variances
+    ssq = np.var(chain, axis=1, ddof=1)
+    W = np.mean(ssq, axis=0)
+
+    # (over) estimate of variance
+    var_θ = W * (n_iters - 1) / n_iters + B_over_n
+
+    if return_var:
+        return var_θ
+    else:
+        # The square root of the ratio of this estimated variance to the
+        # within-chain variance
+        R_hat = np.sqrt(var_θ / W)
+        return R_hat
+
+# -------------------------------------------------------------------------
+def _kernel_rbf(X, hyperparameters):
+    """
+    Isotropic squared exponential kernel.
+
+    Higher l values lead to smoother functions and therefore to coarser
+    approximations of the training data. Lower l values make functions
+    more wiggly, with wide uncertainty regions between training data points.
+
+    sigma_f controls the marginal variance of b(x).
+
+    Parameters
+    ----------
+    X : ndarray of shape (n_samples_X, n_features)
+
+    hyperparameters : dict
+        Lambda characteristic length
+        sigma_f controls the marginal variance of b(x)
+        sigma_0 unresolvable error nugget term, interpreted as random
+        error that cannot be attributed to measurement error.
+
+    Returns
+    -------
+    var_cov_matrix : ndarray of shape (n_samples_X, n_samples_X)
+        Kernel k(X, X).
+
+    """
+    from sklearn.gaussian_process.kernels import RBF
+    min_max_scaler = preprocessing.MinMaxScaler()
+    X_minmax = min_max_scaler.fit_transform(X)
+
+    nparams = len(hyperparameters)
+    if nparams < 3:
+        raise AttributeError('Provide 3 parameters for the RBF kernel!')
+
+    # characteristic length (0,1]
+    Lambda = hyperparameters[0]
+    # sigma_f controls the marginal variance of b(x)
+    sigma2_f = hyperparameters[1]
+
+    rbf = RBF(length_scale=Lambda)
+    cov_matrix = sigma2_f * rbf(X_minmax)
+
+    # (unresolvable error) nugget term that is interpreted as random
+    # error that cannot be attributed to measurement error.
+    sigma2_0 = hyperparameters[2:]
+    for i, j in np.ndindex(cov_matrix.shape):
+        cov_matrix[i, j] += np.sum(sigma2_0) if i == j else 0
+
+    return cov_matrix
+
+
+# -------------------------------------------------------------------------
+def _logpdf(x, mean, cov):
+    """
+    Computes the log-likelihood based on a multivariate normal distribution.
+
+    Parameters
+    ----------
+    x : array_like
+        Points at which the log-pdf is evaluated.
+    mean : array_like
+        Observation data.
+    cov : 2d array
+        Covariance matrix of the distribution.
+
+    Returns
+    -------
+    log_lik : float
+        Log likelihood.
+
+    """
+    # Transform into np arrays
+    x = np.array(x)
+    mean = np.array(mean)
+    cov = np.array(cov)
+
+    n = len(mean)
+    L = spla.cholesky(cov, lower=True)
+    beta = np.sum(np.log(np.diag(L)))
+    dev = x - mean
+    alpha = dev.dot(spla.cho_solve((L, True), dev))
+    log_lik = -0.5 * alpha - beta - n / 2. * np.log(2 * np.pi)
+    return log_lik
+
+
+>>>>>>> 2306e76d (Decoupled MCMC from BayesInference and improved performance)
 class MCMC:
     """
     A class for Bayesian inference via a Markov-Chain Monte-Carlo (MCMC)
@@ -40,12 +209,52 @@ class MCMC:
         Bayes object.
 
     """
+<<<<<<< HEAD
 
     def __init__(self, BayesOpts):
 
         self.BayesOpts = BayesOpts
+=======
+    def __init__(self, engine, mcmc_params, Discrepancy, bias_inputs,
+                 error_model, req_outputs, selected_indices, emulator,
+                 out_dir, name, BiasInputs=None):
+
+        # TODO: maybe worth making this a child class of BayesInference?
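+        # Hedged usage sketch of the decoupled class (argument values are
+        # illustrative only, not defaults):
+        #     mcmc = MCMC(engine, mcmc_params, Discrepancy,
+        #                 bias_inputs=None, error_model=False,
+        #                 req_outputs=None, selected_indices=None,
+        #                 emulator=True, out_dir='Outputs_Bayes', name='Calib')
+        #     posterior_df = mcmc.run_sampler(observation, total_sigma2)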
+        # Inputs
+        #self.BayesOpts = BayesOpts
+        self.engine = engine
+        self.Discrepancy = Discrepancy
+
+        # Get the needed properties from the BayesInf class
+        self.bias_inputs = bias_inputs
+        self.error_model = error_model
+        self.BiasInputs = BiasInputs
+        self.selected_indices = selected_indices
+        self.req_outputs = req_outputs
+        self.emulator = emulator
+        self.out_dir = out_dir
+        self.name = name
+
+        # Param inits
+        self.counter = 0
+        self.observation = None
+        self.total_sigma2 = None
+        self.error_MetaModel = None
+
+        # Get MCMC parameters from BayesOpts
+        self.initsamples = mcmc_params['init_samples']
+        if isinstance(self.initsamples, pd.DataFrame):
+            self.initsamples = self.initsamples.values
+        self.nsteps = int(mcmc_params['n_steps'])
+        self.nwalkers = int(mcmc_params['n_walkers'])
+        self.nburn = mcmc_params['n_burn']
+        self.moves = mcmc_params['moves']
+        self.mp = mcmc_params['multiprocessing']
+        self.verbose = mcmc_params['verbose']
+>>>>>>> 2306e76d (Decoupled MCMC from BayesInference and improved performance)
 
     def run_sampler(self, observation, total_sigma2):
 
+<<<<<<< HEAD
         BayesObj = self.BayesOpts
         MetaModel = BayesObj.engine.MetaModel
         Model = BayesObj.engine.Model
@@ -57,6 +266,28 @@ class MCMC:
 
         output_dir = f'Outputs_Bayes_{Model.name}_{self.BayesOpts.name}'
         if not os.path.exists(output_dir):
             os.makedirs(output_dir)
+=======
+        """
+        Runs the MCMC sampler on the observation data.
+
+        Parameters
+        ----------
+        observation : dict
+            Observation (measurement) data used in the likelihood.
+        total_sigma2 : dict
+            Total variance (sigma^2) of the discrepancy for each output.
+
+        Returns
+        -------
+        Posterior_df : pd.DataFrame
+            The posterior samples.
+
+        """
+        # Get init values
+        engine = self.engine
+        Discrepancy = self.Discrepancy
+        n_cpus = engine.Model.n_cpus
+        ndim = engine.MetaModel.n_params
+        if not os.path.exists(self.out_dir):
+            os.makedirs(self.out_dir)
+>>>>>>> 2306e76d (Decoupled MCMC from BayesInference and improved performance)
 
         self.observation = observation
         self.total_sigma2 = total_sigma2
 
@@ -96,10 +327,19 @@ class MCMC:
         np.random.seed(0)
         if self.initsamples is None:
             try:
+<<<<<<< HEAD
                 initsamples = priorDist.sample(self.nwalkers).T
+=======
+                # Version 1  # TODO: recheck this code, it is a mix of old and new
+                priorDist = self.engine.ExpDesign.JDist
+                initsamples = priorDist.sample(self.nwalkers).T
+
+#                initsamples = engine.ExpDesign.JDist.sample(self.nwalkers).T
+#                initsamples = np.swapaxes(np.array([initsamples]),0,1)  # TODO: test if this still works with multiple input dists
+>>>>>>> 2306e76d (Decoupled MCMC from BayesInference and improved performance)
             except:
                 # when aPCE selected - gaussian kernel distribution
-                inputSamples = BayesObj.engine.ExpDesign.raw_data.T
+                inputSamples = engine.ExpDesign.raw_data.T
                 random_indices = np.random.choice(
                     len(inputSamples), size=self.nwalkers, replace=False
                 )
@@ -125,7 +365,7 @@ class MCMC:
                     initsamples[:, idx_dim] = dist.rvs(size=self.nwalkers)
 
                 # Update lower and upper
-                BayesObj.engine.ExpDesign.bound_tuples = bound_tuples
+                engine.ExpDesign.bound_tuples = bound_tuples
 
         # Check if sigma^2 needs to be inferred
         if Discrepancy.opt_sigma != 'B':
@@ -141,10 +381,10 @@ class MCMC:
             disc_bound_tuple = Discrepancy.ExpDesign.bound_tuples
 
             # Update bound_tuples
-            BayesObj.engine.ExpDesign.bound_tuples += disc_bound_tuple
+            engine.ExpDesign.bound_tuples += disc_bound_tuple
 
         print("\n>>>> Bayesian inference with MCMC for "
-              f"{self.BayesOpts.name} started. <<<<<<")
+              f"{self.name} started. <<<<<<")
 
         # Set up the backend
         filename = f"{output_dir}/emcee_sampler.h5"
@@ -193,7 +433,8 @@ class MCMC:
             self.nwalkers, ndim, self.log_posterior, moves=self.moves,
             backend=backend, vectorize=True
         )
-
+        print(f'ndim: {ndim}')
+        print(f'initsamples.shape: {initsamples.shape}')
         # Check if a burn-in phase is needed!
         if self.initsamples is None:
             # Burn-in
@@ -228,8 +469,9 @@ class MCMC:
                     continue
 
                 # Train model discrepancy/error
-                if hasattr(BayesObj, 'errorModel') and BayesObj.errorModel \
-                        and not sampler.iteration % 3 * autocorreverynsteps:
+                # TODO: add this back in when the error model is actually working
+                # and this is not dependent on BayesObj
+                if self.error_model and not sampler.iteration % 3 * autocorreverynsteps:
                     try:
                         self.error_MetaModel = self.train_error_model(sampler)
                     except:
@@ -293,17 +535,17 @@ class MCMC:
             print("** These values must be smaller than 1.1.")
             print('-'*50)
 
-        print(f"\n>>>> Bayesian inference with MCMC for {self.BayesOpts.name} "
+        print(f"\n>>>> Bayesian inference with MCMC for {self.name} "
              "successfully completed. <<<<<<\n")
 
         # Extract parameter names and their prior ranges
-        par_names = self.BayesOpts.engine.ExpDesign.par_names
+        par_names = engine.ExpDesign.par_names
 
         if Discrepancy.opt_sigma != 'B':
             for i in range(len(Discrepancy.InputDisc.Marginals)):
                 par_names.append(Discrepancy.InputDisc.Marginals[i].name)
 
-        params_range = self.BayesOpts.engine.ExpDesign.bound_tuples
+        params_range = engine.ExpDesign.bound_tuples
 
         # Plot traces
         if self.verbose and self.nsteps < 10000:
@@ -397,9 +639,15 @@ class MCMC:
            returned otherwise an array.
 
        """
+<<<<<<< HEAD
        MetaModel = self.BayesOpts.MetaModel
        Discrepancy = self.BayesOpts.Discrepancy
 
+=======
+        engine = self.engine
+        MetaModel = engine.MetaModel
+        Discrepancy = self.Discrepancy
+>>>>>>> 2306e76d (Decoupled MCMC from BayesInference and improved performance)
 
        # Find the number of sigma2 parameters
        if Discrepancy.opt_sigma != 'B':
@@ -409,8 +657,8 @@ class MCMC:
            n_sigma2 = len(disc_bound_tuples)
        else:
            n_sigma2 = -len(theta)
-        prior_dist = self.BayesOpts.engine.ExpDesign.prior_space
-        params_range = self.BayesOpts.engine.ExpDesign.bound_tuples
+        prior_dist = engine.ExpDesign.prior_space
+        params_range = engine.ExpDesign.bound_tuples
        theta = theta if theta.ndim != 1 else theta.reshape((1, -1))
        nsamples = theta.shape[0]
        logprior = -np.inf*np.ones(nsamples)
@@ -461,10 +709,15 @@ class MCMC:
            Log likelihood.
""" +<<<<<<< HEAD BayesOpts = self.BayesOpts MetaModel = BayesOpts.MetaModel Discrepancy = self.BayesOpts.Discrepancy +======= + MetaModel = self.engine.MetaModel + Discrepancy = self.Discrepancy +>>>>>>> 2306e76d (Decoupled MCMC from BayesInference and improved performance) # Find the number of sigma2 parameters if Discrepancy.opt_sigma != 'B': @@ -481,17 +734,18 @@ class MCMC: theta = theta if theta.ndim != 1 else theta.reshape((1, -1)) # Evaluate Model/MetaModel at theta - mean_pred, BayesOpts._std_pce_prior_pred = self.eval_model(theta) + mean_pred, _std_pce_prior_pred = self.eval_model(theta) # Surrogate model's error using RMSE of test data surrError = MetaModel.rmse if hasattr(MetaModel, 'rmse') else None # Likelihood - log_like = BayesOpts.normpdf( + log_like = self.normpdf( mean_pred, self.observation, self.total_sigma2, sigma2, std=surrError ) - return log_like + # TODO: give the second return argument back to BayesInf (parameter of the same name) + return log_like, _std_pce_prior_pred # ------------------------------------------------------------------------- def log_posterior(self, theta): @@ -520,7 +774,7 @@ class MCMC: # Compute log prior log_prior = self.log_prior(theta) # Compute log Likelihood - log_likelihood = self.log_likelihood(theta) + log_likelihood, _std_pce_prior_pred = self.log_likelihood(theta) return log_prior + log_likelihood else: @@ -535,7 +789,7 @@ class MCMC: # Compute loLikelihoods if non_inf_idx.size != 0: - log_likelihood[non_inf_idx] = self.log_likelihood( + log_likelihood[non_inf_idx], _std_pce_prior_pred = self.log_likelihood( theta[non_inf_idx] ) @@ -559,7 +813,10 @@ class MCMC: Std of model prediction. """ + engine = self.engine + Model = engine.Model +<<<<<<< HEAD BayesObj = self.BayesOpts MetaModel = BayesObj.MetaModel Model = BayesObj.engine.Model @@ -567,6 +824,11 @@ class MCMC: if BayesObj.emulator: # Evaluate the MetaModel mean_pred, std_pred = MetaModel.eval_metamodel(samples=theta) +======= + if self.emulator: + # Evaluate the MetaModel + mean_pred, std_pred = engine.MetaModel.eval_metamodel(samples=theta) +>>>>>>> 2306e76d (Decoupled MCMC from BayesInference and improved performance) else: # Evaluate the origModel mean_pred, std_pred = dict(), dict() @@ -587,9 +849,9 @@ class MCMC: # Add one to the counter self.counter += 1 - if hasattr(self, 'error_MetaModel') and BayesObj.error_model: + if self.error_MetaModel is not None and self.error_model: meanPred, stdPred = self.error_MetaModel.eval_model_error( - BayesObj.BiasInputs, mean_pred + self.BiasInputs, mean_pred ) return mean_pred, std_pred @@ -610,8 +872,12 @@ class MCMC: A error model. """ +<<<<<<< HEAD BayesObj = self.BayesOpts MetaModel = BayesObj.MetaModel +======= + MetaModel = self.engine.MetaModel +>>>>>>> 2306e76d (Decoupled MCMC from BayesInference and improved performance) # Prepare the poster samples try: @@ -636,6 +902,7 @@ class MCMC: # Train a GPR meta-model using MAP error_MetaModel = MetaModel.create_model_error( +<<<<<<< HEAD BayesObj.BiasInputs, y_map, name='Calib') return error_MetaModel @@ -907,3 +1174,128 @@ class MCMC: c = False return c return c +======= + self.BiasInputs, y_map, name='Calib') + + return error_MetaModel + + # ------------------------------------------------------------------------- + def normpdf(self, outputs, obs_data, total_sigma2s, sigma2=None, std=None): + """ + Calculates the likelihood of simulation outputs compared with + observation data. 
+
+        Parameters
+        ----------
+        outputs : dict
+            A dictionary containing the simulation outputs as array of shape
+            (n_samples, n_measurement) for each model output.
+        obs_data : dict
+            A dictionary/dataframe containing the observation data.
+        total_sigma2s : dict
+            A dictionary with known values of the covariance diagonal entries,
+            a.k.a. sigma^2.
+        sigma2 : array, optional
+            An array of the sigma^2 samples, when the covariance diagonal
+            entries are unknown and are being jointly inferred. The default is
+            None.
+        std : dict, optional
+            A dictionary containing the root mean squared error as array of
+            shape (n_samples, n_measurement) for each model output. The
+            default is None.
+
+        Returns
+        -------
+        logLik : array of shape (n_samples)
+            Log-likelihoods.
+
+        """
+        Model = self.engine.Model
+        logLik = 0.0
+
+        # Extract the requested model outputs for likelihood calculation
+        if self.req_outputs is None:
+            req_outputs = Model.Output.names  # TODO: should this then be saved as self.req_outputs?
+        else:
+            req_outputs = list(self.req_outputs)
+
+        # Loop over the output keys
+        for idx, out in enumerate(req_outputs):
+
+            # (Meta)Model Output
+            nsamples, nout = outputs[out].shape
+
+            # Prepare data and remove NaN
+            try:
+                data = obs_data[out].values[~np.isnan(obs_data[out])]
+            except AttributeError:
+                data = obs_data[out][~np.isnan(obs_data[out])]
+
+            # Prepare data uncertainty / error estimation (sigma2s)
+            non_nan_indices = ~np.isnan(total_sigma2s[out])
+            tot_sigma2s = total_sigma2s[out][non_nan_indices][:nout]
+
+            # Add the std of the PCE if an emulator is used
+            if self.emulator:
+                if std is not None:
+                    tot_sigma2s += std[out] ** 2
+
+            # Select the data points to compare
+            try:
+                indices = self.selected_indices[out]
+            except:
+                indices = list(range(nout))
+
+            # Set up Covariance Matrix
+            covMatrix = np.diag(np.diag(tot_sigma2s)[indices, indices])
+
+            # If sigma2 is not given, use given total_sigma2s and move to next itr
+            if sigma2 is None:
+                logLik += stats.multivariate_normal.logpdf(
+                    outputs[out][:, indices], data[indices], covMatrix)
+                continue
+
+            # Loop over each run/sample and calculate logLikelihood
+            logliks = np.zeros(nsamples)
+            for s_idx in range(nsamples):
+
+                # Simulation run
+                tot_outputs = outputs[out]
+
+                # Covariance Matrix
+                covMatrix = np.diag(tot_sigma2s)
+
+                # Check the type of the error term
+                if self.bias_inputs is not None and self.error_model is None:
+                    # Infer a bias model using Gaussian Process Regression
+                    bias_inputs = np.hstack(
+                        (self.bias_inputs[out],
+                         tot_outputs[s_idx].reshape(-1, 1)))
+
+                    params = sigma2[s_idx, idx * 3:(idx + 1) * 3]
+                    covMatrix = _kernel_rbf(bias_inputs, params)
+                else:
+                    # Infer equal sigma2s
+                    try:
+                        sigma_2 = sigma2[s_idx, idx]
+                    except TypeError:
+                        sigma_2 = 0.0
+
+                    covMatrix += sigma_2 * np.eye(nout)
+                    # covMatrix = np.diag(sigma2 * total_sigma2s)
+
+                # Select the data points to compare
+                try:
+                    indices = self.selected_indices[out]
+                except:
+                    indices = list(range(nout))
+                covMatrix = np.diag(covMatrix[indices, indices])
+
+                # Compute loglikelihood
+                logliks[s_idx] = _logpdf(
+                    tot_outputs[s_idx, indices], data[indices], covMatrix
+                )
+            logLik += logliks
+        return logLik
+
+>>>>>>> 2306e76d (Decoupled MCMC from BayesInference and improved performance)
diff --git a/src/bayesvalidrox/surrogate_models/engine.py b/src/bayesvalidrox/surrogate_models/engine.py
index 42307d4770d4ae23a40107dfea64057aac682c23..bcaae6c5756ddc66730b277fc914704dd5eeec8e 100644
--- a/src/bayesvalidrox/surrogate_models/engine.py
+++ b/src/bayesvalidrox/surrogate_models/engine.py
@@ -189,9 +189,15 @@ class Engine():
 
         # Prepare X samples
         # For training the surrogate use ExpDesign.X_tr, ExpDesign.X is for the model to run on
         ExpDesign.generate_ED(ExpDesign.n_init_samples,
+<<<<<<< HEAD
                               transform=True,
                               max_pce_deg=np.max(MetaModel.pce_deg))
+=======
+                              #transform=True,
+                              max_pce_deg=np.max(MetaModel.pce_deg))
+
+>>>>>>> 2306e76d (Decoupled MCMC from BayesInference and improved performance)
 
         # Run simulations at X
         if not hasattr(ExpDesign, 'Y') or ExpDesign.Y is None:
             print('\n Now the forward model needs to be run!\n')
diff --git a/src/bayesvalidrox/surrogate_models/input_space.py b/src/bayesvalidrox/surrogate_models/input_space.py
index 4e010d66f2933ec243bad756d8f2c5454808d802..22bb78acc2c1901913099d1fa3271294ad9a671e 100644
--- a/src/bayesvalidrox/surrogate_models/input_space.py
+++ b/src/bayesvalidrox/surrogate_models/input_space.py
@@ -7,6 +7,7 @@ Input space built from set prior distributions
 import numpy as np
 import chaospy
 import scipy.stats as st
+from tqdm import tqdm
 
 class InputSpace:
@@ -357,7 +358,10 @@ class InputSpace:
         cdfx = np.zeros((X.shape))
         tr_X = np.zeros((X.shape))
 
-        for par_i in range(n_params):
+        # TODO: this transformation takes quite a while,
+        # especially for many samples to transform; can it be improved?
+        for par_i in range(n_params):  # tqdm(range(n_params),
+        #                                      desc='Transforming the input samples'):
 
             # Extract the parameters of the original space
             disttype = disttypes[par_i]
diff --git a/src/bayesvalidrox/surrogate_models/surrogate_models.py b/src/bayesvalidrox/surrogate_models/surrogate_models.py
index ca902f26bef0c45e8befb72ff67313ef09a77603..8f7c8df9569eeb91f38cae8713524078ff269ba3 100644
--- a/src/bayesvalidrox/surrogate_models/surrogate_models.py
+++ b/src/bayesvalidrox/surrogate_models/surrogate_models.py
@@ -1190,6 +1190,11 @@ class MetaModel():
                 method='user'
             )
         # Compute univariate bases for the given samples
+<<<<<<< HEAD
+=======
+        #print('Creating the univariate basis.')
+        univ_p_val = None
+>>>>>>> 2306e76d (Decoupled MCMC from BayesInference and improved performance)
         if self.meta_model_type.lower() != 'gpe':
             univ_p_val = self.univ_basis_vals(
                 samples,
@@ -1199,6 +1204,7 @@ class MetaModel():
         mean_pred_b = {}
         std_pred_b = {}
         # Loop over bootstrap iterations
+        #print('Looping over bootstrap iterations.')
         for b_i in range(self.n_bootstrap_itrs):
 
             # Extract model dictionary
@@ -1210,11 +1216,14 @@ class MetaModel():
             # Loop over outputs
             mean_pred = {}
             std_pred = {}
+            #print(model_dict.items())
+            #print('Looping over the output timesteps')
             for output, values in model_dict.items():
                 mean = np.empty((len(samples), len(values)))
                 std = np.empty((len(samples), len(values)))
                 idx = 0
+                #print('Looping over ??')
                 for in_key, InIdxValues in values.items():
 
                     # Prediction with GPE