From b0d08778deb4be11fa309766e66719eb04a7ff98 Mon Sep 17 00:00:00 2001 From: Farid Mohammadi <farid.mohammadi@iws.uni-stuttgart.de> Date: Tue, 8 Mar 2022 10:29:34 +0100 Subject: [PATCH] [src] refactor the scripts. --- src/bayesvalidrox.egg-info/PKG-INFO | 59 - src/bayesvalidrox.egg-info/SOURCES.txt | 33 - .../dependency_links.txt | 1 - src/bayesvalidrox.egg-info/requires.txt | 12 - src/bayesvalidrox.egg-info/top_level.txt | 1 - .../bayes_inference/bayes_inference.py | 2104 +++++++++-------- .../bayes_inference/discrepancy.py | 53 +- src/bayesvalidrox/bayes_inference/mcmc.py | 705 +++--- .../post_processing/post_processing.py | 1558 ++++++------ src/bayesvalidrox/pylink/pylink.py | 179 +- .../surrogate_models/exp_designs.py | 60 +- src/bayesvalidrox/surrogate_models/inputs.py | 2 +- .../surrogate_models/sequential_design.py | 11 +- .../surrogate_models/surrogate_models.py | 15 +- 14 files changed, 2556 insertions(+), 2237 deletions(-) delete mode 100644 src/bayesvalidrox.egg-info/PKG-INFO delete mode 100644 src/bayesvalidrox.egg-info/SOURCES.txt delete mode 100644 src/bayesvalidrox.egg-info/dependency_links.txt delete mode 100644 src/bayesvalidrox.egg-info/requires.txt delete mode 100644 src/bayesvalidrox.egg-info/top_level.txt diff --git a/src/bayesvalidrox.egg-info/PKG-INFO b/src/bayesvalidrox.egg-info/PKG-INFO deleted file mode 100644 index c3f4b4758..000000000 --- a/src/bayesvalidrox.egg-info/PKG-INFO +++ /dev/null @@ -1,59 +0,0 @@ -Metadata-Version: 2.1 -Name: bayesvalidrox -Version: 0.0.2 -Summary: An open-source, object-oriented Python package for surrogate-assisted Bayesain Validation of computational models. -Home-page: https://git.iws.uni-stuttgart.de/inversemodeling/bayesian-validation -Author: Farid Mohammadi -Author-email: farid.mohammadi@iws.uni-stuttgart.de -License: UNKNOWN -Platform: UNKNOWN -Classifier: Programming Language :: Python :: 3 -Classifier: License :: OSI Approved :: MIT License -Classifier: Operating System :: OS Independent -Requires-Python: >=3.8 -Description-Content-Type: text/markdown -License-File: LICENCE.md - -# BayesValidRox - -An open-source, object-oriented Python package for surrogate-assisted Bayesain Validation of computational models. -This framework provides an automated workflow for surrogate-based sensitivity analysis, Bayesian calibration, and validation of computational models with a modular structure. 
- -## Authors -- [@farid](https://git.iws.uni-stuttgart.de/farid) - -## Installation -Install my project with pip -```bash - pip install bayesvalidrox -``` -## Features -* Surrogate modeling with Polynomial Chaos Expansion -* Global sensitivity analysis using Sobol Indices -* Bayesian calibration with MCMC using `emcee` package -* Bayesian validation with model weights for multi-model setting - -## Examples -- [Surrogate modeling](https://git.iws.uni-stuttgart.de/inversemodeling/bayesian-validation/-/blob/cleanup/tests/AnalyticalFunction/example_analytical_function.ipynb) - -## Requirements -* numpy==1.22.1 -* pandas==1.2.4 -* joblib==1.0.1 -* matplotlib==3.4.2 -* seaborn==0.11.1 -* scikit-learn==0.24.2 -* tqdm==4.61.1 -* chaospy==4.3.3 -* emcee==3.0.2 -* tables==3.6.1 -* corner==2.2.1 -* h5py==3.2.1 - -## TexLive -Here you need super user rights -```bash -sudo apt-get install dvipng texlive-latex-extra texlive-fonts-recommended cm-super -``` - - diff --git a/src/bayesvalidrox.egg-info/SOURCES.txt b/src/bayesvalidrox.egg-info/SOURCES.txt deleted file mode 100644 index 1a1b68c02..000000000 --- a/src/bayesvalidrox.egg-info/SOURCES.txt +++ /dev/null @@ -1,33 +0,0 @@ -LICENCE.md -README.md -pyproject.toml -setup.cfg -src/bayesvalidrox/__init__.py -src/bayesvalidrox.egg-info/PKG-INFO -src/bayesvalidrox.egg-info/SOURCES.txt -src/bayesvalidrox.egg-info/dependency_links.txt -src/bayesvalidrox.egg-info/requires.txt -src/bayesvalidrox.egg-info/top_level.txt -src/bayesvalidrox/bayes_inference/__init__.py -src/bayesvalidrox/bayes_inference/bayes_inference.py -src/bayesvalidrox/bayes_inference/discrepancy.py -src/bayesvalidrox/bayes_inference/discrepancy_GP.py -src/bayesvalidrox/bayes_inference/discrepancy_GP_v1.py -src/bayesvalidrox/bayes_inference/mcmc.py -src/bayesvalidrox/post_processing/__init__.py -src/bayesvalidrox/post_processing/adaptPlot.py -src/bayesvalidrox/post_processing/post_processing.py -src/bayesvalidrox/pylink/__init__.py -src/bayesvalidrox/pylink/pylink.py -src/bayesvalidrox/surrogate_models/__init__.py -src/bayesvalidrox/surrogate_models/apoly_construction.py -src/bayesvalidrox/surrogate_models/bayes_linear.py -src/bayesvalidrox/surrogate_models/eval_rec_rule.py -src/bayesvalidrox/surrogate_models/exp_designs.py -src/bayesvalidrox/surrogate_models/exploration.py -src/bayesvalidrox/surrogate_models/glexindex.py -src/bayesvalidrox/surrogate_models/inputs.py -src/bayesvalidrox/surrogate_models/reg_fast_ard.py -src/bayesvalidrox/surrogate_models/reg_fast_laplace.py -src/bayesvalidrox/surrogate_models/sequential_design.py -src/bayesvalidrox/surrogate_models/surrogate_models.py \ No newline at end of file diff --git a/src/bayesvalidrox.egg-info/dependency_links.txt b/src/bayesvalidrox.egg-info/dependency_links.txt deleted file mode 100644 index 8b1378917..000000000 --- a/src/bayesvalidrox.egg-info/dependency_links.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/src/bayesvalidrox.egg-info/requires.txt b/src/bayesvalidrox.egg-info/requires.txt deleted file mode 100644 index 82d34b9a7..000000000 --- a/src/bayesvalidrox.egg-info/requires.txt +++ /dev/null @@ -1,12 +0,0 @@ -numpy==1.22.1 -pandas==1.2.4 -joblib==1.0.1 -matplotlib==3.4.2 -seaborn==0.11.1 -scikit-learn==0.24.2 -tqdm==4.61.1 -chaospy==4.3.3 -emcee==3.0.2 -tables==3.6.1 -corner==2.2.1 -h5py==3.2.1 diff --git a/src/bayesvalidrox.egg-info/top_level.txt b/src/bayesvalidrox.egg-info/top_level.txt deleted file mode 100644 index 2753d5241..000000000 --- a/src/bayesvalidrox.egg-info/top_level.txt +++ /dev/null @@ -1 +0,0 @@ -bayesvalidrox 
diff --git a/src/bayesvalidrox/bayes_inference/bayes_inference.py b/src/bayesvalidrox/bayes_inference/bayes_inference.py index 3ba266cb9..06921e88e 100644 --- a/src/bayesvalidrox/bayes_inference/bayes_inference.py +++ b/src/bayesvalidrox/bayes_inference/bayes_inference.py @@ -24,142 +24,683 @@ import seaborn as sns import corner import h5py import gc -from joblib import Parallel, delayed -from scipy.stats import multivariate_normal from sklearn.metrics import mean_squared_error, r2_score -from sklearn.metrics.pairwise import rbf_kernel from sklearn import preprocessing +from matplotlib.patches import Patch +import matplotlib.lines as mlines from matplotlib.backends.backend_pdf import PdfPages import matplotlib.pylab as plt -SIZE = 30 -plt.rc('figure', figsize=(24, 16)) -plt.rc('font', family='serif', serif='Arial') -plt.rc('font', size=SIZE) -plt.rc('text', usetex=True) -plt.rc('axes', linewidth=3) -plt.rc('axes', grid=True) -plt.rc('grid', linestyle="-") -plt.rc('axes', titlesize=SIZE) # fontsize of the axes title -plt.rc('axes', labelsize=SIZE) # fontsize of the x and y labels -plt.rc('xtick', labelsize=SIZE) # fontsize of the tick labels -plt.rc('ytick', labelsize=SIZE) # fontsize of the tick labels -plt.rc('legend', fontsize=SIZE) # legend fontsize -plt.rc('figure', titlesize=SIZE) # fontsize of the figure title - - -from .discrepancy import Discrepancy + from .mcmc import MCMC -from bayesvalidrox.surrogate_models.exp_designs import ExpDesigns + +# Load the mplstyle +plt.style.use(os.path.join(os.path.split(__file__)[0], + '../', 'bayesvalidrox.mplstyle')) class BayesInference: - def __init__(self, PCEModel): - self.PCEModel = PCEModel - self.Name = 'Calib' - self.SamplingMethod = 'rejection' - self.MAP = 'Mean' - self.PCEMeans = {} - self.PCEStd = {} - self.Discrepancy = None - self.ReqOutputType = None - self.NrofSamples = 500000 - self.Samples = None - self.MCMCnSteps = None - self.MCMCnwalkers = None - self.MCMCinitSamples = None - self.MCMCmoves = None - self.MCMCverbose = False - self.ModelOutputs = [] - - self.perturbedData = [] - self.MeasuredData = None - self.MeasurementError = [] - self.selectedIndices = None - - self.logLikelihoods = [] - self.logBME = [] - self.KLD = [] - self.infEntropy = [] - self.MAPorigModel = [] - self.MAPpceModelMean = [] - self.MAPpceModelStd = [] - self.logTOMBME = [] - self.Posterior_df = {} - self.PCEPriorPred = {} - - self.PlotPostPred = True - self.PlotMAPPred = False - self.emulator = True - self.Bootstrap = False - self.BayesLOOCV = False - self.BootstrapItrNr = 1 - self.BootstrapNoise = 0.05 - self.MultiProcessMCMC = None - self.Corner_title_fmt = '.3f' + """ + A class to perform Bayesian Analysis. + + + Attributes + ---------- + MetaModel : obj + Meta model object. + discrepancy : obj + The discrepancy object for the sigma2s, i.e. the diagonal entries + of the variance matrix for a multivariate normal likelihood. + name : string, optional + The type of analysis, either calibration (Calib) or validation + (Valid). The default is 'Calib'. + emulator : bool, optional + Analysis with emulator (MetaModel). The default is True. + bootstrap : bool, optional + Bootstrap the analysis. The default is False. + req_outputs : list, optional + The list of requested output to be used for the analysis. + The default is None. If None, all the defined outputs for the model + object is used. + selected_indices : dict, optional + A dictionary with the selected indices of each model output. The + default is None. 
If None, all measurement points are used in the
+        analysis.
+    samples : array of shape (n_samples, n_params), optional
+        The samples to be used in the analysis. The default is None. If
+        None, the samples are drawn from the probabilistic input parameter
+        object of the MetaModel object.
+    n_samples : int, optional
+        Number of samples to be used in the analysis. The default is 5e5.
+        If samples is not None, this argument will be assigned based on the
+        number of samples given.
+    measured_data : dict, optional
+        A dictionary containing the observation data. The default is None.
+        If None, the observation defined in the Model object of the
+        MetaModel is used.
+    inference_method : string, optional
+        A method for Bayesian inference. The default is 'rejection'. A
+        Markov Chain Monte Carlo sampler can be selected simply by passing
+        'MCMC'.
+    mcmc_params : dict, optional
+        A dictionary with the arguments required for Bayesian inference
+        with MCMC. The default is None.
+
+        Pass the mcmc_params like the following::
+
+            mcmc_params:{
+                'init_samples': None,  # initial samples
+                'n_walkers': 100,  # number of walkers (chains)
+                'n_steps': 100000,  # maximum number of steps
+                'n_burn': 200,  # number of burn-in steps
+                'moves': None,  # moves for the emcee sampler
+                'multiprocessing': False,  # multiprocessing
+                'verbose': False  # verbosity
+                }
+
+        The items shown above are the default values. If any parameter is
+        not defined, the default value will be assigned to it.
+    bayes_loocv : bool, optional
+        Bayesian leave-one-out cross-validation. The default is False. If
+        True, the LOOCV procedure is used to estimate the Bayesian model
+        evidence (BME).
+    n_bootstrap_itrs : int, optional
+        Number of bootstrap iterations. The default is 1. If bayes_loocv is
+        True, this is set equal to the total length of the observation data
+        set.
+    perturbed_data : array of shape (n_bootstrap_itrs, n_obs), optional
+        User-defined perturbed data. The default is [].
+    bootstrap_noise : float, optional
+        A noise level to perturb the data set. The default is 0.05.
+    plot_post_pred : bool, optional
+        Plot posterior predictive plots. The default is True.
+    plot_map_pred : bool, optional
+        Plot the model outputs vs the metamodel predictions for the maximum
+        a posteriori (defined via max_a_posteriori) parameter set. The
+        default is False.
+    max_a_posteriori : string, optional
+        Maximum a posteriori. 'mean' and 'mode' are available. The default
+        is 'mean'.
+    corner_title_fmt : string, optional
+        Title format for the posterior distribution plot with the python
+        package corner. The default is '.3f'.
+
+    Methods
+    -------
+    create_inference():
+        This method starts the analysis.
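+
+    Examples
+    --------
+    A minimal sketch of a typical run, using the keyword names from the
+    constructor below (`MetaModel` is assumed to be a trained surrogate
+    object and `DiscrepancyOpts` a discrepancy object; both names are
+    placeholders)::
+
+        >>> BayesOpts = BayesInference(MetaModel,
+        ...                            discrepancy=DiscrepancyOpts,
+        ...                            name='Calib',
+        ...                            inference_method='rejection')
+        >>> BayesOpts.create_inference()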
+ """ + + def __init__(self, MetaModel, discrepancy=None, emulator=True, + name='Calib', bootstrap=False, req_outputs=None, + selected_indices=None, samples=None, n_samples=500000, + measured_data=None, inference_method='rejection', + mcmc_params=None, bayes_loocv=False, n_bootstrap_itrs=1, + perturbed_data=[], bootstrap_noise=0.05, plot_post_pred=True, + plot_map_pred=False, max_a_posteriori='mean', + corner_title_fmt='.3f'): + + self.MetaModel = MetaModel + self.Discrepancy = discrepancy + self.emulator = emulator + self.name = name + self.bootstrap = bootstrap + self.req_outputs = req_outputs + self.selected_indices = selected_indices + self.samples = samples + self.n_samples = n_samples + self.measured_data = measured_data + self.inference_method = inference_method + self.mcmc_params = mcmc_params + self.perturbed_data = perturbed_data + self.bayes_loocv = bayes_loocv + self.n_bootstrap_itrs = n_bootstrap_itrs + self.bootstrap_noise = bootstrap_noise + self.plot_post_pred = plot_post_pred + self.plot_map_pred = plot_map_pred + self.max_a_posteriori = max_a_posteriori + self.corner_title_fmt = corner_title_fmt # ------------------------------------------------------------------------- - def _logpdf(self, x, mean, cov): - n = len(mean) - L = spla.cholesky(cov, lower=True) - beta = np.sum(np.log(np.diag(L))) - dev = x - mean - alpha = dev.dot(spla.cho_solve((L, True), dev)) - return -0.5 * alpha - beta - n / 2. * np.log(2 * np.pi) + def create_inference(self): + """ + Starts the inference. - # ------------------------------------------------------------------------- - def get_Sample(self): + Returns + ------- + BayesInference : obj + The Bayes inference object. + + """ + + # Set some variables + MetaModel = self.MetaModel + Model = MetaModel.ModelObj + n_params = MetaModel.n_params + output_names = Model.Output.names + par_names = MetaModel.ExpDesign.par_names + + # If the prior is set by the user, take it. + if self.samples is None: + self.samples = MetaModel.ExpDesign.generate_samples( + self.n_samples, 'random') + else: + try: + samples = self.samples.values + except AttributeError: + samples = self.samples + + # Take care of an additional Sigma2s + self.samples = samples[:, :n_params] + + # Update number of samples + self.n_samples = self.samples.shape[0] + + # ---------- Preparation of observation data ---------- + # Read observation data and perturb it if requested. 
+ if self.measured_data is None: + self.measured_data = Model.read_observation(case=self.name) + # Convert measured_data to a data frame + if not isinstance(self.measured_data, pd.DataFrame): + self.measured_data = pd.DataFrame(self.measured_data) + + # Extract the total number of measurement points + if self.name.lower() == 'calib': + self.n_tot_measurement = Model.n_obs + else: + self.n_tot_measurement = Model.n_obs_valid + + if hasattr(self, 'measurement_error') \ + and len(self.measurement_error) == 0: + if isinstance(self.Discrepancy, dict): + Disc = self.Discrepancy['known'] + else: + Disc = self.Discrepancy + if isinstance(Disc, dict): + self.measurement_error = {k: np.sqrt(Disc.Parameters[k]) for k + in Disc.Parameters.keys()} + else: + try: + self.measurement_error = np.sqrt(Disc.Parameters) + except: + pass + + # ---------- Preparation of variance for covariance matrix ---------- + # Independent and identically distributed + total_sigma2 = dict() + opt_sigma_flag = isinstance(self.Discrepancy, dict) + opt_sigma = None + for key_idx, key in enumerate(output_names): + + # Find opt_sigma + if opt_sigma_flag and opt_sigma is None: + # Option A: known error with unknown bias term + opt_sigma = 'A' + known_discrepancy = self.Discrepancy['known'] + self.Discrepancy = self.Discrepancy['infer'] + sigma2 = np.array(known_discrepancy.parameters[key]) + + elif opt_sigma == 'A' or self.Discrepancy.parameters is not None: + # Option B: The sigma2 is known (no bias term) + if opt_sigma == 'A': + sigma2 = np.array(known_discrepancy.parameters[key]) + else: + opt_sigma = 'B' + sigma2 = np.array(self.Discrepancy.parameters[key]) + + elif not isinstance(self.Discrepancy.InputDisc, str): + # Option C: The sigma2 is unknown (bias term including error) + opt_sigma = 'C' + self.Discrepancy.opt_sigma = opt_sigma + n_measurement, n_outs = self.measured_data[key].values.shape + sigma2 = np.zeros((n_measurement)) + + total_sigma2[key] = sigma2 + + self.Discrepancy.opt_sigma = opt_sigma + self.Discrepancy.total_sigma2 = total_sigma2 + + # If inferred sigma2s obtained from e.g. 
calibration are given
+        try:
+            self.sigma2s = self.Discrepancy.get_sample(self.n_samples)
+        except:
+            pass
+
+        # ---------------- Bootstrap & TOM --------------------
+        if self.bootstrap or self.bayes_loocv:
+            if len(self.perturbed_data) == 0:
+                # Add zero-mean noise to the observation data
+                self.perturbed_data = self._perturb_data(
+                    self.measured_data, output_names
+                    )
+            else:
+                self.n_bootstrap_itrs = len(self.perturbed_data)
+
+        # -------- Model Discrepancy -----------
+        if hasattr(self, 'error_model') and self.error_model \
+           and self.name.lower() != 'calib':
+            # Select posterior mean as MAP
+            MAP_theta = self.samples.mean(axis=0).reshape((1, n_params))
+            # MAP_theta = stats.mode(self.samples,axis=0)[0]
+
+            # Evaluate the (meta-)model at the MAP
+            y_MAP, y_std_MAP = MetaModel.eval_metamodel(samples=MAP_theta)
+
+            # Train a GPR meta-model using MAP
+            self.error_MetaModel = MetaModel.create_model_error(
+                self.bias_inputs, y_MAP, Name=self.name
+                )
+
+        # -----------------------------------------------------
+        # ----- Loop over the perturbed observation data ------
+        # -----------------------------------------------------
+        # Initialize arrays
+        logLikelihoods = np.zeros((self.n_samples, self.n_bootstrap_itrs),
+                                  dtype=np.float16)
+        BME_Corr = np.zeros((self.n_bootstrap_itrs))
+        log_BME = np.zeros((self.n_bootstrap_itrs))
+        KLD = np.zeros((self.n_bootstrap_itrs))
+        inf_entropy = np.zeros((self.n_bootstrap_itrs))
+
+        # Compute the prior predictions
+        # Evaluate the MetaModel
+        if self.emulator:
+            y_hat, y_std = MetaModel.eval_metamodel(samples=self.samples)
+            self.__mean_pce_prior_pred = y_hat
+            self._std_pce_prior_pred = y_std
+
+            # Correct the predictions with Model discrepancy
+            if hasattr(self, 'error_model') and self.error_model:
+                y_hat_corr, y_std = self.error_MetaModel.eval_model_error(
+                    self.bias_inputs, self.__mean_pce_prior_pred
+                    )
+                self.__mean_pce_prior_pred = y_hat_corr
+                self._std_pce_prior_pred = y_std
+
+            # Surrogate model's error using RMSE of test data
+            if hasattr(MetaModel, 'rmse'):
+                surrError = MetaModel.rmse
+            else:
+                surrError = None
+
+        else:
+            # Evaluate the original model (no surrogate error in this case)
+            surrError = None
+            self.__model_prior_pred = self._eval_model(
+                samples=self.samples, key='PriorPred'
+                )
+
+        # Start the likelihood-BME computations for the perturbed data
+        for itr_idx, data in tqdm(
+                enumerate(self.perturbed_data), ascii=True,
+                desc="Bootstrapping the BME calculations"
+                ):
+
+            # ---------------- Likelihood calculation ----------------
+            if self.emulator:
+                model_evals = self.__mean_pce_prior_pred
+            else:
+                model_evals = self.__model_prior_pred
+
+            # Leave one out
+            if self.bayes_loocv:
+                self.selected_indices = np.nonzero(data)[0]
+
+            # Prepare data dataframe
+            nobs = list(self.measured_data.count().values[1:])
+            numbers = list(map(sum, zip([0] + nobs, nobs)))
+            indices = list(zip([0] + numbers, numbers))
+            data_dict = {
+                output_names[i]: data[j:k] for i, (j, k) in
+                enumerate(indices)
+                }
+
+            # Unknown sigma2
+            if opt_sigma == 'C' or hasattr(self, 'sigma2s'):
+                logLikelihoods[:, itr_idx] = self.normpdf(
+                    model_evals, data_dict, total_sigma2,
+                    sigma2=self.sigma2s, std=surrError
+                    )
+            else:
+                # Known sigma2
+                logLikelihoods[:, itr_idx] = self.normpdf(
+                    model_evals, data_dict, total_sigma2,
+                    std=surrError
+                    )
+
+            # ---------------- BME Calculations ----------------
+            # BME (log)
+            log_BME[itr_idx] = np.log(
+                np.nanmean(np.exp(logLikelihoods[:, itr_idx],
+                                  dtype=np.float128))
+                )
+
+            # Rejection Step
+            # Random numbers between 0 and 1
+            unif = np.random.rand(1,
self.n_samples)[0]
+
+            # Reject the poorly performing prior samples
+            Likelihoods = np.exp(logLikelihoods[:, itr_idx],
+                                 dtype=np.float64)
+            accepted = (Likelihoods/np.max(Likelihoods)) >= unif
+            posterior = self.samples[accepted]
+
+            # Posterior-based expectation of likelihoods
+            postExpLikelihoods = np.mean(
+                logLikelihoods[:, itr_idx][accepted]
+                )
+
+            # Posterior-based expectation of prior densities
+            # postExpPrior = np.mean(
+            #     np.log([MetaModel.ExpDesign.JDist.pdf(posterior.T)])
+            #     )
+
+            # Calculate Kullback-Leibler Divergence
+            KLD[itr_idx] = postExpLikelihoods - log_BME[itr_idx]
+
+            # Information Entropy based on Entropy paper Eq. 38
+            # inf_entropy[itr_idx] = log_BME[itr_idx] - postExpPrior - \
+            #     postExpLikelihoods
+
+            # TODO: BME correction when using Emulator
+            # if self.emulator:
+            #     BME_Corr[itr_idx] = self.BME_Corr_Weight(
+            #         data, total_sigma2, posterior
+            #         )
+
+            # Clear memory
+            gc.collect(generation=2)
+
+        # ---------------- Store BME, Likelihoods for all ----------------
+        # Likelihoods (Size: n_samples, n_bootstrap_itr)
+        self.log_likes = logLikelihoods
+
+        # BME (log), KLD, infEntropy (Size: 1,n_bootstrap_itr)
+        self.log_BME = log_BME
+        self.KLD = KLD
+        self.inf_entropy = inf_entropy
+
+        # TODO: BMECorrFactor (log) (Size: 1,n_bootstrap_itr)
+        # if self.emulator: self.BMECorrFactor = BME_Corr
+
+        # BME = BME + BMECorrFactor
+        if self.emulator:
+            self.log_BME = self.log_BME  # + self.BMECorrFactor
+
+        # ---------------- Parameter Bayesian inference ----------------
+        if self.inference_method.lower() == 'mcmc':
+            # Instantiate the MCMC object
+            MCMC_Obj = MCMC(self)
+            self.posterior_df = MCMC_Obj.run_sampler(
+                self.measured_data, total_sigma2
+                )
+
+        elif self.name.lower() == 'valid':
+            # Convert to a dataframe if samples are provided after calibration.
+            self.posterior_df = pd.DataFrame(self.samples, columns=par_names)
+
+        else:
+            # Rejection sampling
+            self.posterior_df = self.rejection_sampling()
+
+        # Provide posterior's summary
+        print('\n')
+        print('-'*15 + 'Posterior summary' + '-'*15)
+        pd.options.display.max_columns = None
+        pd.options.display.max_rows = None
+        print(self.posterior_df.describe())
+        print('-'*50)
+
+        # -------- Model Discrepancy -----------
+        if hasattr(self, 'error_model') and self.error_model \
+           and self.name.lower() == 'calib':
+            if self.inference_method.lower() == 'mcmc':
+                self.error_MetaModel = MCMC_Obj.error_MetaModel
+            else:
+                # Select posterior mean as MAP
+                if opt_sigma == "B":
+                    posterior_df = self.posterior_df.values
+                else:
+                    posterior_df = self.posterior_df.values[:, :-Model.n_outputs]
+
+                # Select posterior mean as Maximum a posteriori
+                map_theta = posterior_df.mean(axis=0).reshape((1, n_params))
+                # map_theta = stats.mode(Posterior_df,axis=0)[0]
+
+                # Evaluate the (meta-)model at the MAP
+                y_MAP, y_std_MAP = MetaModel.eval_metamodel(samples=map_theta)
+
+                # Train a GPR meta-model using MAP
+                self.error_MetaModel = MetaModel.create_model_error(
+                    self.bias_inputs, y_MAP, Name=self.name
+                    )
+
+        # -------- Posterior predictive -----------
+        self.posterior_predictive()
+
+        # -----------------------------------------------------
+        # ------------------ Visualization --------------------
+        # -----------------------------------------------------
+        # Create Output directory, if it doesn't exist already.
+        out_dir = f'Outputs_Bayes_{Model.name}_{self.name}'
+        os.makedirs(out_dir, exist_ok=True)
+
+        # -------- Posterior parameters --------
+        if opt_sigma != "B":
+            par_names.extend(
+                [self.Discrepancy.InputDisc.Marginals[i].name for i
+                 in range(len(self.Discrepancy.InputDisc.Marginals))]
+                )
+        # Plot with corner
+        figPosterior = corner.corner(self.posterior_df.to_numpy(),
+                                     labels=par_names,
+                                     quantiles=[0.15, 0.5, 0.85],
+                                     show_titles=True,
+                                     title_fmt=self.corner_title_fmt,
+                                     labelpad=0.2,
+                                     use_math_text=True,
+                                     title_kwargs={"fontsize": 28},
+                                     plot_datapoints=False,
+                                     plot_density=False,
+                                     fill_contours=True,
+                                     smooth=0.5,
+                                     smooth1d=0.5)
+
+        # Loop over axes and set x limits
+        if opt_sigma == "B":
+            axes = np.array(figPosterior.axes).reshape(
+                (len(par_names), len(par_names))
+                )
+            for yi in range(len(par_names)):
+                ax = axes[yi, yi]
+                ax.set_xlim(MetaModel.bound_tuples[yi])
+                for xi in range(yi):
+                    ax = axes[yi, xi]
+                    ax.set_xlim(MetaModel.bound_tuples[xi])
+
+        # Turn off gridlines
+        for ax in figPosterior.axes:
+            ax.grid(False)
+
+        if self.emulator:
+            plotname = f'/Posterior_Dist_{Model.name}_emulator'
+        else:
+            plotname = f'/Posterior_Dist_{Model.name}'
+
+        figPosterior.set_size_inches((24, 16))
+        figPosterior.savefig(f'./{out_dir}{plotname}.pdf',
+                             bbox_inches='tight')
+
+        # -------- Plot MAP --------
+        if self.plot_map_pred:
+            self._plot_max_a_posteriori()
+
+        # -------- Plot log_BME dist --------
+        if self.bootstrap:
+            # Computing the TOM performance
+            self.log_BME_tom = stats.chi2.rvs(
+                self.n_tot_measurement, size=self.log_BME.shape[0]
+                )
+
+            fig, ax = plt.subplots()
+            sns.kdeplot(self.log_BME_tom, ax=ax, color="green", shade=True)
+            sns.kdeplot(
+                self.log_BME, ax=ax, color="blue", shade=True,
+                label='Model BME')
+
+            ax.set_xlabel('log$_{10}$(BME)')
+            ax.set_ylabel('Probability density')
+
+            legend_elements = [
+                Patch(facecolor='green', edgecolor='green', label='TOM BME'),
+                Patch(facecolor='blue', edgecolor='blue', label='Model BME')
+                ]
+            ax.legend(handles=legend_elements)

-        PCEModel = self.PCEModel
+            if self.emulator:
+                plotname = f'/BME_hist_{Model.name}_emulator'
+            else:
+                plotname = f'/BME_hist_{Model.name}'

-        NrSamples = self.NrofSamples
+            plt.savefig(f'./{out_dir}{plotname}.pdf', bbox_inches='tight')
+            plt.show()
+            plt.close()

-        self.Samples = PCEModel.ExpDesign.generate_samples(NrSamples, 'random')
+        # -------- Posterior predictives --------
+        if self.plot_post_pred:
+            # Plot the posterior predictive
+            self._plot_post_predictive()

-        return self.Samples
+        return self

    # -------------------------------------------------------------------------
-    def eval_Model(self, Samples=None, key='MAP'):
+    def _perturb_data(self, data, output_names):
        """
-        Evaluate Forward Model
+        Returns an array with n_bootstrap_itrs rows of perturbed data.
+        The first row includes the original observation data.
+        If `self.bayes_loocv` is True, a 2d-array will be returned with
+        repeated rows and zero diagonal entries.
+
+        Parameters
+        ----------
+        data : pandas DataFrame
+            Observation data.
+        output_names : list
+            List of the output names.
+
+        Returns
+        -------
+        final_data : array
+            Perturbed data set.
""" - index = self.PCEModel.index - Model = self.PCEModel.ModelObj + noise_level = self.bootstrap_noise + obs_data = data[output_names].values + n_measurement, n_outs = obs_data.shape + self.n_tot_measurement = obs_data[~np.isnan(obs_data)].shape[0] + # Number of bootstrap iterations + if self.bayes_loocv: + self.n_bootstrap_itrs = self.n_tot_measurement + + # Pass loocv dataset + if self.bayes_loocv: + obs = obs_data.T[~np.isnan(obs_data.T)] + final_data = np.repeat(np.atleast_2d(obs), self.n_bootstrap_itrs, + axis=0) + np.fill_diagonal(final_data, 0) + return final_data - if Samples is None: - Samples = self.get_Sample() - self.Samples = Samples else: - Samples = Samples - self.NrofSamples = len(Samples) + final_data = np.zeros( + (self.n_bootstrap_itrs, self.n_tot_measurement) + ) + final_data[0] = obs_data.T[~np.isnan(obs_data.T)] + for itrIdx in range(1, self.n_bootstrap_itrs): + data = np.zeros((n_measurement, n_outs)) + for idx in range(len(output_names)): + std = np.nanstd(obs_data[:, idx]) + if std == 0: + std = 0.001 + noise = std * noise_level + data[:, idx] = np.add( + obs_data[:, idx], + np.random.normal(0, 1, obs_data.shape[0]) * noise, + ) + + final_data[itrIdx] = data.T[~np.isnan(data.T)] + + return final_data - ModelOutputs, _ = Model.run_model_parallel(Samples, - keyString=key+self.Name) + # ------------------------------------------------------------------------- + def _logpdf(self, x, mean, cov): + """ + computes the likelihood based on a multivariate normal distribution. - if index is not None: - if self.Name == 'Valid': - self.ModelOutputs = {key: ModelOutputs[key][index:] for key in [list(ModelOutputs.keys())[0]]} - Outputs = {key: ModelOutputs[key][:,index:] for key in Model.Output.Names} - else: - self.ModelOutputs = {key: ModelOutputs[key][:index] for key in [list(ModelOutputs.keys())[0]]} - Outputs = {key: ModelOutputs[key][:,:index] for key in Model.Output.Names} - self.ModelOutputs.update(Outputs) + Parameters + ---------- + x : TYPE + DESCRIPTION. + mean : array_like + Observation data. + cov : 2d array + Covariance matrix of the distribution. + + Returns + ------- + log_lik : float + Log likelihood. + + """ + n = len(mean) + L = spla.cholesky(cov, lower=True) + beta = np.sum(np.log(np.diag(L))) + dev = x - mean + alpha = dev.dot(spla.cho_solve((L, True), dev)) + log_lik = -0.5 * alpha - beta - n / 2. * np.log(2 * np.pi) + return log_lik + + # ------------------------------------------------------------------------- + def _eval_model(self, samples=None, key='MAP'): + """ + Evaluates Forward Model. + + Parameters + ---------- + samples : array of shape (n_samples, n_params), optional + Parameter sets. The default is None. + key : string, optional + Key string to be passed to the run_model_parallel method. + The default is 'MAP'. + + Returns + ------- + model_outputs : TYPE + DESCRIPTION. 
+ + """ + MetaModel = self.MetaModel + Model = MetaModel.ModelObj + + if samples is None: + self.samples = MetaModel.ExpDesign.generate_samples( + self.n_samples, 'random') else: - self.ModelOutputs = ModelOutputs + self.samples = samples + self.n_samples = len(samples) + + model_outputs, _ = Model.run_model_parallel( + self.samples, key_str=key+self.name) # Clean up # Zip the subdirectories try: - dir_name = f'{Model.name}MAP{self.Name}' + dir_name = f'{Model.name}MAP{self.name}' key = dir_name + '_' Model.zip_subdirs(dir_name, key) except: pass - return self.ModelOutputs + return model_outputs # ------------------------------------------------------------------------- - def RBF_kernel(self, X, Sigma2): + def RBF_kernel(self, X, hyperparameters): """ Isotropic squared exponential kernel. @@ -176,7 +717,7 @@ class BayesInference: hyperparameters : Dict Lambda characteristic length sigma_f controls the marginal variance of b(x) - sigma_0 unresolvable error) nugget term that is interpreted as random + sigma_0 unresolvable error nugget term, interpreted as random error that cannot be attributed to measurement error. Returns ------- @@ -188,11 +729,11 @@ class BayesInference: min_max_scaler = preprocessing.MinMaxScaler() X_minmax = min_max_scaler.fit_transform(X) - nparams = len(Sigma2) + nparams = len(hyperparameters) # characteristic length (0,1] - Lambda = Sigma2[0] + Lambda = hyperparameters[0] # sigma_f controls the marginal variance of b(x) - sigma2_f = Sigma2[1] + sigma2_f = hyperparameters[1] # cov_matrix = sigma2_f*rbf_kernel(X_minmax, gamma = 1/Lambda**2) @@ -201,42 +742,82 @@ class BayesInference: if nparams > 2: # (unresolvable error) nugget term that is interpreted as random # error that cannot be attributed to measurement error. - sigma2_0 = Sigma2[2:] + sigma2_0 = hyperparameters[2:] for i, j in np.ndindex(cov_matrix.shape): - cov_matrix[i, j] += np.sum(sigma2_0) if i==j else 0 + cov_matrix[i, j] += np.sum(sigma2_0) if i == j else 0 return cov_matrix # ------------------------------------------------------------------------- - def normpdf(self, Outputs, Data, TotalSigma2s, Sigma2=None, std=None): + def normpdf(self, outputs, obs_data, total_sigma2s, sigma2=None, std=None): + """ + Calculates the likelihood of simulation outputs compared with + observation data. - Model = self.PCEModel.ModelObj + Parameters + ---------- + outputs : dict + A dictionary containing the simulation outputs as array of shape + (n_samples, n_measurement) for each model output. + obs_data : dict + A dictionary/dataframe containing the observation data. + total_sigma2s : dict + A dictionary with known values of the covariance diagonal entries, + a.k.a sigma^2. + sigma2 : array, optional + An array of the sigma^2 samples, when the covariance diagonal + entries are unknown and are being jointly inferred. The default is + None. + std : dict, optional + A dictionary containing the root mean squared error as array of + shape (n_samples, n_measurement) for each model output. The default + is None. + + Returns + ------- + logLik : array of shape (n_samples) + Likelihoods. 
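+
+        Notes
+        -----
+        Internally, the Gaussian log-density is evaluated via `_logpdf`,
+        i.e. for a model output :math:`y`, data :math:`d`, and a covariance
+        matrix :math:`\Sigma` over :math:`n` measurement points:
+
+        .. math:: \log L = -\tfrac{1}{2}(y-d)^T \Sigma^{-1} (y-d)
+                  - \tfrac{1}{2}\log|\Sigma| - \tfrac{n}{2}\log(2\pi)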
+ + """ + Model = self.MetaModel.ModelObj logLik = 0.0 - formatting_function = np.vectorize(lambda f: format(f, '6.2e')) # Extract the requested model outputs for likelihood calulation - if self.ReqOutputType is None: - OutputType = Model.Output.Names + if self.req_outputs is None: + req_outputs = Model.Output.names else: - OutputType = list(self.ReqOutputType) + req_outputs = list(self.req_outputs) # Loop over the outputs - for idx, out in enumerate(OutputType): + for idx, out in enumerate(req_outputs): # (Meta)Model Output - nsamples, nout = Outputs[out].shape + nsamples, nout = outputs[out].shape # Prepare data and remove NaN try: - data = Data[out].to_numpy()[~np.isnan(Data[out])] - except: - data = Data[out][~np.isnan(Data[out])] - totalSigma2s = TotalSigma2s[out][~np.isnan(TotalSigma2s[out])][:nout] + data = obs_data[out].values[~np.isnan(obs_data[out])] + except AttributeError: + data = obs_data[out][~np.isnan(obs_data[out])] - # If sigma2 is not given, use given TotalSigma2s - if Sigma2 is None: - logLik += stats.multivariate_normal.logpdf(Outputs[out], data, - np.diag(totalSigma2s)) + # Prepare sigma2s + tot_sigma2s = total_sigma2s[out][~np.isnan( + total_sigma2s[out])][:nout] + + # Add the std of the PCE is chosen as emulator. + if self.emulator: + if std is not None: + std_pce = std[out] + else: + std_pce = np.mean( + self._std_pce_prior_pred[out], axis=0) + # Expected value of variance (Assump: i.i.d stds) + tot_sigma2s += std_pce**2 + + # If sigma2 is not given, use given total_sigma2s + if sigma2 is None: + logLik += stats.multivariate_normal.logpdf( + outputs[out], data, np.diag(tot_sigma2s)) continue # Loop over each run/sample and calculate logLikelihood @@ -244,188 +825,71 @@ class BayesInference: for s_idx in range(nsamples): # Simulation run - TotalOutputs = Outputs[out] + tot_outputs = outputs[out] # Covariance Matrix - covMatrix = np.diag(totalSigma2s) + covMatrix = np.diag(tot_sigma2s) - if Sigma2 is not None: + if sigma2 is not None: # Check the type error term - if hasattr(self, 'BiasInputs') and not hasattr(self, 'errorModel'): - # TODO: Infer a Bias model usig Gaussian Process Regression - BiasInputs = np.hstack((self.BiasInputs[out], - TotalOutputs[s_idx].reshape(-1,1))) - # EDY = self.PCEModel.ExpDesign.Y[out] - # BiasInputs = np.hstack((self.BiasInputs[out],EDY.T)) - params = Sigma2[s_idx,:3] if idx==0 else Sigma2[s_idx,3:] - covMatrix = self.RBF_kernel(BiasInputs, params) - # covMatrix = self.RBF_kernel(self.BiasInputs[out], Sigma2) + if hasattr(self, 'bias_inputs') and \ + not hasattr(self, 'error_model'): + # Infer a Bias model usig Gaussian Process Regression + bias_inputs = np.hstack( + (self.bias_inputs[out], + tot_outputs[s_idx].reshape(-1, 1))) + + params = sigma2[s_idx, idx*3:(idx+1)*3] + covMatrix = self.RBF_kernel(bias_inputs, params) else: # Infer equal sigma2s try: - sigma2 = Sigma2[s_idx, idx] - except: - sigma2 = 0.0 + sigma_2 = sigma2[s_idx, idx] + except TypeError: + sigma_2 = 0.0 - covMatrix += sigma2 * np.eye(nout) - # covMatrix = np.diag(sigma2 * totalSigma2s) - - # Add the std of the PCE is chosen as emulator. 
- if self.emulator: - stdPCE = std[out] if std is not None else np.mean(self._stdPCEPriorPred[out], axis=0) - # Expected value of variance (Assump: i.i.d stds) - covMatrix += np.diag(stdPCE**2) + covMatrix += sigma_2 * np.eye(nout) + # covMatrix = np.diag(sigma2 * total_sigma2s) - # print(formatting_function(covMatrix)) # Select the data points to compare - if self.selectedIndices is not None: - indices = self.selectedIndices[out] + if self.selected_indices is not None: + indices = self.selected_indices[out] covMatrix = np.diag(covMatrix[indices, indices]) - else: - indices = list(range(nout)) - - # Compute loglikelihood - logliks[s_idx] = self._logpdf(TotalOutputs[s_idx, indices], - data[indices], covMatrix) - - logLik += logliks - - return logLik - - # ------------------------------------------------------------------------- - def normpdferror(self, deviation, stds): - PCEModel = self.PCEModel - Model = PCEModel.ModelObj - - # Extract the requested model outputs for likelihood calulation - if self.ReqOutputType is None: - OutputType = Model.Output.Names - else: - OutputType = list(self.ReqOutputType) - SampleSize, ndata = deviation[OutputType[0]].shape - - # # Flatten the Output - # TotalDeviation = np.empty((SampleSize,0)) - # TotalStd = np.empty((SampleSize,0)) - # for idx, outputType in enumerate(OutputType): - # TotalDeviation = np.hstack((TotalDeviation, deviation[outputType])) - # TotalStd = np.hstack((TotalStd, stds[outputType])) - # Likelihoods = np.zeros(SampleSize) - # for i in range(SampleSize): - # dev = TotalDeviation[i] - # std = TotalStd[i] - # covMatrix = np.diag(std**2) - # denom2 = (((2*np.pi)**(ndata/2)) * np.sqrt(np.linalg.det(covMatrix))) - # Likelihoods[i] = np.exp(-0.5 * np.dot(np.dot(dev, np.linalg.pinv(covMatrix)), dev[:,np.newaxis]))[0]/denom2 - logLik = np.zeros((SampleSize, len(OutputType))) - for outIdx, out in enumerate(OutputType): - dev = deviation[out] - ndata = dev.shape[1] - for idx in range(ndata): - beta = PCEModel.clf_poly['Z']['y_'+str(idx+1)].alpha_ - std = np.sqrt(1./beta) - logLik[:, outIdx] = stats.norm(0, std).logpdf(dev[:, idx]) - - logLik = np.sum(logLik, axis=1) - return logLik - - # ------------------------------------------------------------------------- - def normpdfSigma2(self, Outputs, Data, TotalSigma2s, Sigma2=None, std=None): - - Model = self.PCEModel.ModelObj - logLik = 0.0 - - # Extract the requested model outputs for likelihood calulation - if self.ReqOutputType is None: - OutputType = Model.Output.Names - else: - OutputType = list(self.ReqOutputType) - - # Loop over the outputs and calculate logLikelihood - for idx, out in enumerate(OutputType): - - # (Meta)Model Output - TotalOutputs = Outputs[out] - nout = TotalOutputs.shape[1] - - # Remove NaN - try: - data = Data[out].to_numpy()[~np.isnan(Data[out])] - except: - data = Data[out][~np.isnan(Data[out])] - totalSigma2s = TotalSigma2s[out][~np.isnan(TotalSigma2s[out])][:nout] - - # Covariance diagonal entries - biasSigma2s = np.zeros((len(Sigma2), nout)) - # biasSigma2s = np.repeat(totalSigma2s.reshape(1,-1),len(Sigma2),axis=0) - - for sidx,sigma2s in enumerate(Sigma2): - # Check the type error term - if hasattr(self, 'BiasInputs'): - # TODO: Infer a Bias model usig Gaussian Process Regression - BiasInputs = np.hstack((self.BiasInputs[out],TotalOutputs.T)) - biasSigma2s = self.RBF_kernel(BiasInputs, Sigma2) - # biasSigma2s = self.RBF_kernel(self.BiasInputs[out], Sigma2) - else: - # Infer equal sigma2s - try: - sigma2 = sigma2s[idx] - except: - sigma2 = 0.0 - - # Convert 
biasSigma2s to a covMatrix - # biasSigma2s[sidx] = sigma2 * np.ones(shape=nout) - biasSigma2s[sidx] = sigma2 * totalSigma2s - - # Add the std of the PCE is chosen as emulator. - if self.emulator: - stdPCE = std[out] if std is not None else np.mean(self._stdPCEPriorPred[out], axis=0) - # Expected value of variance (Assump: i.i.d stds) - biasSigma2s[sidx] += stdPCE**3 + else: + indices = list(range(nout)) - # Select the data points to compare - if self.selectedIndices is not None: - biasSigma2s[sidx] = biasSigma2s[self.selectedIndices[out]] - - # Select the data points to compare - if self.selectedIndices is not None: - TotalOutputs = TotalOutputs[:, self.selectedIndices[out]] - data = data[self.selectedIndices[out]] - - # Compute loglikelihood - split_Outputs = np.array_split(TotalOutputs, 8) - split_cov = np.array_split(biasSigma2s, 8) - outList = Parallel(n_jobs=-1, prefer='threads')( - delayed(stats.multivariate_normal.logpdf)(output, data, np.diag(cov)) - for output, cov in zip(split_Outputs,split_cov)) - logLik += np.concatenate(outList).ravel() + # Compute loglikelihood + logliks[s_idx] = self._logpdf( + tot_outputs[s_idx, indices], data[indices], covMatrix + ) + logLik += logliks return logLik # ------------------------------------------------------------------------- - def BME_Corr_Weight(self, Data, TotalSigma2s, posterior): + def BME_Corr_Weight(self, Data, total_sigma2s, posterior): """ Calculates the correction factor for BMEs. """ - PCEModel = self.PCEModel - OrigModelOutput = PCEModel.ExpDesign.Y - Model = PCEModel.ModelObj + MetaModel = self.MetaModel + OrigModelOutput = MetaModel.ExpDesign.Y + Model = MetaModel.ModelObj # Posterior with guassian-likelihood postDist = stats.gaussian_kde(posterior.T) # Remove NaN Data = Data[~np.isnan(Data)] - TotalSigma2s = TotalSigma2s[~np.isnan(TotalSigma2s)] + total_sigma2s = total_sigma2s[~np.isnan(total_sigma2s)] # Covariance Matrix - covMatrix = np.diag(TotalSigma2s[:self.ntotMeasurement]) + covMatrix = np.diag(total_sigma2s[:self.n_tot_measurement]) # Extract the requested model outputs for likelihood calulation - if self.ReqOutputType is None: - OutputType = Model.Output.Names + if self.req_outputs is None: + OutputType = Model.Output.names else: - OutputType = list(self.ReqOutputType) + OutputType = list(self.req_outputs) # SampleSize = OrigModelOutput[OutputType[0]].shape[0] @@ -433,15 +897,15 @@ class BayesInference: # Flatten the OutputType for OrigModel TotalOutputs = np.concatenate([OrigModelOutput[x] for x in OutputType], 1) - NrofBayesSamples = self.NrofSamples - # Evaluate PCEModel on the experimental design - Samples = PCEModel.ExpDesign.X - OutputRS, stdOutputRS = PCEModel.eval_metamodel(samples=Samples,name=self.Name) + NrofBayesSamples = self.n_samples + # Evaluate MetaModel on the experimental design + Samples = MetaModel.ExpDesign.X + OutputRS, stdOutputRS = MetaModel.eval_metamodel(samples=Samples) # Reset the NrofSamples to NrofBayesSamples - self.NrofSamples = NrofBayesSamples + self.n_samples = NrofBayesSamples - # Flatten the OutputType for PCEModel + # Flatten the OutputType for MetaModel TotalPCEOutputs = np.concatenate([OutputRS[x] for x in OutputRS], 1) TotalPCEstdOutputRS= np.concatenate([stdOutputRS[x] for x in stdOutputRS], 1) @@ -519,844 +983,498 @@ class BayesInference: # return np.log(BME_Corr[0]) - #-------------------------------------------------------------------------------------------------------- - def Rejection_Sampling(self): + # 
-------------------------------------------------------------------------
+    def rejection_sampling(self):
+        """
+        Performs rejection sampling to update the prior distribution on the
+        input parameters.
+
+        Returns
+        -------
+        posterior : pandas.dataframe
+            Posterior samples of the input parameters.
+
+        """

-        PCEModel = self.PCEModel
+        MetaModel = self.MetaModel
        try:
-            Sigma2Prior = self.Discrepancy.Sigma2Prior
+            sigma2_prior = self.Discrepancy.sigma2_prior
        except:
-            Sigma2Prior = None
+            sigma2_prior = None

        # Check if the discrepancy is defined as a distribution:
-        MCSamples = self.Samples
+        samples = self.samples

-        if Sigma2Prior is not None:
-            MCSamples = np.hstack((MCSamples, Sigma2Prior))
+        if sigma2_prior is not None:
+            samples = np.hstack((samples, sigma2_prior))

        # Take the first column of Likelihoods (Observation data without noise)
-        Likelihoods = np.exp(self.logLikelihoods[:,0])
-        NrofSamples = len(Likelihoods)
-        NormLikelihoods = Likelihoods / np.max(Likelihoods)
+        likelihoods = np.exp(self.log_likes[:, 0])
+        n_samples = len(likelihoods)
+        norm_likelihoods = likelihoods / np.max(likelihoods)

        # Normalize based on min if all Likelihoods are zero
-        if all(Likelihoods == 0.0):
-            Likelihoods = self.logLikelihoods[:,0]
-            NormLikelihoods = Likelihoods / np.min(Likelihoods)
+        if all(likelihoods == 0.0):
+            likelihoods = self.log_likes[:, 0]
+            norm_likelihoods = likelihoods / np.min(likelihoods)

        # Random numbers between 0 and 1
-        unif = np.random.rand(1, NrofSamples)[0]
+        unif = np.random.rand(1, n_samples)[0]

        # Reject the poorly performing prior samples
-        acceptedSamples = MCSamples[NormLikelihoods >= unif]
+        accepted_samples = samples[norm_likelihoods >= unif]

        # Output the Posterior
+        par_names = MetaModel.ExpDesign.par_names
+        if sigma2_prior is not None:
+            for name in self.Discrepancy.name:
+                par_names.append(name)
+
+        return pd.DataFrame(accepted_samples, columns=par_names)
+
+    # -------------------------------------------------------------------------
+    def posterior_predictive(self):
+        """
+        Stores the prior- and posterior predictive samples, i.e. model
+        evaluations using the samples, into hdf5 files.

-        InputNames = [PCEModel.input_obj.Marginals[i].Name for i in range(len(PCEModel.input_obj.Marginals))]
+        priorPredictive.hdf5 : Prior predictive samples.
+        postPredictive_wo_noise.hdf5 : Posterior predictive samples without
-        if Sigma2Prior is not None:
-            for name in self.Discrepancy.Name:
-                InputNames.append(name)
+            the additive noise.
+        postPredictive.hdf5 : Posterior predictive samples with the additive
+            noise.

-        return pd.DataFrame(acceptedSamples, columns=InputNames)
+        Returns
+        -------
+        None.
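+
+        Examples
+        --------
+        The stored samples can be read back with `h5py`; a sketch, using
+        the model name 'model' as a placeholder::
+
+            >>> import h5py
+            >>> f = h5py.File(
+            ...     'Outputs_Bayes_model_Calib/postPredictive.hdf5', 'r')
+            >>> list(f['EDY'].keys())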
-    #--------------------------------------------------------------------------------------------------------
-    def PosteriorPredictive(self):
+        """

-        PCEModel = self.PCEModel
-        Model = PCEModel.ModelObj
+        MetaModel = self.MetaModel
+        Model = MetaModel.ModelObj

        # Make a directory to save the prior/posterior predictive
-        OutputDir = (r'Outputs_Bayes_' + Model.Name + '_' + self.Name)
-        if not os.path.exists(OutputDir): os.makedirs(OutputDir)
+        out_dir = f'Outputs_Bayes_{Model.name}_{self.name}'
+        os.makedirs(out_dir, exist_ok=True)

        # Read observation data and perturb it if requested
-        if self.MeasuredData is None:
-            self.MeasuredData = Model.read_observation(case=self.Name)
+        if self.measured_data is None:
+            self.measured_data = Model.read_observation(case=self.name)

-        if not isinstance(self.MeasuredData, pd.DataFrame):
-            self.MeasuredData = pd.DataFrame(self.MeasuredData)
+        if not isinstance(self.measured_data, pd.DataFrame):
+            self.measured_data = pd.DataFrame(self.measured_data)

        # X_values
-        x_values = PCEModel.ExpDesign.x_values
+        x_values = MetaModel.ExpDesign.x_values

        try:
-            Sigma2Prior = self.Discrepancy.Sigma2Prior
+            sigma2_prior = self.Discrepancy.sigma2_prior
        except:
-            Sigma2Prior = None
+            sigma2_prior = None

-        Posterior_df = self.Posterior_df
+        # Extract posterior samples
+        posterior_df = self.posterior_df

        # Take care of the sigma2
-        if Sigma2Prior is not None:
+        if sigma2_prior is not None:
            try:
-                sigma2s = Posterior_df[self.Discrepancy.Name].to_numpy()
-                Posterior_df = Posterior_df.drop(labels=self.Discrepancy.Name, axis=1)
+                sigma2s = posterior_df[self.Discrepancy.name].values
+                posterior_df = posterior_df.drop(
+                    labels=self.Discrepancy.name, axis=1
+                    )
            except:
                sigma2s = self.sigma2s

        # Posterior predictive
        if self.emulator:
-            if self.SamplingMethod == 'rejection':
-                PriorPred = self.__meanPCEPriorPred
-                if self.Name != 'Calib':
-                    PosteriorPred, PosteriorPred_std = self.__meanPCEPriorPred,self._stdPCEPriorPred
+            if self.inference_method == 'rejection':
+                prior_pred = self.__mean_pce_prior_pred
+            if self.name.lower() != 'calib':
+                post_pred = self.__mean_pce_prior_pred
+                post_pred_std = self._std_pce_prior_pred
            else:
-                PosteriorPred, PosteriorPred_std = PCEModel.eval_metamodel(samples=Posterior_df.to_numpy(),
-                                                                           name=self.Name)
-                # TODO: Correct the predictions with Model discrepancy
-                if hasattr(self, 'errorModel') and self.errorModel:
-                    PosteriorPred, PosteriorPred_std = self.errorMetaModel.eval_model_error(self.BiasInputs,
-                                                                                            PosteriorPred)
+                post_pred, post_pred_std = MetaModel.eval_metamodel(
+                    samples=posterior_df.values
+                    )

        else:
-            if self.SamplingMethod == 'rejection':
-                PriorPred = self.__ModelPriorPred
-                if self.Name != 'Calib':
-                    PosteriorPred, PosteriorPred_std = self.__meanPCEPriorPred,self._stdPCEPriorPred
+            if self.inference_method == 'rejection':
+                prior_pred = self.__model_prior_pred
+            if self.name.lower() != 'calib':
+                post_pred = self.__mean_pce_prior_pred
+                post_pred_std = self._std_pce_prior_pred
            else:
-                PosteriorPred = self.eval_Model(Samples=Posterior_df.to_numpy(),key='PostPred')
-
-                # TODO: Correct the predictions with Model discrepancy
-                if hasattr(self, 'errorModel') and self.errorModel:
-                    PosteriorPred, PosteriorPred_std = self.errorMetaModel.eval_model_error(self.BiasInputs,
-                                                                                            PosteriorPred)
+                post_pred = self._eval_model(
+                    samples=posterior_df.values, key='PostPred'
+                    )
+            # Correct the predictions with Model discrepancy
+            if hasattr(self, 'error_model') and self.error_model:
+                y_hat, y_std = self.error_MetaModel.eval_model_error(
+                    self.bias_inputs,
post_pred + ) + post_pred, post_pred_std = y_hat, y_std # Add discrepancy from likelihood Sample to the current posterior runs - TotalSigma2 = self.Discrepancy.TotalSigma2 - PosteriorPred_withnoise = copy.deepcopy(PosteriorPred) - for varIdx, var in enumerate(Model.Output.Names): - for i in range(len(PosteriorPred[var])): - pred = PosteriorPred[var][i] + total_sigma2 = self.Discrepancy.total_sigma2 + post_pred_withnoise = copy.deepcopy(post_pred) + for varIdx, var in enumerate(Model.Output.names): + for i in range(len(post_pred[var])): + pred = post_pred[var][i] # Known sigma2s - totalSigma2 = TotalSigma2[var][~np.isnan(TotalSigma2[var])][:len(pred)] - cov = np.diag(totalSigma2) + clean_sigma2 = total_sigma2[var][~np.isnan(total_sigma2[var])] + tot_sigma2 = clean_sigma2[:len(pred)] + cov = np.diag(tot_sigma2) # Check the type error term - if Sigma2Prior is not None: + if sigma2_prior is not None: # Inferred sigma2s - if hasattr(self, 'BiasInputs') and not hasattr(self, 'errorModel'): - # TODO: Infer a Bias model usig Gaussian Process Regression - EDY = self.PCEModel.ExpDesign.Y[var] - # BiasInputs = np.hstack((self.BiasInputs[var],EDY.T)) - BiasInputs = np.hstack((self.BiasInputs[var],pred.reshape(-1,1))) - sigma2 = sigma2s[i,:3] if varIdx==0 else sigma2s[i,3:] - cov = self.RBF_kernel(BiasInputs, sigma2) - # cov = self.RBF_kernel(self.BiasInputs[var], sigma2s[i]) + if hasattr(self, 'bias_inputs') and \ + not hasattr(self, 'error_model'): + # TODO: Infer a Bias model usig GPR + bias_inputs = np.hstack(( + self.bias_inputs[var], pred.reshape(-1, 1))) + params = sigma2s[i, varIdx*3:(varIdx+1)*3] + cov = self.RBF_kernel(bias_inputs, params) else: # Infer equal sigma2s try: sigma2 = sigma2s[i, varIdx] - except: + except TypeError: sigma2 = 0.0 # Convert biasSigma2s to a covMatrix cov += sigma2 * np.eye(len(pred)) - # cov = np.diag(sigma2 * totalSigma2) if self.emulator: - stdPCE = PCEModel.RMSE[var] if PCEModel.RMSE is not None else PosteriorPred_std[var][i] + if MetaModel.rmse is not None: + stdPCE = MetaModel.rmse[var] + else: + stdPCE = post_pred_std[var][i] # Expected value of variance (Assump: i.i.d stds) cov += np.diag(stdPCE**2) - # Sample a multi-variate normal distribution with mean of prediction and variance of cov - PosteriorPred_withnoise[var][i] = np.random.multivariate_normal(pred, cov, 1) + # Sample a multivariate normal distribution with mean of + # prediction and variance of cov + post_pred_withnoise[var][i] = np.random.multivariate_normal( + pred, cov, 1 + ) # ----- Prior Predictive ----- - if self.SamplingMethod == 'rejection': + if self.inference_method.lower() == 'rejection': # Create hdf5 metadata - hdf5file = OutputDir+'/priorPredictive.hdf5' + hdf5file = f'{out_dir}/priorPredictive.hdf5' hdf5_exist = os.path.exists(hdf5file) - if hdf5_exist: os.remove(hdf5file) + if hdf5_exist: + os.remove(hdf5file) file = h5py.File(hdf5file, 'a') # Store x_values if type(x_values) is dict: grp_x_values = file.create_group("x_values/") - for varIdx, var in enumerate(Model.Output.Names): + for varIdx, var in enumerate(Model.Output.names): grp_x_values.create_dataset(var, data=x_values[var]) else: file.create_dataset("x_values", data=x_values) # Store posterior predictive grpY = file.create_group("EDY/") - for varIdx, var in enumerate(Model.Output.Names): - grpY.create_dataset(var, data=PriorPred[var]) + for varIdx, var in enumerate(Model.Output.names): + grpY.create_dataset(var, data=prior_pred[var]) - # ----- Posterior Predictive ----- + # ----- Posterior Predictive only model 
evaluations ----- # Create hdf5 metadata - hdf5file = OutputDir+'/postPredictive_wo_noise.hdf5' + hdf5file = out_dir+'/postPredictive_wo_noise.hdf5' hdf5_exist = os.path.exists(hdf5file) - if hdf5_exist: os.remove(hdf5file) + if hdf5_exist: + os.remove(hdf5file) file = h5py.File(hdf5file, 'a') # Store x_values if type(x_values) is dict: grp_x_values = file.create_group("x_values/") - for varIdx, var in enumerate(Model.Output.Names): + for varIdx, var in enumerate(Model.Output.names): grp_x_values.create_dataset(var, data=x_values[var]) else: file.create_dataset("x_values", data=x_values) # Store posterior predictive grpY = file.create_group("EDY/") - for varIdx, var in enumerate(Model.Output.Names): - grpY.create_dataset(var, data=PosteriorPred[var]) + for varIdx, var in enumerate(Model.Output.names): + grpY.create_dataset(var, data=post_pred[var]) - # ----- Posterior Predictive ----- + # ----- Posterior Predictive with noise ----- # Create hdf5 metadata - hdf5file = OutputDir+'/postPredictive.hdf5' + hdf5file = out_dir+'/postPredictive.hdf5' hdf5_exist = os.path.exists(hdf5file) - if hdf5_exist: os.remove(hdf5file) + if hdf5_exist: + os.remove(hdf5file) file = h5py.File(hdf5file, 'a') # Store x_values if type(x_values) is dict: grp_x_values = file.create_group("x_values/") - for varIdx, var in enumerate(Model.Output.Names): + for varIdx, var in enumerate(Model.Output.names): grp_x_values.create_dataset(var, data=x_values[var]) else: file.create_dataset("x_values", data=x_values) # Store posterior predictive grpY = file.create_group("EDY/") - for varIdx, var in enumerate(Model.Output.Names): - grpY.create_dataset(var, data=PosteriorPred_withnoise[var]) + for varIdx, var in enumerate(Model.Output.names): + grpY.create_dataset(var, data=post_pred_withnoise[var]) return - #-------------------------------------------------------------------------------------------------------- - def create_Inference(self): - - # Set some variables - PCEModel = self.PCEModel - Model = PCEModel.ModelObj - NofPa = PCEModel.NofPa - OutputNames = Model.Output.Names - par_names = PCEModel.ExpDesign.par_names - - # If the prior is set by the user, take it. 
- if self.Samples is None: - self.Samples = self.get_Sample() - else: - try: - SamplesAllParameters = self.Samples.to_numpy() - except: - SamplesAllParameters = self.Samples - - try: - # Take care of an additional Sigma2s - self.Samples = SamplesAllParameters[:,:NofPa] - except: - self.Samples = SamplesAllParameters - - self.NrofSamples = self.Samples.shape[0] - - # ---------- Preparation of observation data ---------- - # Read observation data and perturb it if requested - if self.MeasuredData is None: - self.MeasuredData = Model.read_observation(case=self.Name) - - if not isinstance(self.MeasuredData, pd.DataFrame): - self.MeasuredData = pd.DataFrame(self.MeasuredData) - ObservationData = self.MeasuredData[OutputNames].to_numpy() - - nMeasurement, nOutputs = ObservationData.shape - self.ntotMeasurement = ObservationData[~np.isnan(ObservationData)].shape[0] - BootstrapItrNr = self.BootstrapItrNr if not self.BayesLOOCV else self.ntotMeasurement - perturbedData = np.zeros((BootstrapItrNr,self.ntotMeasurement )) - perturbedData[0] = ObservationData.T[~np.isnan(ObservationData.T)] - - if len(self.MeasurementError) == 0: - if isinstance(self.Discrepancy, dict): - Disc = self.Discrepancy['known'] - else: - Disc = self.Discrepancy - if isinstance(Disc, dict): - self.MeasurementError = {k:np.sqrt(Disc.Parameters[k]) for k in - Disc.Parameters.keys()} - else: - try: - self.MeasurementError = np.sqrt(Disc.Parameters) - except: - pass - - # ---------- Preparation of variance for covariance matrix ---------- - # TODO: Modification required. - # Independent and identically distributed - - TotalSigma2 = dict() - optSigmaFlag = isinstance(self.Discrepancy, dict) - optSigma = None - for keyIdx, key in enumerate(OutputNames): - - # Find optSigma - if optSigmaFlag and optSigma is None: - # Option A: known error with unknown bias term - optSigma = 'A' - knownDiscrepancy = self.Discrepancy['known'] - self.Discrepancy = self.Discrepancy['infer'] - sigma2 = np.array(knownDiscrepancy.Parameters[key]) - - elif optSigma == 'A' or self.Discrepancy.Parameters is not None: - # Option B: The sigma2 is known (no bias term) - if optSigma == 'A': - sigma2 = np.array(knownDiscrepancy.Parameters[key]) - else: - optSigma = 'B' - sigma2 = np.array(self.Discrepancy.Parameters[key]) - - elif not isinstance(self.Discrepancy.InputDisc, str): - # Option C: The sigma2 is unknown (bias term including error) - optSigma = 'C' - self.Discrepancy.optSigma = optSigma - sigma2 = np.zeros((nMeasurement)) - - TotalSigma2[key] = sigma2 - - self.Discrepancy.optSigma = optSigma - self.Discrepancy.TotalSigma2 = TotalSigma2 - - # If inferred sigma2s obtained from e.g. 
calibration are given - try: - self.sigma2s = self.Discrepancy.get_Sample(self.NrofSamples) - except: - pass - - # ---------------- Bootstrap & TOM -------------------- - if self.Bootstrap or self.BayesLOOCV: - if len(self.perturbedData) == 0: - # zero mean noise Adding some noise to the observation function - BootstrapNoise = self.BootstrapNoise - for itrIdx in range(1,BootstrapItrNr): - data = np.zeros((nMeasurement, nOutputs)) - for idx in range(len(OutputNames)): - if self.BayesLOOCV: - data[:,idx] = ObservationData[:,idx] - else: - std = np.nanstd(ObservationData[:,idx]) - if std == 0: std = 0.001 - noise = std * BootstrapNoise - data[:,idx] = np.add(ObservationData[:,idx] , np.random.normal(0, 1, ObservationData.shape[0]) * noise) - - - perturbedData[itrIdx] = data.T[~np.isnan(data.T)] - self.perturbedData = perturbedData - - # -------- Model Discrepancy ----------- - if hasattr(self, 'errorModel') and self.errorModel \ - and self.Name.lower()!='calib': - # Select posterior mean as MAP - MAP_theta = self.Samples.mean(axis=0).reshape((1,NofPa)) - # MAP_theta = stats.mode(self.Samples,axis=0)[0] - - # Evaluate the (meta-)model at the MAP - y_MAP, y_std_MAP = PCEModel.eval_metamodel(samples=MAP_theta,name=self.Name) - - # Train a GPR meta-model using MAP - self.errorMetaModel = PCEModel.create_model_error(self.BiasInputs,y_MAP, - Name=self.Name) - - # ----------------------------------------------------- - # ----- Loop over the perturbed observation data ------ - # ----------------------------------------------------- - # Initilize arrays - logLikelihoods = np.zeros((self.NrofSamples,BootstrapItrNr),dtype=np.float16) - BME_Corr = np.zeros((BootstrapItrNr)) - logBME = np.zeros((BootstrapItrNr)) - KLD = np.zeros((BootstrapItrNr)) - infEntropy = np.zeros((BootstrapItrNr)) - surrError = dict() - - # Evaluate the PCEModel - if self.emulator: - self.__meanPCEPriorPred, self._stdPCEPriorPred = PCEModel.eval_metamodel(samples=self.Samples, - name=self.Name) - # TODO: Correct the predictions with Model discrepancy - if hasattr(self, 'errorModel') and self.errorModel: - self.__meanPCEPriorPred, self._stdPCEPriorPred = self.errorMetaModel.eval_model_error(self.BiasInputs, - self.__meanPCEPriorPred) - - # Surrogate model's error using RMSE of test data - surrError = PCEModel.RMSE if hasattr(PCEModel,'RMSE') else None - else: - self.__ModelPriorPred = self.eval_Model(Samples=self.Samples,key='PriorPred') - - for itrIdx, data in tqdm(enumerate(self.perturbedData), ascii=True, desc ="Boostraping the BME calculations"): - - # ---------------- Likelihood calculation ---------------- - modelEvaluations = self.__meanPCEPriorPred if self.emulator else self.__ModelPriorPred - - # TODO: Leave one out / Justifiability analysis - if self.BayesLOOCV: - self.selectedIndices = np.nonzero(data)[0] - # self.selectedIndices = np.delete(list(range(len(self.perturbedData))),itrIdx) - - # Prepare data dataframe - nobs = list(self.MeasuredData.count().to_numpy()[1:]) - numbers = list(map(sum, zip([0] + nobs, nobs))) - indices = list(zip([0] + numbers, numbers)) - data_dict = {OutputNames[i]:data[j:k] for i,(j,k) in enumerate(indices)} - - # unknown sigma2 - if optSigma == 'C' or hasattr(self, 'sigma2s'): - logLikelihoods[:,itrIdx] = self.normpdfSigma2(modelEvaluations, data_dict, TotalSigma2,Sigma2=self.sigma2s, std=surrError) - else: - # known sigma2 - logLikelihoods[:,itrIdx] = self.normpdf(modelEvaluations, data_dict, TotalSigma2, std=surrError) - - # y,std = PCEModel.eval_model_error(self.Samples) - # logLikError = 
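The bootstrap block above builds each perturbed replicate by adding zero-mean Gaussian noise to every observation column, scaled by the column's standard deviation times the BootstrapNoise factor, with a small floor when that deviation is zero. Condensed into a sketch with illustrative names:

    import numpy as np

    def perturb(obs, bootstrap_noise, rng=np.random.default_rng()):
        """obs: array (n_measurements, n_outputs) -> one noisy replicate."""
        data = np.zeros_like(obs)
        for idx in range(obs.shape[1]):
            std = np.nanstd(obs[:, idx])
            if std == 0:
                std = 0.001                   # guard against zero spread
            noise = std * bootstrap_noise
            data[:, idx] = obs[:, idx] + rng.normal(0, 1, obs.shape[0]) * noise
        return data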
self.normpdferror(y, std) - - # ---------------- BME Calculations ---------------- - # BME (log) - logBME[itrIdx] = np.log(np.nanmean(np.exp(logLikelihoods[:,itrIdx],dtype=np.float128))) - - # Rejection Step - # Random numbers between 0 and 1 - unif = np.random.rand(1, self.NrofSamples)[0] - - # Reject the poorly performed prior - Likelihoods = np.exp(logLikelihoods[:,itrIdx],dtype=np.float64) - accepted = (Likelihoods/np.max(Likelihoods)) >= unif - X_Posterior = self.Samples[accepted] - - # Posterior-based expectation of likelihoods - postExpLikelihoods = np.mean(logLikelihoods[:,itrIdx][accepted]) - - # Posterior-based expectation of prior densities - # postExpPrior = np.mean([PCEModel.ExpDesign.JDist(sample)[0] for sample in X_Posterior]) - # postExpPrior = np.mean(np.log([PCEModel.ExpDesign.JDist.pdf(X_Posterior.T)])) - - # Calculate Kullback-Leibler Divergence - #KLD = np.mean(np.log(Likelihoods[Likelihoods!=0])- logBME) - KLD[itrIdx] = postExpLikelihoods - logBME[itrIdx] - - # Information Entropy based on Entropy paper Eq. 38 - # infEntropy[itrIdx] = logBME[itrIdx] - postExpPrior - postExpLikelihoods - - # TODO: BME correction when using Emulator - # if self.emulator: - # BME_Corr[itrIdx] = self.BME_Corr_Weight(data,TotalSigma2, X_Posterior) - - # Clear memory - gc.collect(generation=2) - - # ---------------- Store BME, Likelihoods for all ---------------- - # Likelihoods (Size: NrofSamples,BootstrapItrNr) - self.logLikelihoods = logLikelihoods - - # BME (log), KLD, infEntropy (Size: 1,BootstrapItrNr) - self.logBME = logBME - self.KLD = KLD - self.infEntropy = infEntropy - # TODO: BMECorrFactor (log) (Size: 1,BootstrapItrNr) - # if self.emulator: self.BMECorrFactor = BME_Corr - - # BME = BME + BMECorrFactor - if self.emulator: self.logBME = self.logBME #+ self.BMECorrFactor - - # ---------------- Parameter Bayesian inference ---------------- - if self.SamplingMethod.lower() == 'mcmc': - - # Convert the pandas data frame to numpy, if it's applicable. - if self.MCMCinitSamples is not None and isinstance(self.MCMCinitSamples, pd.DataFrame): - self.MCMCinitSamples = self.MCMCinitSamples.to_numpy() - - # MCMC - initsamples = None if self.MCMCinitSamples is None else self.MCMCinitSamples - nsteps = 100000 if self.MCMCnSteps is None else self.MCMCnSteps - nwalkers = 50*self.PCEModel.NofPa if self.MCMCnwalkers is None else self.MCMCnwalkers - multiprocessing = False if self.MultiProcessMCMC is None else self.MultiProcessMCMC - MCMC_ = MCMC(self, initsamples=initsamples, nwalkers=nwalkers, verbose=self.MCMCverbose, - nsteps = nsteps, moves=self.MCMCmoves, multiprocessing=multiprocessing) - self.Posterior_df = MCMC_.run_sampler(self.MeasuredData, TotalSigma2) - - elif self.Name.lower() == 'valid': - self.Posterior_df = pd.DataFrame(self.Samples, columns=par_names) + # ------------------------------------------------------------------------- + def _plot_max_a_posteriori(self): + """ + Plots the response of the model output against that of the metamodel at + the maximum a posteriori sample (mean or mode of posterior.) - else: # Rejection sampling - self.Posterior_df = self.Rejection_Sampling() + Returns + ------- + None. 
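Each bootstrap iteration above reduces to three quantities: the log-BME as the log of the mean prior-predictive likelihood, a rejection step that keeps samples whose normalized likelihood beats a uniform draw, and the KL divergence as the posterior expectation of the log-likelihood minus the log-BME. A compact sketch with illustrative names:

    import numpy as np

    def rejection_step(log_likes, samples, rng=np.random.default_rng()):
        """log_likes: (n_samples,); samples: (n_samples, n_params)."""
        log_bme = np.log(np.nanmean(np.exp(log_likes)))      # BME (log)
        likes = np.exp(log_likes)
        accepted = likes / np.max(likes) >= rng.random(len(likes))
        posterior = samples[accepted]
        post_exp_log_likes = np.mean(log_likes[accepted])
        kld = post_exp_log_likes - log_bme                   # KL divergence
        return posterior, log_bme, kld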
+ """ - # Provide posterior's summary - print('\n') - print('-'*15 + 'Posterior summary' + '-'*15) - pd.options.display.max_columns = None - pd.options.display.max_rows = None - print(self.Posterior_df.describe()) - print('-'*50) + MetaModel = self.MetaModel + Model = MetaModel.ModelObj + out_dir = f'Outputs_Bayes_{Model.name}_{self.name}' + opt_sigma = self.Discrepancy.opt_sigma - # -------- Model Discrepancy ----------- - if hasattr(self, 'errorModel') and self.errorModel \ - and self.Name.lower()=='calib': - if self.SamplingMethod.lower() == 'mcmc': - self.errorMetaModel = MCMC_.errorMetaModel + # -------- Find MAP and run MetaModel and origModel -------- + # Compute the MAP + if self.max_a_posteriori.lower() == 'mean': + if opt_sigma == "B": + Posterior_df = self.posterior_df.values else: - # Select posterior mean as MAP - Posterior_df = self.Posterior_df.to_numpy() if optSigma == "B" else self.Posterior_df.to_numpy()[:,:-len(Model.Output.Names)] - MAP_theta = Posterior_df.mean(axis=0).reshape((1,NofPa)) - # MAP_theta = stats.mode(Posterior_df,axis=0)[0] - - # Evaluate the (meta-)model at the MAP - y_MAP, y_std_MAP = PCEModel.eval_metamodel(samples=MAP_theta,name=self.Name) - - # Train a GPR meta-model using MAP - self.errorMetaModel = PCEModel.create_model_error(self.BiasInputs,y_MAP, - Name=self.Name) - - # -------- Posterior perdictive ----------- - self.PosteriorPredictive() - - # ----------------------------------------------------- - # ------------------ Visualization -------------------- - # ----------------------------------------------------- - OutputDir = (r'Outputs_Bayes_' + Model.Name + '_' + self.Name) - if not os.path.exists(OutputDir): os.makedirs(OutputDir) - - # -------- Posteior parameters -------- - if optSigma != "B": - par_names.extend([self.Discrepancy.InputDisc.Marginals[i].Name for i in range(len(self.Discrepancy.InputDisc.Marginals))]) - - figPosterior = corner.corner(self.Posterior_df.to_numpy(), labels=par_names, - quantiles=[0.15, 0.5, 0.85],show_titles=True, - title_fmt=self.Corner_title_fmt, - labelpad=0.2, - use_math_text=True, - title_kwargs={"fontsize": 28}, - plot_datapoints=False, - plot_density=False, - fill_contours=True, - smooth=0.5, - smooth1d=0.5) - - # Loop over axes and set x limits - if optSigma == "B": - axes = np.array(figPosterior.axes).reshape((len(par_names), len(par_names))) - for yi in range(len(par_names)): - ax = axes[yi, yi] - ax.set_xlim(PCEModel.bound_tuples[yi]) - for xi in range(yi): - ax = axes[yi, xi] - ax.set_xlim(PCEModel.bound_tuples[xi]) - - # Extract the axes - # axes = np.array(figPosterior.axes).reshape((NofPa, NofPa)) - - # # Loop over the diagonal - # for i in range(NofPa): - # ax = axes[i, i] - # ax.axvline(MAP_theta[0,i], ls='--', color="r") - - # # Rotate the axis label - # for i in range(NofPa): - # #ax_x = axes[-1, i] - # ax_y = axes[i, 0] - # if i != 0: - # ax_y.set_ylabel(parNames[i],rotation=45) - - # Loop over the histograms - # for yi in range(NofPa): - # for xi in range(yi): - # ax = axes[yi, xi] - # ax.set_xlabel(parNames[xi],rotation=45) - # ax.set_ylabel(parNames[yi],rotation=45) - # ax.axvline(MAP_theta[0,xi], ls='--', color="r") - # ax.axhline(MAP_theta[0,yi], ls='--', color="r") - # ax.plot(MAP_theta[0,xi], MAP_theta[0,yi], "sr") - - # plt.yticks(rotation=45, horizontalalignment='right') - figPosterior.set_size_inches((24,16)) - - # Turn off gridlines - for ax in figPosterior.axes: - ax.grid(False) - - if self.emulator: - plotname = '/Posterior_Dist_'+Model.Name+'_emulator' + Posterior_df = 
self.posterior_df.values[:, :-Model.n_outputs]
+            map_theta = Posterior_df.mean(axis=0).reshape(
+                (1, MetaModel.n_params))
         else:
-            plotname = '/Posterior_Dist_'+Model.Name
-
-        figPosterior.savefig('./'+OutputDir+ plotname + '.pdf',
-                             bbox_inches='tight')
-
-
-        # -------- Plot MAP --------
-        if self.PlotMAPPred:
-
-            # -------- Find MAP and run PCEModel and origModel --------
-            # Compute the MAP
-            if self.MAP.lower() =='mean':
-                Posterior_df = self.Posterior_df.to_numpy() if optSigma == "B" else self.Posterior_df.to_numpy()[:,:-len(Model.Output.Names)]
-                MAP_theta = Posterior_df.mean(axis=0).reshape((1,NofPa))
-            else:
-                MAP_theta = stats.mode(Posterior_df.to_numpy(),axis=0)[0]
-
-            print("\nPoint estimator:\n", MAP_theta[0])
-
-            # Run the models for MAP
-            # PCEModel
-            MAP_PCEModel, MAP_PCEModelstd = PCEModel.eval_metamodel(samples=MAP_theta,name=self.Name)
-            self.MAPpceModelMean = MAP_PCEModel
-            self.MAPpceModelStd = MAP_PCEModelstd
-
-            # origModel
-            MAP_origModel = self.eval_Model(Samples=MAP_theta)
-            self.MAPorigModel = MAP_origModel
-
-
-            # Extract slicing index
-            index = PCEModel.index
-            x_values = MAP_origModel['x_values']
+            map_theta = stats.mode(self.posterior_df.values, axis=0)[0]
+        # Print report
+        print("\nPoint estimator:\n", map_theta[0])
+
+        # Run the models for MAP
+        # MetaModel
+        map_metamodel_mean, map_metamodel_std = MetaModel.eval_metamodel(
+            samples=map_theta)
+        self.map_metamodel_mean = map_metamodel_mean
+        self.map_metamodel_std = map_metamodel_std
+
+        # origModel
+        map_orig_model = self._eval_model(samples=map_theta)
+        self.map_orig_model = map_orig_model
+
+        # Extract slicing index
+        x_values = map_orig_model['x_values']
+
+        # List of markers and colors
+        Color = ['k', 'b', 'g', 'r']
+        Marker = 'x'
+
+        # Create a PdfPages object
+        pdf = PdfPages(f'./{out_dir}/MAP_PCE_vs_Model_{self.name}.pdf')
+        fig = plt.figure()
+        for i, key in enumerate(Model.Output.names):
+
+            y_val = map_orig_model[key]
+            y_pce_val = map_metamodel_mean[key]
+            y_pce_val_std = map_metamodel_std[key]
+
+            plt.plot(x_values, y_val, color=Color[i], marker=Marker,
+                     lw=2.0, label='$Y_{MAP}^{M}$')
+
+            plt.plot(
+                x_values, y_pce_val[0], color=Color[i], lw=2.0,
+                marker=Marker, linestyle='--', label='$Y_{MAP}^{PCE}$'
+                )
+            # plot the confidence interval
+            plt.fill_between(
+                x_values, y_pce_val[0] - 1.96*y_pce_val_std[0],
+                y_pce_val[0] + 1.96*y_pce_val_std[0],
+                color=Color[i], alpha=0.15
+                )
+
+            # Calculate R-squared and RMSE
+            R2 = r2_score(y_val.reshape(-1, 1), y_pce_val.reshape(-1, 1))
+            rmse = np.sqrt(mean_squared_error(y_val, y_pce_val))
+
+            plt.ylabel(key)
+            plt.xlabel("Time [s]")
+            plt.title(f'Model vs MetaModel {key}')
+
+            ax = fig.axes[0]
+            leg = ax.legend(loc='best', frameon=True)
+            fig.canvas.draw()
+            p = leg.get_window_extent().transformed(ax.transAxes.inverted())
+            ax.text(
+                p.p0[1]-0.05, p.p1[1]-0.25,
+                f'RMSE = {rmse:.3f}\n$R^2$ = {R2:.3f}',
+                transform=ax.transAxes, color='black',
+                bbox=dict(facecolor='none', edgecolor='black',
+                          boxstyle='round,pad=1'))

-            # List of markers and colors
-            Color = ['k','b', 'g', 'r']
-            Marker = 'x'
-
-
-            # create a PdfPages object
-            pdf = PdfPages('./'+OutputDir+ 'MAP_PCE_vs_Model_'+ self.Name + '.pdf')
-            fig = plt.figure()
-            for i, key in enumerate(Model.Output.Names):
-
-                Y_Val_ = MAP_origModel[key]
-                Y_PC_Val_ = MAP_PCEModel[key]
-                Y_PC_Val_std_ = MAP_PCEModelstd[key]
-
-                plt.plot(x_values, Y_Val_, color=Color[i], marker=Marker, lw=2.0, label='$Y_{MAP}^{M}$')
-
-                plt.plot(x_values, Y_PC_Val_[idx,:], color=Color[i], marker=Marker, lw=2.0,
linestyle='--', label='$Y_{MAP}^{PCE}$') - plt.fill_between(x_values, Y_PC_Val_[idx,:]-1.96*Y_PC_Val_std_[idx,:], - Y_PC_Val_[idx,:]+1.96*Y_PC_Val_std_[idx,:], color=Color[i],alpha=0.15) - - - # Calculate the adjusted R_squared and RMSE - R2 = r2_score(Y_PC_Val_.reshape(-1,1), Y_Val_.reshape(-1,1)) - RMSE = np.sqrt(mean_squared_error(Y_PC_Val_, Y_Val_)) - - plt.ylabel(key) - plt.xlabel("Time [s]") - plt.title('Model vs PCEModel '+ key) - - ax = fig.axes[0] - leg = ax.legend(loc='best', frameon=True) - fig.canvas.draw() - p = leg.get_window_extent().inverse_transformed(ax.transAxes) - ax.text(p.p0[1]-0.05, p.p1[1]-0.25, 'RMSE = '+ str(round(RMSE, 3)) + '\n' + r'$R^2$ = '+ str(round(R2, 3)), - transform=ax.transAxes, color='black', bbox=dict(facecolor='none', edgecolor='black', - boxstyle='round,pad=1')) - - plt.show() - - # save the current figure - pdf.savefig(fig, bbox_inches='tight') - - # Destroy the current plot - plt.clf() - - pdf.close() - - - # -------- Plot logBME dist -------- - if self.Bootstrap: - # # Computing the TOM performance - # x_values = np.linspace(np.min(self.logBME), np.max(self.logBME), 1000) - dof = ObservationData.shape[0] - # self.logTOMBME = stats.chi2(dof).logpdf(-1*x_values)#pdf(x_values) - self.logTOMBME = stats.chi2.rvs(dof, size=self.logBME.shape[0]) - - fig, ax = plt.subplots() - sns.kdeplot(self.logTOMBME, ax=ax, color="green", shade=True) - sns.kdeplot(self.logBME, ax=ax, color="blue", shade=True,label='Model BME') - - ax.set_xlabel('log$_{10}$(BME)') - ax.set_ylabel('Probability density') - - from matplotlib.patches import Patch - legend_elements = [Patch(facecolor='green', edgecolor='green',label='TOM BME'), - Patch(facecolor='blue', edgecolor='blue',label='Model BME')] - ax.legend(handles=legend_elements) - - if self.emulator: - plotname = '/BME_hist_'+Model.Name+'_emulator' - else: - plotname = '/BME_hist_'+Model.Name - - plt.savefig('./'+OutputDir+plotname+'.pdf', bbox_inches='tight') plt.show() - plt.close() - - # -------- Posteior perdictives -------- - if self.PlotPostPred: - # sns.set_context("paper") - # Plot the posterior predictive - for Out_idx, OutputName in enumerate(Model.Output.Names): - fig, ax = plt.subplots() - with sns.axes_style("ticks"): #whitegrid - x_key = list(self.MeasuredData)[0] - - # --- Read prior and posterior predictive --- - if self.SamplingMethod == 'rejection': - # --- Prior --- - # Load posterior predictive - f = h5py.File(OutputDir+'/'+"priorPredictive.hdf5", 'r+') - - try: - x_coords = np.array(f["x_values"]) - except: - x_coords = np.array(f["x_values/"+OutputName]) - - X_values = np.repeat(x_coords , np.array(f["EDY/"+OutputName]).shape[0]) - - priorPred_df = {} - priorPred_df[x_key] = X_values - priorPred_df[OutputName] = np.array(f["EDY/"+OutputName]).flatten('F') - priorPred_df = pd.DataFrame(priorPred_df) - - tags_post = ['prior'] * len(priorPred_df) - priorPred_df.insert(len(priorPred_df.columns), "Tags", tags_post, True) - - - # --- Posterior --- - f = h5py.File(OutputDir+'/'+"postPredictive.hdf5", 'r+') - try: - x_coords = np.array(f["x_values"]) - except: - x_coords = np.array(f["x_values/"+OutputName]) - - X_values = np.repeat(x_coords , np.array(f["EDY/"+OutputName]).shape[0]) - - postPred_df = {} - postPred_df[x_key] = X_values - postPred_df[OutputName] = np.array(f["EDY/"+OutputName]).flatten('F') - postPred_df = pd.DataFrame(postPred_df) - - tags_post = ['posterior'] * len(postPred_df) - postPred_df.insert(len(postPred_df.columns), "Tags", tags_post, True) - - # Concatenate two dataframes based on 
x_values - frames = [priorPred_df,postPred_df] - AllPred_df = pd.concat(frames) - - # --- Plot posterior predictive --- - sns.violinplot(x_key, y=OutputName, hue="Tags", legend=False, - ax=ax, split=True, inner=None, data=AllPred_df, color=".8") - - - # # --- Plot MAP (PCEModel) --- - # Find the x,y coordinates for each point - x_coords = np.arange(x_coords.shape[0]) - - # sns.pointplot(x=x_coords, y=MAP_PCEModel[OutputName][0], color='lime',markers='+', - # linestyles='', capsize=16, ax=ax) - - # ax.errorbar(x_coords, MAP_PCEModel[OutputName][0], - # yerr=1.96*MAP_PCEModelstd[OutputName][0], - # ecolor='lime', fmt=' ', zorder=-1) - - # # --- Plot MAP (origModel) --- - # sns.pointplot(x=x_coords, y=MAP_origModel[OutputName][0], color='r', markers='+', - # linestyles='' , capsize=14, ax=ax) - - # --- Plot Data --- - first_header = list(self.MeasuredData)[0] - ObservationData = self.MeasuredData.round({first_header: 6}) - sns.pointplot(x=first_header, y=OutputName, color='g', markers='x', - linestyles='', capsize=16, data=ObservationData, ax=ax) - - ax.errorbar(x_coords, ObservationData[OutputName].to_numpy(), - yerr=1.96*self.MeasurementError[OutputName].to_numpy(), - ecolor='g', fmt=' ', zorder=-1) - - # Add labels to the legend - handles, labels = ax.get_legend_handles_labels() - # labels.append('point estimate (PCE Model)') - # labels.append('point estimate (Orig. Model)') - labels.append('Data') - - import matplotlib.lines as mlines - #red_patch = mpatches.Patch(color='red') - data_marker = mlines.Line2D([], [], color='lime', marker='+', linestyle='None', - markersize=10) - # handles.append(data_marker) - # data_marker = mlines.Line2D([], [], color='r', marker='+', linestyle='None', - # markersize=10) - # handles.append(data_marker) - # data_marker = mlines.Line2D([], [], color='g', marker='x', linestyle='None', - # markersize=10) - handles.append(data_marker) - - # Add legend - ax.legend(handles=handles, labels=labels, loc='best', - fontsize='large', frameon=True) - - else: - # Load posterior predictive - f = h5py.File(OutputDir+'/'+"postPredictive.hdf5", 'r+') - - try: - x_coords = np.array(f["x_values"]) - except: - x_coords = np.array(f["x_values/"+OutputName]) - - mu = np.mean(np.array(f["EDY/"+OutputName]), axis=0) - std = np.std(np.array(f["EDY/"+OutputName]), axis=0) - - # --- Plot posterior predictive --- - plt.plot(x_coords, mu, marker='o', color='b', label='Mean Post. Predictive') - plt.fill_between(x_coords, mu-1.96*std, mu+1.96*std, color='b', alpha=0.15) - - # --- Plot Data --- - ax.plot(x_coords, self.MeasuredData[OutputName].to_numpy(),'ko',label ='data',markeredgecolor='w') - - # --- Plot ExpDesign --- - index = self.PCEModel.index - if index is not None: - if self.Name == 'Valid': - origOutExpDesign = self.PCEModel.ExpDesign.Y[key][:,index:] - else: - origOutExpDesign = self.PCEModel.ExpDesign.Y[key][:,:index] - else: - origOutExpDesign = self.PCEModel.ExpDesign.Y[key] - - for output in origOutExpDesign: - plt.plot(x_coords, output, color='grey', alpha=0.15) - # # --- Plot MAP (PCEModel) --- - # ax.plot(x_coords, MAP_PCEModel[OutputName][0],ls='--', marker='o',c = 'lime' ,label ='point estimate (PCE Model)',markeredgecolor='w') + # save the current figure + pdf.savefig(fig, bbox_inches='tight') - # # --- Plot MAP (origModel) --- - # ax.plot(x_coords, MAP_origModel[OutputName][0],ls='-', marker='o',c = 'r',label ='point estimate (Orig. 
Model)',markeredgecolor='w') + # Destroy the current plot + plt.clf() + pdf.close() - # Add labels for axes - plt.xlabel('Time [s]') - plt.ylabel(key) - - # Add labels to the legend - handles, labels = ax.get_legend_handles_labels() + # ------------------------------------------------------------------------- + def _plot_post_predictive(self): + """ + Plots the posterior predictives against the observation data. - import matplotlib.lines as mlines - import matplotlib.patches as mpatches - patch = mpatches.Patch(color='b',alpha=0.15) - handles.insert(1,patch) - labels.insert(1,'95 $\%$ CI') + Returns + ------- + None. - # data_marker = mlines.Line2D([], [], color='grey',alpha=0.15) - # handles.append(data_marker) - # labels.append('Orig. Model Responses') + """ - # Add legend - ax.legend(handles=handles, labels=labels, loc='best', - frameon=True) + Model = self.MetaModel.ModelObj + out_dir = f'Outputs_Bayes_{Model.name}_{self.name}' + # Plot the posterior predictive + for out_idx, out_name in enumerate(Model.Output.names): + fig, ax = plt.subplots() + with sns.axes_style("ticks"): + x_key = list(self.measured_data)[0] + # --- Read prior and posterior predictive --- + if self.inference_method == 'rejection': + # --- Prior --- + # Load posterior predictive + f = h5py.File( + f'{out_dir}/priorPredictive.hdf5', 'r+') + try: + x_coords = np.array(f["x_values"]) + except: + x_coords = np.array(f[f"x_values/{out_name}"]) + + X_values = np.repeat( + x_coords, np.array(f[f"EDY/{out_name}"]).shape[0]) + + prior_pred_df = {} + prior_pred_df[x_key] = X_values + prior_pred_df[out_name] = np.array( + f[f"EDY/{out_name}"]).flatten('F') + prior_pred_df = pd.DataFrame(prior_pred_df) + + tags_post = ['prior'] * len(prior_pred_df) + prior_pred_df.insert( + len(prior_pred_df.columns), "Tags", tags_post, + True) + f.close() + + # --- Posterior --- + f = h5py.File(f"{out_dir}/postPredictive.hdf5", 'r+') + + post_pred_df = {} + post_pred_df[x_key] = X_values + post_pred_df[out_name] = np.array( + f[f"EDY/{out_name}"]).flatten('F') + post_pred_df = pd.DataFrame(post_pred_df) + + tags_post = ['posterior'] * len(post_pred_df) + post_pred_df.insert( + len(post_pred_df.columns), "Tags", tags_post, True) + f.close() + # Concatenate two dataframes based on x_values + frames = [prior_pred_df, post_pred_df] + all_pred_df = pd.concat(frames) + + # --- Plot posterior predictive --- + sns.violinplot( + x_key, y=out_name, data=all_pred_df, hue="Tags", + legend=False, ax=ax, split=True, inner=None, + color=".8") + + # --- Plot Data --- + # Find the x,y coordinates for each point + x_coords = np.arange(x_coords.shape[0]) + first_header = list(self.measured_data)[0] + obs_data = self.measured_data.round({first_header: 6}) + sns.pointplot( + x=first_header, y=out_name, color='g', markers='x', + linestyles='', capsize=16, data=obs_data, ax=ax) + + ax.errorbar( + x_coords, obs_data[out_name].values, + yerr=1.96*self.measurement_error[out_name].values, + ecolor='g', fmt=' ', zorder=-1) + + # Add labels to the legend + handles, labels = ax.get_legend_handles_labels() + labels.append('Data') + + data_marker = mlines.Line2D( + [], [], color='lime', marker='+', linestyle='None', + markersize=10) + handles.append(data_marker) + + # Add legend + ax.legend(handles=handles, labels=labels, loc='best', + fontsize='large', frameon=True) - # Save figure in pdf format - if self.emulator: - plotname = '/Post_Prior_Perd_'+Model.Name+'_emulator' - else: - plotname = '/Post_Prior_Perd_'+Model.Name + else: + # Load posterior predictive + f = 
h5py.File(f"{out_dir}/postPredictive.hdf5", 'r+')
-            fig.savefig('./'+OutputDir+plotname+ '_' +OutputName +'.pdf', bbox_inches='tight')
+                    try:
+                        x_coords = np.array(f["x_values"])
+                    except:
+                        x_coords = np.array(f[f"x_values/{out_name}"])
+
+                    mu = np.mean(np.array(f[f"EDY/{out_name}"]), axis=0)
+                    std = np.std(np.array(f[f"EDY/{out_name}"]), axis=0)
+
+                    # --- Plot posterior predictive ---
+                    plt.plot(
+                        x_coords, mu, marker='o', color='b',
+                        label='Mean Post. Predictive')
+                    plt.fill_between(
+                        x_coords, mu-1.96*std, mu+1.96*std, color='b',
+                        alpha=0.15)
+
+                    # --- Plot Data ---
+                    ax.plot(
+                        x_coords, self.measured_data[out_name].values,
+                        'ko', label='data', markeredgecolor='w')
+
+                    # --- Plot ExpDesign ---
+                    orig_ED_Y = self.MetaModel.ExpDesign.Y[out_name]
+                    for output in orig_ED_Y:
+                        plt.plot(
+                            x_coords, output, color='grey', alpha=0.15
+                            )
+
+                    # Add labels for axes
+                    plt.xlabel('Time [s]')
+                    plt.ylabel(out_name)
+
+                    # Add labels to the legend
+                    handles, labels = ax.get_legend_handles_labels()
+
+                    patch = Patch(color='b', alpha=0.15)
+                    handles.insert(1, patch)
+                    labels.insert(1, '95 $\\%$ CI')
+
+                    # Add legend
+                    ax.legend(handles=handles, labels=labels, loc='best',
+                              frameon=True)
+
+                # Save figure in pdf format
+                if self.emulator:
+                    plotname = f'/Post_Prior_Pred_{Model.name}_emulator'
+                else:
+                    plotname = f'/Post_Prior_Pred_{Model.name}'
-        return self
+                fig.savefig(f'./{out_dir}{plotname}_{out_name}.pdf',
+                            bbox_inches='tight')
diff --git a/src/bayesvalidrox/bayes_inference/discrepancy.py b/src/bayesvalidrox/bayes_inference/discrepancy.py
index a2b480d15..a812f2995 100644
--- a/src/bayesvalidrox/bayes_inference/discrepancy.py
+++ b/src/bayesvalidrox/bayes_inference/discrepancy.py
@@ -24,20 +24,20 @@ Pfaffenwaldring 61
 Created on Mon Sep 2 10:48:35 2019
 """
 import scipy.stats as stats
-from bayesvalidrox.surrogate_models.exp_designs import ExpDesigns
-from bayesvalidrox.surrogate_models.inputs import Input
+from surrogate_models.exp_designs import ExpDesigns
+from surrogate_models.inputs import Input


 class Discrepancy:
     def __init__(self, InputDisc):
-        self.Type = 'Gaussian'
-        self.Parameters = None
-        self.Name = 'Sigma2'
-        self.Prior = None
+        self.type = 'Gaussian'
+        self.parameters = None
+        self.name = 'Sigma2'
+        self.prior = None
         self.Marginals = []
         self.InputDisc = InputDisc
-        self.NrofSamples = 10000
-        self.Sigma2Prior = None
+        self.n_samples = 10000
+        self.sigma2_prior = None

     def create_inputDisc(self):
         InputClass = Input()
@@ -46,12 +46,27 @@ class Discrepancy:
         return InputClass

     # -------------------------------------------------------------------------
-    def get_Sample(self, nSamples):
-        self.NrofSamples = nSamples
+    def get_sample(self, n_samples):
+        """
+        Generate samples for the sigma2s, i.e. the diagonal entries of the
+        variance-covariance matrix in the multivariate normal distribution.
+
+        Parameters
+        ----------
+        n_samples : int
+            Number of samples (parameter sets).
+
+        Returns
+        -------
+        sigma2_prior: array of shape (n_samples, n_params)
+            Sigma2 samples.
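A hypothetical call of the renamed method, assuming an already-configured InputDisc object with one marginal per inferred sigma2:

    disc = Discrepancy(InputDisc)
    sigma2_prior = disc.get_sample(n_samples=10000)
    # -> array of shape (10000, n_sigma2_params); disc.name now lists the
    #    marginal names collected from InputDisc.Marginals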
+ + """ + self.n_samples = n_samples ExpDesign = ExpDesigns(self.InputDisc) - self.Sigma2Prior = ExpDesign.GetSample(nSamples, - SamplingMethod='random', - MaxPceDegree=1) + self.sigma2_prior = ExpDesign.generate_ED( + n_samples, sampling_method='random', max_pce_deg=1 + ) # Store BoundTuples self.ExpDesign = ExpDesign @@ -60,14 +75,8 @@ class Discrepancy: # Save the names of sigmas if len(self.InputDisc.Marginals) != 0: - self.Name = [] + self.name = [] for Marginalidx in range(len(self.InputDisc.Marginals)): - self.Name.append(self.InputDisc.Marginals[Marginalidx].Name) - - return self.Sigma2Prior - - # ------------------------------------------------------------------------- - def create_Discrepancy(self): + self.name.append(self.InputDisc.Marginals[Marginalidx].name) - self.get_Sample() - return self + return self.sigma2_prior diff --git a/src/bayesvalidrox/bayes_inference/mcmc.py b/src/bayesvalidrox/bayes_inference/mcmc.py index 4edf8be72..585303ba7 100755 --- a/src/bayesvalidrox/bayes_inference/mcmc.py +++ b/src/bayesvalidrox/bayes_inference/mcmc.py @@ -33,43 +33,62 @@ import shutil os.environ["OMP_NUM_THREADS"] = "1" -class MCMC(): +class MCMC: """ - nwalkers = 20 # number of MCMC walkers - nburn = 100 # "burn-in" period to let chains stabilize - nsteps = 1000 # number of MCMC steps to take - ntemps = 20 + A class for bayesian inference using a Markov-Chain Monte-Carlo Sampler. """ - def __init__(self, BayesOpts, initsamples=None, nwalkers=100, nburn=200, - nsteps=100000, moves=None, multiprocessing=False, - verbose=False): + def __init__(self, BayesOpts): - self.BayesOpts = BayesOpts - self.initsamples = initsamples - self.nwalkers = nwalkers - self.nburn = nburn - self.nsteps = nsteps - self.moves = moves - self.mp = multiprocessing - self.verbose = verbose + self.BayesOpts = BayesOpts - def run_sampler(self, Observation, TotalSigma2): + def run_sampler(self, observation, total_sigma2): BayesObj = self.BayesOpts - PCEModel = BayesObj.PCEModel - Model = PCEModel.ModelObj + MetaModel = BayesObj.MetaModel + Model = MetaModel.ModelObj Discrepancy = self.BayesOpts.Discrepancy - nrCPUs = Model.nrCPUs - priorDist = PCEModel.ExpDesign.JDist - ndim = PCEModel.NofPa + n_cpus = Model.n_cpus + priorDist = MetaModel.ExpDesign.JDist + ndim = MetaModel.n_params self.counter = 0 - OutputDir = (r'Outputs_Bayes_' + Model.Name + '_' + self.BayesOpts.Name) - if not os.path.exists(OutputDir): - os.makedirs(OutputDir) - - self.Observation = Observation - self.TotalSigma2 = TotalSigma2 + output_dir = f'Outputs_Bayes_{Model.name}_{self.BayesOpts.name}' + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + self.observation = observation + self.total_sigma2 = total_sigma2 + + # Unpack mcmc parameters given to BayesObj.mcmc_params + self.initsamples = None + self.nwalkers = 100 + self.nburn = 200 + self.nsteps = 100000 + self.moves = None + self.mp = False + self.verbose = False + + # Extract initial samples + if 'init_samples' in BayesObj.mcmc_params: + self.initsamples = BayesObj.mcmc_params['init_samples'] + if isinstance(self.initsamples, pd.DataFrame): + self.initsamples = self.initsamples.values + + # Extract number of steps per walker + if 'n_steps' in BayesObj.mcmc_params: + self.nsteps = int(BayesObj.mcmc_params['n_steps']) + # Extract number of walkers (chains) + if 'n_walkers' in BayesObj.mcmc_params: + self.nwalkers = int(BayesObj.mcmc_params['n_walkers']) + # Extract moves + if 'moves' in BayesObj.mcmc_params: + self.moves = BayesObj.mcmc_params['moves'] + # Extract multiprocessing 
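Taken together, the options unpacked here imply a user-facing dictionary of the following shape (keys taken from this hunk; the values shown are the defaults set above):

    BayesObj.mcmc_params = {
        'init_samples': None,      # array/DataFrame of start points, or None
        'n_steps': 100000,         # steps per walker
        'n_walkers': 100,          # number of chains
        'moves': None,             # e.g. an emcee moves object
        'multiprocessing': False,  # run walkers in parallel
        'verbose': False,          # extra convergence output
    }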
+ if 'multiprocessing' in BayesObj.mcmc_params: + self.mp = BayesObj.mcmc_params['multiprocessing'] + # Extract verbose + if 'verbose' in BayesObj.mcmc_params: + self.verbose = BayesObj.mcmc_params['verbose'] # Set initial samples np.random.seed(0) @@ -78,52 +97,55 @@ class MCMC(): initsamples = priorDist.sample(self.nwalkers).T except: # when aPCE selected - gaussian kernel distribution - inputSamples = PCEModel.ExpDesign.raw_data.T - random_indices = np.random.choice(len(inputSamples), - size=self.nwalkers, - replace=False) + inputSamples = MetaModel.ExpDesign.raw_data.T + random_indices = np.random.choice( + len(inputSamples), size=self.nwalkers, replace=False + ) initsamples = inputSamples[random_indices] else: if self.initsamples.ndim == 1: # When MAL is given. theta = self.initsamples - initsamples = [theta + 1e-1*np.multiply(np.random.randn(ndim), - theta) for i in + initsamples = [theta + 1e-1*np.multiply( + np.random.randn(ndim), theta) for i in range(self.nwalkers)] else: # Pick samples based on a uniform dist between min and max of # each dim initsamples = np.zeros((self.nwalkers, ndim)) bound_tuples = [] - for idxDim in range(ndim): - lower = np.min(self.initsamples[:, idxDim]) - upper = np.max(self.initsamples[:, idxDim]) + for idx_dim in range(ndim): + lower = np.min(self.initsamples[:, idx_dim]) + upper = np.max(self.initsamples[:, idx_dim]) bound_tuples.append((lower, upper)) dist = st.uniform(loc=lower, scale=upper-lower) - initsamples[:, idxDim] = dist.rvs(size=self.nwalkers) + initsamples[:, idx_dim] = dist.rvs(size=self.nwalkers) # Update lower and upper - PCEModel.ExpDesign.bound_tuples = bound_tuples + MetaModel.ExpDesign.bound_tuples = bound_tuples # Check if sigma^2 needs to be inferred - if Discrepancy.optSigma != 'B': - sigma2Samples = Discrepancy.get_Sample(self.nwalkers) + if Discrepancy.opt_sigma != 'B': + sigma2_samples = Discrepancy.get_sample(self.nwalkers) # Update initsamples - initsamples = np.hstack((initsamples, sigma2Samples)) + initsamples = np.hstack((initsamples, sigma2_samples)) # Update ndim ndim = initsamples.shape[1] + # Discrepancy bound + disc_bound_tuple = Discrepancy.ExpDesign.bound_tuples + # Update bound_tuples - PCEModel.ExpDesign.bound_tuples += Discrepancy.ExpDesign.bound_tuples + MetaModel.ExpDesign.bound_tuples += disc_bound_tuple print("\n>>>> Bayesian inference with MCMC for " - f"{self.BayesOpts.Name} started. <<<<<<") + f"{self.BayesOpts.name} started. <<<<<<") # Set up the backend - filename = OutputDir+"/emcee_sampler.h5" + filename = f"{output_dir}/emcee_sampler.h5" backend = emcee.backends.HDFBackend(filename) # Clear the backend in case the file already exists backend.reset(self.nwalkers, ndim) @@ -131,25 +153,25 @@ class MCMC(): # Define emcee sampler # Here we'll set up the computation. emcee combines multiple "walkers", # each of which is its own MCMC chain. The number of trace results will - # be nwalkers * nsteps + # be nwalkers * nsteps. if self.mp: # Run in parallel - if nrCPUs is None: - nrCPUs = multiprocessing.cpu_count() + if n_cpus is None: + n_cpus = multiprocessing.cpu_count() - with multiprocessing.Pool(nrCPUs) as pool: - sampler = emcee.EnsembleSampler(self.nwalkers, ndim, - self.log_posterior, - moves=self.moves, - pool=pool, backend=backend) + with multiprocessing.Pool(n_cpus) as pool: + sampler = emcee.EnsembleSampler( + self.nwalkers, ndim, self.log_posterior, moves=self.moves, + pool=pool, backend=backend + ) # Check if a burn-in phase is needed! 
if self.initsamples is None: # Burn-in print("\n Burn-in period is starting:") - pos = sampler.run_mcmc(initsamples, - self.nburn, - progress=True) + pos = sampler.run_mcmc( + initsamples, self.nburn, progress=True + ) # Reset sampler sampler.reset() @@ -159,23 +181,24 @@ class MCMC(): # Production run print("\n Production run is starting:") - pos, prob, state = sampler.run_mcmc(pos, - self.nsteps, - progress=True) + pos, prob, state = sampler.run_mcmc( + pos, self.nsteps, progress=True + ) else: # Run in series and monitor the convergence - sampler = emcee.EnsembleSampler(self.nwalkers, ndim, - self.log_posterior, - moves=self.moves, - backend=backend, - vectorize=True) + sampler = emcee.EnsembleSampler( + self.nwalkers, ndim, self.log_posterior, moves=self.moves, + backend=backend, vectorize=True + ) # Check if a burn-in phase is needed! if self.initsamples is None: # Burn-in print("\n Burn-in period is starting:") - pos = sampler.run_mcmc(initsamples, self.nburn, progress=True) + pos = sampler.run_mcmc( + initsamples, self.nburn, progress=True + ) # Reset sampler sampler.reset() @@ -206,7 +229,7 @@ class MCMC(): if hasattr(BayesObj, 'errorModel') and BayesObj.errorModel \ and not sampler.iteration % 3 * autocorreverynsteps: try: - self.errorMetaModel = self.train_errorModel(sampler) + self.error_MetaModel = self.train_error_model(sampler) except: pass @@ -268,21 +291,21 @@ class MCMC(): print("* These values must be smaller than 1.1.") print('-'*50) - print(f"\n>>>> Bayesian inference with MCMC for {self.BayesOpts.Name} " + print(f"\n>>>> Bayesian inference with MCMC for {self.BayesOpts.name} " "successfully completed. <<<<<<\n") # Extract parameter names and their prior ranges - par_names = PCEModel.ExpDesign.par_names + par_names = MetaModel.ExpDesign.par_names - if Discrepancy.optSigma != 'B': + if Discrepancy.opt_sigma != 'B': for i in range(len(Discrepancy.InputDisc.Marginals)): - par_names.append(Discrepancy.InputDisc.Marginals[i].Name) + par_names.append(Discrepancy.InputDisc.Marginals[i].name) - params_range = PCEModel.ExpDesign.bound_tuples + params_range = MetaModel.ExpDesign.bound_tuples # Plot traces if self.verbose and self.nsteps < 10000: - pdf = PdfPages(OutputDir+'/traceplots.pdf') + pdf = PdfPages(output_dir+'/traceplots.pdf') fig = plt.figure() for parIdx in range(ndim): # Set up the axes with gridspec @@ -323,10 +346,10 @@ class MCMC(): plt.ylim(0, np.nanmax(taus)+0.1*(np.nanmax(taus)-np.nanmin(taus))) plt.xlabel("number of steps") plt.ylabel(r"mean $\hat{\tau}$") - fig1.savefig(f"{OutputDir}/autocorrelation_time.pdf", + fig1.savefig(f"{output_dir}/autocorrelation_time.pdf", bbox_inches='tight') - # logml_dict = self.Marginal_llk_emcee(sampler, self.nburn, logp=None, + # logml_dict = self.marginal_llk_emcee(sampler, self.nburn, logp=None, # maxiter=5000) # print('\nThe Bridge Sampling Estimation is " # f"{logml_dict['logml']:.5f}.') @@ -354,93 +377,137 @@ class MCMC(): return Posterior_df + # ------------------------------------------------------------------------- def log_prior(self, theta): + """ + Calculates the log prior likelihood for the given parameter set(s) + theta. + + Parameters + ---------- + theta : array of shape (n_samples, n_params) + Parameter sets, i.e. proposals of MCMC chains. + + Returns + ------- + logprior: float or array of shape n_samples + Log prior likelihood. If theta has only one row, a single value is + returned otherwise an array. 
- PCEModel = self.BayesOpts.PCEModel + """ + + MetaModel = self.BayesOpts.MetaModel Discrepancy = self.BayesOpts.Discrepancy - nSigma2 = len(Discrepancy.ExpDesign.bound_tuples) if Discrepancy.optSigma != 'B' else -len(theta) - priorDist = PCEModel.ExpDesign.priorSpace - params_range = PCEModel.ExpDesign.bound_tuples - ndimTheta = theta.ndim - theta = theta if ndimTheta != 1 else theta.reshape((1,-1)) + disc_bound_tuples = Discrepancy.ExpDesign.bound_tuples + disc_marginals = Discrepancy.ExpDesign.InputObj.Marginals + disc_prior_space = Discrepancy.ExpDesign.prior_space + # Find the number of sigma2 parameters + if Discrepancy.opt_sigma != 'B': + n_sigma2 = len(disc_bound_tuples) + else: + n_sigma2 = -len(theta) + prior_dist = MetaModel.ExpDesign.prior_space + params_range = MetaModel.ExpDesign.bound_tuples + theta = theta if theta.ndim != 1 else theta.reshape((1, -1)) nsamples = theta.shape[0] logprior = -np.inf*np.ones(nsamples) for i in range(nsamples): # Check if the sample is within the parameters' range - if self.check_ranges(theta[i], params_range): + if self._check_ranges(theta[i], params_range): # Check if all dists are uniform, if yes priors are equal. - if all(PCEModel.input_obj.Marginals[i].DistType == 'uniform' for i in \ - range(PCEModel.NofPa)): + if all(MetaModel.input_obj.Marginals[i].dist_type == 'uniform' + for i in range(MetaModel.n_params)): logprior[i] = 0.0 else: - logprior[i] = np.log(priorDist.pdf(theta[i,:-nSigma2].T)) + logprior[i] = np.log( + prior_dist.pdf(theta[i, :-n_sigma2].T) + ) # Check if bias term needs to be inferred - if Discrepancy.optSigma != 'B': - if self.check_ranges(theta[i,-nSigma2:], - Discrepancy.ExpDesign.bound_tuples): - if all('unif' in Discrepancy.ExpDesign.InputObj.Marginals[i].DistType for i in \ - range(Discrepancy.ExpDesign.ndim)): + if Discrepancy.opt_sigma != 'B': + if self._check_ranges(theta[i, -n_sigma2:], + disc_bound_tuples): + if all('unif' in disc_marginals[i].dist_type for i in + range(Discrepancy.ExpDesign.ndim)): logprior[i] = 0.0 else: - logprior[i] += np.log(Discrepancy.ExpDesign.priorSpace.pdf(theta[i,-nSigma2:])) + logprior[i] += np.log( + disc_prior_space.pdf(theta[i, -n_sigma2:]) + ) if nsamples == 1: return logprior[0] else: return logprior + # ------------------------------------------------------------------------- def log_likelihood(self, theta): + """ + Computes likelihood p(y|theta, obs) of the performance of the + (meta-)model in reproducing the observation data. + + Parameters + ---------- + theta : array of shape (n_samples, n_params) + Parameter set, i.e. proposals of the MCMC chains. + + Returns + ------- + log_like : array of shape (n_samples) + Log likelihood. 
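With a diagonal covariance, the normpdf call used below reduces to the standard independent-Gaussian log-likelihood; schematically (a sketch, not the package API):

    import numpy as np

    def gaussian_loglik(y_model, y_obs, sigma2):
        """log p(y_obs | theta) with variances sigma2 on the diagonal."""
        resid = y_model - y_obs
        return -0.5 * np.sum(resid**2 / sigma2 + np.log(2 * np.pi * sigma2))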
+ + """ BayesOpts = self.BayesOpts - PCEModel = BayesOpts.PCEModel + MetaModel = BayesOpts.MetaModel Discrepancy = self.BayesOpts.Discrepancy - if Discrepancy.optSigma != 'B': - nSigma2 = len(Discrepancy.ExpDesign.bound_tuples) + disc_bound_tuples = Discrepancy.ExpDesign.bound_tuples + # Find the number of sigma2 parameters + if Discrepancy.opt_sigma != 'B': + n_sigma2 = len(disc_bound_tuples) else: - nSigma2 = -len(theta) + n_sigma2 = -len(theta) # Check if bias term needs to be inferred - if Discrepancy.optSigma != 'B': - Sigma2 = theta[:, -nSigma2:] - theta = theta[:, :-nSigma2] + if Discrepancy.opt_sigma != 'B': + sigma2 = theta[:, -n_sigma2:] + theta = theta[:, :-n_sigma2] else: - Sigma2 = None - ndimTheta = theta.ndim - theta = theta if ndimTheta != 1 else theta.reshape((1, -1)) + sigma2 = None + theta = theta if theta.ndim != 1 else theta.reshape((1, -1)) - # Evaluate Model/PCEModel at theta - meanPCEPriorPred, BayesOpts._stdPCEPriorPred = self.eval_model(theta) + # Evaluate Model/MetaModel at theta + mean_pred, BayesOpts._std_pce_prior_pred = self.eval_model(theta) # Surrogate model's error using RMSE of test data - surrError = PCEModel.RMSE if hasattr(PCEModel, 'RMSE') else None + surrError = MetaModel.rmse if hasattr(MetaModel, 'rmse') else None # Likelihood - return BayesOpts.normpdf(meanPCEPriorPred, self.Observation, - self.TotalSigma2, Sigma2, std=surrError) + log_like = BayesOpts.normpdf( + mean_pred, self.observation, self.total_sigma2, sigma2, + std=surrError + ) + return log_like + # ------------------------------------------------------------------------- def log_posterior(self, theta): """ - Computes the posterior likelihood for the given parameterset. + Computes the posterior likelihood p(theta| obs) for the given + parameterset. Parameters ---------- - theta : TYPE - DESCRIPTION. - Observation : TYPE - DESCRIPTION. - TotalSigma2 : TYPE - DESCRIPTION. + theta : array of shape (n_samples, n_params) + Parameter set, i.e. proposals of the MCMC chains. Returns ------- - TYPE - DESCRIPTION. + log_like : array of shape (n_samples) + Log posterior likelihood. """ - ndimTheta = theta.ndim - nsamples = 1 if ndimTheta == 1 else theta.shape[0] + nsamples = 1 if theta.ndim == 1 else theta.shape[0] if nsamples == 1: if self.log_prior(theta) == -np.inf: @@ -460,62 +527,95 @@ class MCMC(): log_likelihood = -np.inf*np.ones(nsamples) # find the indices for -inf sets - nonInfIdx = np.where(log_prior != -np.inf)[0] + non_inf_idx = np.where(log_prior != -np.inf)[0] # Compute loLikelihoods - if nonInfIdx.size != 0: - log_likelihood[nonInfIdx] = self.log_likelihood(theta[nonInfIdx]) + if non_inf_idx.size != 0: + log_likelihood[non_inf_idx] = self.log_likelihood( + theta[non_inf_idx] + ) return log_prior + log_likelihood + # ------------------------------------------------------------------------- def eval_model(self, theta): + """ + Evaluates the (meta-) model at the given theta. + + Parameters + ---------- + theta : array of shape (n_samples, n_params) + Parameter set, i.e. proposals of the MCMC chains. + + Returns + ------- + mean_pred : dict + Mean model prediction. + std_pred : dict + Std of model prediction. 
+
+        """
         BayesObj = self.BayesOpts
-        PCEModel = BayesObj.PCEModel
-        Model = PCEModel.ModelObj
+        MetaModel = BayesObj.MetaModel
+        Model = MetaModel.ModelObj

         if BayesObj.emulator:
-            # Evaluate the PCEModel
-            meanPred, stdPred = PCEModel.eval_metamodel(samples=theta,
-                                                        name=BayesObj.Name)
+            # Evaluate the MetaModel
+            mean_pred, std_pred = MetaModel.eval_metamodel(samples=theta)
         else:
             # Evaluate the origModel
             # Prepare the function
-            meanPred, stdPred = dict(), dict()
-            OutputNames = Model.Output.Names
+            mean_pred, std_pred = dict(), dict()
+            output_names = Model.Output.names

             try:
-                Filename = Model.pyFile
-                Function = getattr(__import__(Filename), Filename)
+                filename = Model.py_file
+                function = getattr(__import__(filename), filename)

                 # Run function for theta
-                modelOuts = Function(theta.reshape(1, -1))
+                model_outs = function(theta.reshape(1, -1))
             except:
-                modelOuts, _ = Model.run_model_parallel(theta[0],
-                                                        prevRun_No=self.counter,
-                                                        keyString='_MCMC',
-                                                        mp=False)
+                model_outs, _ = Model.run_model_parallel(
+                    theta[0], prevRun_No=self.counter,
+                    key_str='_MCMC', mp=False)

             # Save outputs in respective dicts
-            for varIdx, var in enumerate(OutputNames):
-                meanPred[var] = modelOuts[var]
-                stdPred[var] = np.zeros((meanPred[var].shape))
+            for varIdx, var in enumerate(output_names):
+                mean_pred[var] = model_outs[var]
+                std_pred[var] = np.zeros((mean_pred[var].shape))

             # Remove the folder
-            shutil.rmtree(Model.Name + '_MCMC_' + str(self.counter+1))
+            shutil.rmtree(f"{Model.name}_MCMC_{self.counter+1}")

             # Add one to the counter
             self.counter += 1

-        if hasattr(self, 'errorMetaModel') and BayesObj.errorModel:
-            meanPred, stdPred = self.errorMetaModel.eval_model_error(
-                BayesObj.BiasInputs, meanPred)
+        if hasattr(self, 'error_MetaModel') and BayesObj.error_model:
+            mean_pred, std_pred = self.error_MetaModel.eval_model_error(
+                BayesObj.BiasInputs, mean_pred
+                )
+
+        return mean_pred, std_pred

-        return meanPred, stdPred
+    # -------------------------------------------------------------------------
+    def train_error_model(self, sampler):
+        """
+        Trains an error model using Gaussian Process Regression.
+
+        Parameters
+        ----------
+        sampler : obj
+            emcee sampler.
+
+        Returns
+        -------
+        error_MetaModel : obj
+            An error model.
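The burn-in and thinning heuristic applied in this method's body follows emcee's integrated autocorrelation time; in outline, for an existing sampler object:

    import numpy as np

    tau = sampler.get_autocorr_time(tol=0)     # may be unreliable early on
    burnin = int(2 * np.nanmax(tau))           # discard roughly 2*tau steps
    thin = max(int(0.5 * np.nanmin(tau)), 1)   # keep every ~tau/2-th step
    flat_samples = sampler.get_chain(discard=burnin, flat=True, thin=thin)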
-    def train_errorModel(self, sampler):
+        """
         BayesObj = self.BayesOpts
-        PCEModel = BayesObj.PCEModel
+        MetaModel = BayesObj.MetaModel

         # Prepare the posterior samples
         try:
@@ -529,113 +629,180 @@ class MCMC():
         burnin = int(2*np.nanmax(tau))
         thin = int(0.5*np.nanmin(tau)) if int(0.5*np.nanmin(tau)) != 0 else 1
         finalsamples = sampler.get_chain(discard=burnin, flat=True, thin=thin)
-        Posterior_df = finalsamples[:, :PCEModel.NofPa]
+        posterior = finalsamples[:, :MetaModel.n_params]

         # Select posterior mean as MAP
-        MAP_theta = Posterior_df.mean(axis=0).reshape((1, PCEModel.NofPa))
+        map_theta = posterior.mean(axis=0).reshape((1, MetaModel.n_params))
         # MAP_theta = st.mode(Posterior_df,axis=0)[0]

         # Evaluate the (meta-)model at the MAP
-        y_MAP, y_std_MAP = PCEModel.eval_metamodel(samples=MAP_theta,
-                                                   name=BayesObj.Name)
+        y_map, y_std_map = MetaModel.eval_metamodel(samples=map_theta)

         # Train a GPR meta-model using MAP
-        errorMetaModel = PCEModel.create_model_error(BayesObj.BiasInputs,
-                                                     y_MAP, Name='Calib')
-        return errorMetaModel
+        error_MetaModel = MetaModel.create_model_error(
+            BayesObj.BiasInputs, y_map, name='Calib')
+
+        return error_MetaModel

-    def Marginal_llk_emcee(self, sampler, nburn=None, logp=None, maxiter=1000):
+    # -------------------------------------------------------------------------
+    def gelman_rubin(self, chain, return_var=False):
         """
-        The Bridge Sampling Estimator of the Marginal Likelihood.
+        The potential scale reduction factor (PSRF) is defined from the
+        variance within one chain, W, and the variance between chains, B.
+        Both variances are combined in a weighted sum to obtain an estimate of
+        the variance of a parameter θ. The square root of the ratio of this
+        estimated variance to the within-chain variance is called the
+        potential scale reduction.
+        For a well converged chain it should approach 1. Values greater than
+        typically 1.1 indicate that the chains have not yet fully converged.
+
+        Source: http://joergdietrich.github.io/emcee-convergence.html
+
+        https://github.com/jwalton3141/jwalton3141.github.io/blob/master/assets/posts/ESS/rwmh.py

         Parameters
         ----------
-        mtrace : MultiTrace, result of MCMC run
-        model : PyMC Model
-            Optional model. Default None, taken from context.
-        logp : Model Log-probability function, read from the model by default
-        maxiter : Maximum number of iterations
+        chain : array (n_walkers, n_steps, n_params)
+            MCMC chains.

         Returns
         -------
-        marg_llk : Estimated Marginal log-Likelihood.
+        R_hat : float
+            The Gelman-Rubin values.
+
+        """
+        m_chains, n_iters = chain.shape[:2]
+
+        # Calculate between-chain variance
+        θb = np.mean(chain, axis=1)
+        θbb = np.mean(θb, axis=0)
+        B_over_n = ((θbb - θb)**2).sum(axis=0)
+        B_over_n /= (m_chains - 1)
+
+        # Calculate within-chain variances
+        ssq = np.var(chain, axis=1, ddof=1)
+        W = np.mean(ssq, axis=0)
+
+        # (over) estimate of variance
+        var_θ = W * (n_iters - 1) / n_iters + B_over_n
+
+        if return_var:
+            return var_θ
+        else:
+            # The square root of the ratio of this estimated variance to the
+            # within-chain variance
+            R_hat = np.sqrt(var_θ / W)
+            return R_hat
+
+    # -------------------------------------------------------------------------
+    def marginal_llk_emcee(self, sampler, nburn=None, logp=None, maxiter=1000):
+        """
+        The Bridge Sampling Estimator of the Marginal Likelihood based on
+        https://gist.github.com/junpenglao/4d2669d69ddfe1d788318264cdcf0583
+
+        Parameters
+        ----------
+        sampler : obj
+            MultiTrace, result of MCMC run.
+        nburn : int, optional
+            Number of burn-in steps. The default is None.
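In formulas, the estimator above combines the mean within-chain variance W and the between-chain variance B/n into var_hat = (n-1)/n * W + B/n and reports R_hat = sqrt(var_hat / W). A hypothetical convergence check on an emcee run (emcee stores chains as (n_steps, n_walkers, n_params), hence the transpose; mcmc_obj stands for an MCMC instance):

    import numpy as np

    chain = sampler.get_chain().transpose(1, 0, 2)
    r_hat = mcmc_obj.gelman_rubin(chain)
    print("converged:", np.all(r_hat < 1.1))   # common rule of thumb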
+ logp : TYPE, optional + Model Log-probability function. The default is None. + maxiter : int, optional + Maximum number of iterations. The default is 1000. + + Returns + ------- + marg_llk : dict + Estimated Marginal log-Likelihood. + """ r0, tol1, tol2 = 0.5, 1e-10, 1e-4 - + if logp is None: logp = sampler.log_prob_fn - - - # Split the samples into two parts - # Use the first 50% for fiting the proposal distribution and the second 50% - # in the iterative scheme. + + # Split the samples into two parts + # Use the first 50% for fiting the proposal distribution + # and the second 50% in the iterative scheme. if nburn is None: mtrace = sampler.chain else: mtrace = sampler.chain[:, nburn:, :] - + nchain, len_trace, nrofVars = mtrace.shape - + N1_ = len_trace // 2 N1 = N1_*nchain N2 = len_trace*nchain - N1 - + samples_4_fit = np.zeros((nrofVars, N1)) samples_4_iter = np.zeros((nrofVars, N2)) effective_n = np.zeros((nrofVars)) - + # matrix with already transformed samples for var in range(nrofVars): - + # for fitting the proposal - x = mtrace[:,:N1_,var] - + x = mtrace[:, :N1_, var] + samples_4_fit[var, :] = x.flatten() # for the iterative scheme - x2 = mtrace[:,N1_:,var] + x2 = mtrace[:, N1_:, var] samples_4_iter[var, :] = x2.flatten() - - # effective sample size of samples_4_iter, scalar - #(https://github.com/jwalton3141/jwalton3141.github.io/blob/master/assets/posts/ESS/rwmh.py) - effective_n[var] = self.my_ESS(x2) - + + # effective sample size of samples_4_iter, scalar + effective_n[var] = self._my_ESS(x2) + # median effective sample size (scalar) neff = np.median(effective_n) - + # get mean & covariance matrix and generate samples from proposal m = np.mean(samples_4_fit, axis=1) V = np.cov(samples_4_fit) L = chol(V, lower=True) - + # Draw N2 samples from the proposal distribution - gen_samples = m[:, None] + np.dot(L, st.norm.rvs(0, 1, - size=samples_4_iter.shape)) - + gen_samples = m[:, None] + np.dot( + L, st.norm.rvs(0, 1, size=samples_4_iter.shape) + ) + # Evaluate proposal distribution for posterior & generated samples q12 = st.multivariate_normal.logpdf(samples_4_iter.T, m, V) q22 = st.multivariate_normal.logpdf(gen_samples.T, m, V) - + # Evaluate unnormalized posterior for posterior & generated samples q11 = logp(samples_4_iter.T) q21 = logp(gen_samples.T) - + # Run iterative scheme: - tmp = self.iterative_scheme(N1, N2, q11, q12, q21, q22, r0, neff, tol1, maxiter, 'r') + tmp = self._iterative_scheme( + N1, N2, q11, q12, q21, q22, r0, neff, tol1, maxiter, 'r' + ) if ~np.isfinite(tmp['logml']): - warnings.warn("""logml could not be estimated within maxiter, rerunning with - adjusted starting value. Estimate might be more variable than usual.""") + warnings.warn( + "logml could not be estimated within maxiter, rerunning with " + "adjusted starting value. 
Estimate might be more variable than"
+                " usual.")
             # use geometric mean as starting value
             r0_2 = np.sqrt(tmp['r_vals'][-2]*tmp['r_vals'][-1])
-            tmp = self.iterative_scheme(q11, q12, q21, q22, r0_2, neff, tol2, maxiter, 'logml')
-
-        return dict(logml = tmp['logml'], niter = tmp['niter'], method = "normal",
-                    q11 = q11, q12 = q12, q21 = q21, q22 = q22)
-
-
-    def iterative_scheme(self,N1, N2, q11, q12, q21, q22, r0, neff, tol, maxiter, criterion):
+            tmp = self._iterative_scheme(
+                N1, N2, q11, q12, q21, q22, r0_2, neff, tol2, maxiter, 'logml'
+                )
+
+        marg_llk = dict(
+            logml=tmp['logml'], niter=tmp['niter'], method="normal",
+            q11=q11, q12=q12, q21=q21, q22=q22
+            )
+        return marg_llk
+
+    # -------------------------------------------------------------------------
+    def _iterative_scheme(self, N1, N2, q11, q12, q21, q22, r0, neff, tol,
+                          maxiter, criterion):
         """
-        Iterative scheme as proposed in Meng and Wong (1996) to estimate the marginal likelihood
-
+        Iterative scheme as proposed in Meng and Wong (1996) to estimate the
+        marginal likelihood
+
         Parameters
         ----------
         N1 : TYPE
             DESCRIPTION.
         N2 : TYPE
             DESCRIPTION.
         q11 : TYPE
             DESCRIPTION.
         q12 : TYPE
             DESCRIPTION.
         q21 : TYPE
             DESCRIPTION.
         q22 : TYPE
             DESCRIPTION.
         r0 : TYPE
             DESCRIPTION.
         neff : TYPE
             DESCRIPTION.
         tol : TYPE
             DESCRIPTION.
         maxiter : TYPE
             DESCRIPTION.
         criterion : TYPE
             DESCRIPTION.
-
+
         Returns
         -------
         TYPE
             DESCRIPTION.
-
+
         """
         l1 = q11 - q12
         l2 = q21 - q22
-        lstar = np.median(l1) # To increase numerical stability,
-        # subtracting the median of l1 from l1 & l2 later
+        # To increase numerical stability,
+        # subtracting the median of l1 from l1 & l2 later
+        lstar = np.median(l1)
         s1 = neff/(neff + N2)
         s2 = N2/(neff + N2)
         r = r0
         r_vals = [r]
         logml = np.log(r) + lstar
         criterion_val = 1 + tol
-
+
         i = 0
         while (i <= maxiter) & (criterion_val > tol):
             rold = r
@@ -685,74 +853,78 @@ class MCMC():
             numi = np.exp(l2 - lstar)/(s1 * np.exp(l2 - lstar) + s2 * r)
             deni = 1/(s1 * np.exp(l1 - lstar) + s2 * r)
             if np.sum(~np.isfinite(numi))+np.sum(~np.isfinite(deni)) > 0:
-                warnings.warn("""Infinite value in iterative scheme, returning NaN.
-                Try rerunning with more samples.""")
+                warnings.warn(
+                    """Infinite value in iterative scheme, returning NaN.
+                    Try rerunning with more samples.""")
             r = (N1/N2) * np.sum(numi)/np.sum(deni)
             r_vals.append(r)
             logml = np.log(r) + lstar
             i += 1
-            if criterion=='r':
+            if criterion == 'r':
                 criterion_val = np.abs((r - rold)/r)
-            elif criterion=='logml':
+            elif criterion == 'logml':
                 criterion_val = np.abs((logml - logmlold)/logml)
-
+
         if i >= maxiter:
-            return dict(logml = np.NaN, niter = i, r_vals = np.asarray(r_vals))
+            return dict(logml=np.NaN, niter=i, r_vals=np.asarray(r_vals))
         else:
-            return dict(logml = logml, niter = i)
-
-
-    def my_gelman_rubin(self, x):
-        """ Estimate the marginal posterior variance. Vectorised implementation. """
-        m_chains, n_iters = x.shape
-
-        # Calculate between-chain variance
-        B_over_n = ((np.mean(x, axis=1) - np.mean(x))**2).sum() / (m_chains - 1)
-
-        # Calculate within-chain variances
-        W = ((x - x.mean(axis=1, keepdims=True))**2).sum() / (m_chains*(n_iters - 1))
-
-        # (over) estimate of variance
-        s2 = W * (n_iters - 1) / n_iters + B_over_n
-
-        return s2
-
-    def my_ESS(self, x):
-        """
-        Compute the effective sample size of estimand of interest.
-        Vectorised implementation.
+            return dict(logml=logml, niter=i)
+
+    # -------------------------------------------------------------------------
+    def _my_ESS(self, x):
+        """
+        Compute the effective sample size of the estimand of interest.
+        Vectorised implementation.
+        https://github.com/jwalton3141/jwalton3141.github.io/blob/master/assets/posts/ESS/rwmh.py
+
+
+        Parameters
+        ----------
+        x : array of shape (n_walkers, n_steps)
+            MCMC Samples.
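The estimator whose docstring continues below derives autocorrelations from a variogram, rho_t = 1 - V_t / (2 * var_hat), and returns ESS = m*n / (1 + 2 * sum_t rho_t), truncating the sum once consecutive autocorrelation estimates turn negative. An illustrative sanity check, assuming an MCMC instance mcmc_obj (an i.i.d. chain should give an ESS close to m*n):

    import numpy as np

    rng = np.random.default_rng(1)
    iid_chain = rng.normal(size=(4, 1000))   # (n_walkers, n_steps), white noise
    print(mcmc_obj._my_ESS(iid_chain))       # expected to be near 4000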
+
+        Returns
+        -------
+        int
+            Effective sample size.
+        """
         m_chains, n_iters = x.shape
-
-        variogram = lambda t: ((x[:, t:] - x[:, :(n_iters - t)])**2).sum() / (m_chains * (n_iters - t))
-
-        post_var = self.my_gelman_rubin(x)
-
+
+        def variogram(t):
+            variogram = ((x[:, t:] - x[:, :(n_iters - t)])**2).sum()
+            variogram /= (m_chains * (n_iters - t))
+            return variogram
+
+        post_var = self.gelman_rubin(x, return_var=True)
+
         t = 1
         rho = np.ones(n_iters)
         negative_autocorr = False
-
-        # Iterate until the sum of consecutive estimates of autocorrelation is negative
+
+        # Iterate until the sum of consecutive estimates of autocorrelation is
+        # negative
         while not negative_autocorr and (t < n_iters):
             rho[t] = 1 - variogram(t) / (2 * post_var)
-
+
             if not t % 2:
                 negative_autocorr = sum(rho[t-1:t+1]) < 0
-
+
             t += 1
-
+
         return int(m_chains*n_iters / (1 + 2*rho[1:t].sum()))
-
-    def check_ranges(self, theta, ranges):
+
+    # -------------------------------------------------------------------------
+    def _check_ranges(self, theta, ranges):
         """
-        This function checks if theta lies in the given ranges
+        This function checks if theta lies in the given ranges.

         Parameters
         ----------
-        theta : numpy array
+        theta : array
             Proposed parameter set.
-        ranges : TYPE
-            DESCRIPTION.
+        ranges : nested list
+            List of the parameter ranges.

         Returns
         -------
@@ -761,48 +933,11 @@
         c = True
-        #sigma = theta[-1]
-        # traverse in the list1
-        for i, bounds in enumerate(ranges):
+        # traverse the list of ranges
+        for i, bounds in enumerate(ranges):
             x = theta[i]
             # condition check
-            if x < bounds[0] or x > bounds[1]: #or sigma < 0:
+            if x < bounds[0] or x > bounds[1]:
                 c = False
                 return c
         return c
-
-    def gelman_rubin(self, chain):
-        """
-        The potential scale reduction factor (PSRF) defined by the variance within
-        one chain, W, with the variance between chains B.
-        Both variances are combined in a weighted sum to obtain
-        an estimate of the variance of a parameter θ.The square root of the ratio
-        of this estimates variance to the within chain variance is called the
-        potential scale reduction.
-        For a well converged chain it should approach 1. Values greater than
-        typically 1.1 indicate that the chains have not yet fully converged.
-
-        Source: http://joergdietrich.github.io/emcee-convergence.html
-
-        Parameters
-        ----------
-        chain : array (nWalkers, nSteps, nparams)
-            DESCRIPTION.
-
-        Returns
-        -------
-        R_hat : float
-            The Gelman-Robin values.
-
-        """
-        ssq = np.var(chain, axis=1, ddof=1)
-        W = np.mean(ssq, axis=0)
-        θb = np.mean(chain, axis=1)
-        θbb = np.mean(θb, axis=0)
-        m = chain.shape[0]
-        n = chain.shape[1]
-        B = n / (m - 1) * np.sum((θbb - θb)**2, axis=0)
-        var_θ = (n - 1) / n * W + B / n
-        R_hat = np.sqrt(var_θ / W)
-
-        return R_hat
\ No newline at end of file
diff --git a/src/bayesvalidrox/post_processing/post_processing.py b/src/bayesvalidrox/post_processing/post_processing.py
index 51f81eedc..a0b0738e6 100644
--- a/src/bayesvalidrox/post_processing/post_processing.py
+++ b/src/bayesvalidrox/post_processing/post_processing.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """
-This class offers helper functions for post-processing the metamodels.
+This class offers helper functions for post-processing of the metamodels.

 Author: Farid Mohammadi, M.Sc.
E-Mail: farid.mohammadi@iws.uni-stuttgart.de @@ -22,92 +22,65 @@ import pandas as pd import scipy.stats as stats from sklearn.linear_model import LinearRegression from sklearn.metrics import mean_squared_error, r2_score +from statsmodels.graphics.gofplots import qqplot from matplotlib.backends.backend_pdf import PdfPages import matplotlib.pyplot as plt import matplotlib.ticker as ticker -SIZE = 30 -plt.rc('figure', figsize=(24, 16)) -plt.rc('font', family='serif', serif='Arial') -plt.rc('font', size=SIZE) -plt.rc('axes', grid=True) -plt.rc('text', usetex=True) -plt.rc('axes', linewidth=3) -plt.rc('axes', grid=True) -plt.rc('grid', linestyle="-") -plt.rc('axes', titlesize=SIZE) # fontsize of the axes title -plt.rc('axes', labelsize=SIZE) # fontsize of the x and y labels -plt.rc('xtick', labelsize=SIZE) # fontsize of the tick labels -plt.rc('ytick', labelsize=SIZE) # fontsize of the tick labels -plt.rc('legend', fontsize=SIZE) # legend fontsize -plt.rc('figure', titlesize=SIZE) # fontsize of the figure title +from matplotlib.offsetbox import AnchoredText +from matplotlib.patches import Patch +# Load the mplstyle +plt.style.use(os.path.join(os.path.split(__file__)[0], + '../', 'bayesvalidrox.mplstyle')) class PostProcessing: - def __init__(self, PCEModel, Name='calib'): - self.PCEModel = PCEModel - self.Name = Name - self.PCEMeans = {} - self.PCEStd = {} - self.NrofSamples = None - self.Samples = [] - self.Samplesu = [] - self.ModelOutputs = {} - self.PCEOutputs = [] - self.PCEOutputs_std = [] - self.sobol_cell = {} - self.total_sobol = {} - self.Likelihoods = [] + def __init__(self, MetaModel, name='calib'): + self.MetaModel = MetaModel + self.name = name # ------------------------------------------------------------------------- - def PCEMoments(self): - - PCEModel = self.PCEModel - Model = PCEModel.ModelObj - - for Outkey, ValuesDict in PCEModel.coeffs_dict.items(): - - PCEMean = np.zeros((len(ValuesDict))) - PCEVar = np.zeros((len(ValuesDict))) - - for Inkey, InIdxValues in ValuesDict.items(): - idx = int(Inkey.split('_')[1]) - 1 - coeffs = PCEModel.coeffs_dict[Outkey][Inkey] - - # Mean = c_0 - PCEMean[idx] = coeffs[0] if coeffs[0]!=0 else PCEModel.clf_poly[Outkey][Inkey].intercept_ - # Var = sum(coeffs[1:]**2) - PCEVar[idx] = np.sum(np.square(coeffs[1:])) - - if PCEModel.dim_red_method.lower() == 'pca': - PCA = PCEModel.pca[Outkey] - self.PCEMeans[Outkey] = PCA.mean_ + np.dot(PCEMean, PCA.components_) - self.PCEStd[Outkey] = np.sqrt(np.dot(PCEVar, PCA.components_**2)) - else: - self.PCEMeans[Outkey] = PCEMean - self.PCEStd[Outkey] = np.sqrt(PCEVar) - try: - self.Reference = Model.read_mc_reference() - except: - pass + def plot_moments(self, xlabel='Time [s]', plot_type=None, save_fig=True): + """ + Plots the moments in a pdf format in the directory + `Outputs_PostProcessing`. + + Parameters + ---------- + xlabel : string, optional + String to be displayed as x-label. The default is 'Time [s]'. + plot_type : string, optional + Options: bar or line. The default is None. + save_fig : bool, optional + Save figure or not. The default is True. + + Returns + ------- + pce_means: dict + Mean of the model outputs. + pce_means: dict + Standard deviation of the model outputs. 
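The scattered per-module `plt.rc` calls deleted above are replaced by one shared style sheet. The exact contents of `bayesvalidrox.mplstyle` are outside this patch; a hypothetical Python equivalent that mirrors the removed settings would be:

```python
import matplotlib.pyplot as plt

# Assumed counterpart of the deleted rc block (SIZE = 30, serif fonts, TeX
# labels, thick axes with grid); the real values live in bayesvalidrox.mplstyle.
plt.rcParams.update({
    'figure.figsize': (24, 16),
    'font.family': 'serif',
    'font.size': 30,
    'text.usetex': True,
    'axes.linewidth': 3,
    'axes.grid': True,
    'grid.linestyle': '-',
    'legend.fontsize': 30,
})
```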
- # ------------------------------------------------------------------------- - def plotMoments(self, xlabel='Time [s]', plotType=None, SaveFig=True): + """ - barPlot = True if plotType == 'bar' else False - metaModel = self.PCEModel.meta_model_type + bar_plot = True if plot_type == 'bar' else False + meta_model_type = self.MetaModel.meta_model_type + Model = self.MetaModel.ModelObj + + # Read Monte-Carlo reference + self.mc_reference = Model.read_mc_reference() # Set the x values - x_values_orig = self.PCEModel.ExpDesign.x_values + x_values_orig = self.MetaModel.ExpDesign.x_values # Compute the moments with the PCEModel object - self.PCEMoments() + self._compute_pce_moments() # Get the variables - Keys = list(self.PCEMeans.keys()) + out_names = Model.Output.names # Open a pdf for the plots - if SaveFig: - newpath = (f'Outputs_PostProcessing_{self.Name}/') + if save_fig: + newpath = (f'Outputs_PostProcessing_{self.name}/') if not os.path.exists(newpath): os.makedirs(newpath) @@ -116,12 +89,12 @@ class PostProcessing: # Plot the best fit line, set the linewidth (lw), color and # transparency (alpha) of the line - for idx, key in enumerate(Keys): + for idx, key in enumerate(out_names): fig, ax = plt.subplots(nrows=1, ncols=2) # Extract mean and std - mean_data = self.PCEMeans[key] - std_data = self.PCEStd[key] + mean_data = self.pce_means[key] + std_data = self.pce_stds[key] # Extract a list of x values if type(x_values_orig) is dict: @@ -130,50 +103,50 @@ class PostProcessing: x = x_values_orig # Plot: bar plot or line plot - if barPlot: + if bar_plot: ax[0].bar(list(map(str, x)), mean_data, color='b', width=0.25) ax[1].bar(list(map(str, x)), std_data, color='b', width=0.25) - ax[0].legend(labels=[metaModel]) - ax[1].legend(labels=[metaModel]) + ax[0].legend(labels=[meta_model_type]) + ax[1].legend(labels=[meta_model_type]) else: ax[0].plot(x, mean_data, lw=3, color='k', marker='x', - label=metaModel) + label=meta_model_type) ax[1].plot(x, std_data, lw=3, color='k', marker='x', - label=metaModel) + label=meta_model_type) - if self.Reference is not None: - if barPlot: - ax[0].bar(list(map(str, x)), self.Reference['mean'], + if self.mc_reference is not None: + if bar_plot: + ax[0].bar(list(map(str, x)), self.mc_reference['mean'], color='r', width=0.25) - ax[1].bar(list(map(str, x)), self.Reference['std'], + ax[1].bar(list(map(str, x)), self.mc_reference['std'], color='r', width=0.25) - ax[0].legend(labels=[metaModel]) - ax[1].legend(labels=[metaModel]) + ax[0].legend(labels=[meta_model_type]) + ax[1].legend(labels=[meta_model_type]) else: - ax[0].plot(x, self.Reference['mean'], lw=3, marker='x', + ax[0].plot(x, self.mc_reference['mean'], lw=3, marker='x', color='r', label='Ref.') - ax[1].plot(x, self.Reference['std'], lw=3, marker='x', + ax[1].plot(x, self.mc_reference['std'], lw=3, marker='x', color='r', label='Ref.') # Label the axes and provide a title ax[0].set_xlabel(xlabel) ax[1].set_xlabel(xlabel) - ax[0].set_ylabel(Keys[idx]) - ax[1].set_ylabel(Keys[idx]) + ax[0].set_ylabel(key) + ax[1].set_ylabel(key) # Provide a title ax[0].set_title('Mean of ' + key) ax[1].set_title('Std of ' + key) - if not barPlot: + if not bar_plot: ax[0].legend(loc='best') ax[1].legend(loc='best') plt.tight_layout() - if SaveFig: + if save_fig: # save the current figure pdf.savefig(fig, bbox_inches='tight') @@ -182,704 +155,387 @@ class PostProcessing: pdf.close() - # ------------------------------------------------------------------------- - def get_Sample(self): - - PCEModel = self.PCEModel - - NrSamples = 
self.NrofSamples - - self.Samples = PCEModel.ExpDesign.generate_samples(NrSamples, 'random') - - return self.Samples - - # ------------------------------------------------------------------------- - def eval_PCEmodel_3D(self, SaveFig=True): - - self.NrofSamples = 1000 - - PCEModel = self.PCEModel - Model = self.PCEModel.ModelObj - NrofSamples = self.NrofSamples - - - # Create 3D-Grid - # TODO: Make it general - x = np.linspace(-5, 10, NrofSamples) - y = np.linspace(0, 15, NrofSamples) + return self.pce_means, self.pce_stds - - X, Y = np.meshgrid(x, y) - PCE_Z = np.zeros((self.NrofSamples, self.NrofSamples)) - Model_Z = np.zeros((self.NrofSamples, self.NrofSamples)) - - for idxMesh in range(self.NrofSamples): - - SampleMesh = np.vstack((X[:,idxMesh], Y[:,idxMesh])).T - - - - univ_p_val = PCEModel.univ_basis_vals(SampleMesh) - - for Outkey, ValuesDict in PCEModel.coeffs_dict.items(): - - - - - PCEOutputs_mean = np.zeros((len(SampleMesh), len(ValuesDict))) - PCEOutputs_std = np.zeros((len(SampleMesh), len(ValuesDict))) - ModelOutputs = np.zeros((len(SampleMesh), len(ValuesDict))) - - for Inkey, InIdxValues in ValuesDict.items(): - idx = int(Inkey.split('_')[1]) - 1 - PolynomialDegrees = PCEModel.basis_dict[Outkey][Inkey] - clf_poly = PCEModel.clf_poly[Outkey][Inkey] - - PSI_Val = PCEModel.PCE_create_Psi(PolynomialDegrees, univ_p_val) - # Perdiction with error bar - y_mean, y_std = clf_poly.predict(PSI_Val, return_std=True) - - PCEOutputs_mean[:, idx] = y_mean - PCEOutputs_std[:, idx] = y_std - - # Model evaluation - ModelOutputsDict, _ = Model.run_model_parallel(SampleMesh, keyString='Valid3D') - ModelOutputs[:,idx]= ModelOutputsDict[Outkey].T - - - PCE_Z[:,idxMesh] = y_mean - Model_Z[:,idxMesh] = ModelOutputs[:,0] - # ---------------- 3D plot for PCEModel ----------------------- - from mpl_toolkits import mplot3d - fig_PCE = plt.figure() - ax = plt.axes(projection='3d') - ax.plot_surface(X, Y, PCE_Z, rstride=1, cstride=1, - cmap='viridis', edgecolor='none') - ax.set_title('PCEModel'); - ax.set_xlabel('$x_1$') - ax.set_ylabel('$x_2$') - ax.set_zlabel('$f(x_1,x_2)$'); - - plt.grid() - plt.show() - - if SaveFig: - # Saving the figure - newpath = (r'Outputs_PostProcessing_{0}/'.format(self.Name)) - if not os.path.exists(newpath): os.makedirs(newpath) - - # 3D-plot PCEModel - fig_PCE.savefig('./'+newpath+'/3DPlot_PCEModel.pdf', format="pdf", - bbox_inches='tight') # save the figure to file - plt.close(fig_PCE) - - # ---------------- 3D plot for Model ----------------------- - fig_Model = plt.figure() - ax = plt.axes(projection='3d') - ax.plot_surface(X, Y, PCE_Z, rstride=1, cstride=1, - cmap='viridis', edgecolor='none') - ax.set_title('Model'); - ax.set_xlabel('$x_1$') - ax.set_ylabel('$x_2$') - ax.set_zlabel('$f(x_1,x_2)$'); - - - plt.grid() - plt.show() - - if SaveFig is True: - # ---------------- Saving the figure and text files ----------------------- - # 3D-plot Model - fig_Model.savefig('./'+newpath+'/3DPlot_Model.pdf', format="pdf", - bbox_inches='tight') # save the figure to file - plt.close(fig_Model) - - return - - # ------------------------------------------------------------------------- - def eval_Model(self, Samples=None, keyString='Valid'): + def valid_metamodel(self, n_samples=1, samples=None, x_axis="Time [s]"): """ - Evaluate Forward Model - - """ - Model = self.PCEModel.ModelObj - - if Samples is None: - Samples = self.get_Sample() - self.Samples = Samples - else: - Samples = Samples - self.NrofSamples = len(Samples) - - ModelOutputs, CollocationPoints = 
Model.run_model_parallel(Samples, keyString=keyString) - - self.ModelOutputs = ModelOutputs - - return self.ModelOutputs - - # ------------------------------------------------------------------------- - def validMetamodel(self, nValidSamples=1, samples=None, x_axis="Time [s]"): - """ - Evaluate the meta model and the PCEModel + Evaluates and plots the meta model and the PCEModel outputs for the + given number of samples or the given samples. + + Parameters + ---------- + n_samples : int, optional + Number of samples to be evaluated. The default is 1. + samples : array of shape (n_samples, n_params), optional + Samples to be evaluated. The default is None. + x_axis : string, optional + Label of x axis. The default is "Time [s]". + + Returns + ------- + None. + """ - metaModel = self.PCEModel - + MetaModel = self.MetaModel + Model = MetaModel.ModelObj + if samples is None: - self.NrofSamples = nValidSamples - samples = self.get_Sample() + self.n_samples = n_samples + samples = self._get_sample() else: - self.NrofSamples = samples.shape[0] - - - x_values = self.PCEModel.ExpDesign.x_values - - self.ModelOutputs = self.eval_Model(samples, keyString='valid') - self.PCEOutputs, self.PCEOutputs_std = metaModel.eval_metamodel(samples=samples) + self.n_samples = samples.shape[0] + + # Extract x_values + x_values = MetaModel.ExpDesign.x_values + + self.model_out_dict = self._eval_model(samples, key_str='valid') + self.pce_out_mean, self.pce_out_std = MetaModel.eval_metamodel(samples) try: - key = list(self.ModelOutputs.keys())[1] - except: - key = list(self.ModelOutputs.keys())[0] - - NrofOutputs = self.ModelOutputs[key].shape[1] - - if NrofOutputs == 1: - self.plotValidation() + key = Model.Output.names[1] + except IndexError: + key = Model.Output.names[0] + + n_obs = self.model_out_dict[key].shape[1] + + if n_obs == 1: + self._plot_validation() else: - self.plotValidationMulti(x_values=x_values, x_axis=x_axis) - + self._plot_validation_multi(x_values=x_values, x_axis=x_axis) + # ------------------------------------------------------------------------- - def accuracyCheckMetaModel(self, nSamples=None, Samples=None, validOutputsDict=None): + def check_accuracy(self, n_samples=None, samples=None, outputs=None): """ - Evaluate the relative error of the PCEModel + Checks accuracy of the metamodel by computing the root mean square + error and validation error for all outputs. + + Parameters + ---------- + n_samples : int, optional + Number of samples. The default is None. + samples : array of shape (n_samples, n_params), optional + Parameter sets to be checked. The default is None. + outputs : dict, optional + Output dictionary with model outputs for all given output types in + `Model.Output.names`. The default is None. + + Raises + ------ + Exception + When neither n_samples nor samples are provided. + + Returns + ------- + rmse: dict + Root mean squared error for each output. + valid_error : dict + Validation error for each output. 
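The validation error reported by `check_accuracy` rescales the squared RMSE by the number of samples and the output variance. A minimal sketch of both measures on synthetic data (the arrays here stand in for the model and surrogate outputs):

```python
import numpy as np
from sklearn.metrics import mean_squared_error

# Synthetic stand-ins of shape (n_samples, n_points)
rng = np.random.default_rng(0)
y_model = rng.random((100, 10))
y_pce = y_model + 0.01 * rng.standard_normal((100, 10))

# Per-point RMSE, then the variance-normalised validation error
rmse = mean_squared_error(y_model, y_pce, squared=False,
                          multioutput='raw_values')
valid_error = (rmse**2 / len(y_model)) / np.var(y_model, ddof=1, axis=0)
```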
+
+        """
-        metaModel = self.PCEModel
-
+        MetaModel = self.MetaModel
+        Model = MetaModel.ModelObj
+
        # Set the number of samples
-        self.NrofSamples = nSamples if nSamples is not None else Samples.shape[0]
-
-        # Generate random samples
-        Samples = self.get_Sample() if Samples is None else Samples
-
+        if n_samples:
+            self.n_samples = n_samples
+        elif samples is not None:
+            self.n_samples = samples.shape[0]
+        else:
+            raise Exception("Please provide either samples or pass number of "
+                            "samples!")
+
+        # Generate random samples if necessary
+        Samples = self._get_sample() if samples is None else samples
+
        # Run the original model with the generated samples
-        ModelOutputs = self.eval_Model(Samples, keyString='validSet') if validOutputsDict is None else validOutputsDict
+        if outputs is None:
+            outputs = self._eval_model(Samples, key_str='validSet')

        # Run the PCE model with the generated samples
-        PCEOutputs, PCEOutputs_std = metaModel.eval_metamodel(samples=Samples)
+        pce_outputs, _ = MetaModel.eval_metamodel(samples=Samples)

-        self.RMSE = {}
-        self.validErr = {}
+        self.rmse = {}
+        self.valid_error = {}
        # Loop over the keys and compute RMSE error.
-        for key in list(ModelOutputs.keys())[1:]:
-
-            self.RMSE[key] = mean_squared_error(ModelOutputs[key], PCEOutputs[key], squared=False, multioutput='raw_values')
+        for key in Model.Output.names:
+            # Root mean square error
+            self.rmse[key] = mean_squared_error(outputs[key], pce_outputs[key],
+                                                squared=False,
+                                                multioutput='raw_values')
+            # Validation error
+            self.valid_error[key] = (self.rmse[key]**2 / self.n_samples) / \
+                np.var(outputs[key], ddof=1, axis=0)

-            self.validErr[key] = (self.RMSE[key]**2 / self.NrofSamples) / \
-                np.var(ModelOutputs[key],ddof=1, axis=0)
-
            # Print a report table
            print("\n>>>>> Errors of {} <<<<<".format(key))
            print("\nIndex | RMSE | Validation Error")
            print('-'*35)
-            print('\n'.join('{0} | {1:.3e} | {2:.3e}'.format(i+1,k,j) for i,(k,j) \
-                            in enumerate(zip(self.RMSE[key], self.validErr[key]))))
+            print('\n'.join(f'{i+1} | {k:.3e} | {j:.3e}' for i, (k, j)
+                            in enumerate(zip(self.rmse[key],
+                                             self.valid_error[key]))))

        # Save error dicts in PCEModel object
-        self.PCEModel.RMSE = self.RMSE
-        self.PCEModel.validErr = self.validErr
-
-    # -------------------------------------------------------------------------
-    def plotValidation(self, SaveFig=True):
-        """
+        self.MetaModel.rmse = self.rmse
+        self.MetaModel.valid_error = self.valid_error

-        """
-        PCEModel = self.PCEModel
-
-
-        # get the samples
-        X_Val = self.Samples
-
-
-        Y_PC_Val = self.PCEOutputs
-        Y_Val = self.ModelOutputs
-
-        # Open a pdf for the plots
-        if SaveFig:
-            newpath = (r'Outputs_PostProcessing_{0}/'.format(self.Name))
-            if not os.path.exists(newpath): os.makedirs(newpath)
-
-            # create a PdfPages object
-            pdf1 = PdfPages('./'+newpath+'/Model_vs_PCEModel.pdf')
-
-        fig = plt.figure()
-        # Fit the data(train the model)
-        for key in Y_PC_Val.keys():
-
-            Y_PC_Val_ = Y_PC_Val[key]
-            Y_Val_ = Y_Val[key]
-
-            regression_model = LinearRegression()
-            regression_model.fit(Y_PC_Val_, Y_Val_)
-
-            # Predict
-            x_new = np.linspace(np.min(Y_PC_Val_), np.max(Y_Val_), 100)
-            y_predicted = regression_model.predict(x_new[:, np.newaxis])
-
-            plt.scatter(Y_PC_Val_, Y_Val_, color='gold', linewidth=2)
-            plt.plot(x_new, y_predicted, color = 'k')
-
-            # Calculate the adjusted R_squared and RMSE
-            # the total number of explanatory variables in the model (not including the constant term)
-            length_list = [len(value) for Key, value in PCEModel.basis_dict[key].items()]
-            NofPredictors = min(length_list)
-
TotalSampleSize = X_Val.shape[0] #sample size - - R2 = r2_score(Y_PC_Val_, Y_Val_) - AdjR2 = 1 - (1 - R2) * (TotalSampleSize - 1)/(TotalSampleSize - NofPredictors - 1) - RMSE = np.sqrt(mean_squared_error(Y_PC_Val_, Y_Val_)) - - plt.annotate('RMSE = '+ str(round(RMSE, 3)) + '\n' + r'Adjusted $R^2$ = '+ str(round(AdjR2, 3)), xy=(0.05, 0.85), xycoords='axes fraction') - - plt.ylabel("Original Model") - plt.xlabel("PCE Model") - - plt.grid() - plt.show() - - if SaveFig: - # save the current figure - pdf1.savefig(fig, bbox_inches='tight') - - # Destroy the current plot - plt.clf() - - # Close the pdfs - pdf1.close() + return self.rmse, self.valid_error # ------------------------------------------------------------------------- - def regQualityCheck(self, nValidSamples=1000, samples=None, SaveFig=True): + def plot_seq_design_diagnostics(self, ref_BME_KLD=None, save_fig=True): """ - 1) https://towardsdatascience.com/how-do-you-check-the-quality-of-your-regression-model-in-python-fa61759ff685 + Plots the Bayesian Model Evidence (BME) and Kullback-Leibler divergence + (KLD) for the sequential design. - """ - metaModel = self.PCEModel - - if samples is None: - self.NrofSamples = nValidSamples - samples = self.get_Sample() - else: - self.NrofSamples = samples.shape[0] - - - # Evaluate the original and the surrogate model - Y_Val = self.eval_Model(samples, keyString='valid') - Y_PC_Val, _ = metaModel.eval_metamodel(samples=samples,name=self.Name) + Parameters + ---------- + ref_BME_KLD : array, optional + Reference BME and KLD . The default is None. + save_fig : bool, optional + Whether to save the figures. The default is True. - # Open a pdf for the plots - if SaveFig: - newpath = (r'Outputs_PostProcessing_{0}/'.format(self.Name)) - if not os.path.exists(newpath): os.makedirs(newpath) - - # Fit the data(train the model) - for key in Y_PC_Val.keys(): - - Y_PC_Val_ = Y_PC_Val[key] - Y_Val_ = Y_Val[key] - - # ------ Residuals vs. predicting variables ------ - # Check the assumptions of linearity and independence - fig1 = plt.figure() - plt.title(key+": Residuals vs. predicting variables") - residuals = Y_Val_ - Y_PC_Val_ - plt.scatter(x=Y_Val_,y=residuals,color='blue',edgecolor='k') - plt.grid(True) - xmin=min(Y_Val_) - xmax = max(Y_Val_) - plt.hlines(y=0,xmin=xmin*0.9,xmax=xmax*1.1,color='red',linestyle='--',lw=3) - plt.xlabel(key) - plt.ylabel('Residuals') - plt.show() - - if SaveFig: - # save the current figure - fig1.savefig('./'+newpath+'/Residuals_vs_PredVariables.pdf', bbox_inches='tight') - - # Destroy the current plot - plt.clf() - - # ------ Fitted vs. residuals ------ - # Check the assumptions of linearity and independence - fig2 = plt.figure() - plt.title(key+": Residuals vs. predicting variables") - residuals = Y_Val_ - Y_PC_Val_ - plt.scatter(x=Y_PC_Val_,y=residuals,color='blue',edgecolor='k') - plt.grid(True) - xmin=min(Y_Val_) - xmax = max(Y_Val_) - plt.hlines(y=0,xmin=xmin*0.9,xmax=xmax*1.1,color='red',linestyle='--',lw=3) - plt.xlabel(key) - plt.ylabel('Residuals') - plt.show() + Returns + ------- + None. 
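Besides the PDF, each diagnostic sequence plotted by this routine is also dumped to disk (`Seq<criterion>.npy`, or a `.txt` dump of the per-replication dictionary). A hypothetical quick reload for custom plots, assuming a run named `calib` and the BME criterion:

```python
import numpy as np
import matplotlib.pyplot as plt

# Path follows the Outputs_PostProcessing_<name>/Seq<criterion>.npy pattern
# used below; 'calib' and 'BME' are assumptions for this sketch.
bme = np.load('Outputs_PostProcessing_calib/SeqBME.npy')
plt.semilogy(np.nan_to_num(bme), ls='--', marker='x')
plt.xlabel('Sequential iteration')
plt.ylabel('BME')
plt.show()
```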
- if SaveFig: - # save the current figure - fig2.savefig('./'+newpath+'/Fitted_vs_Residuals.pdf', bbox_inches='tight') - - # Destroy the current plot - plt.clf() - - # ------ Histogram of normalized residuals ------ - fig3 = plt.figure() - resid_pearson = residuals / (max(residuals)-min(residuals)) - plt.hist(resid_pearson,bins=20,edgecolor='k') - plt.ylabel('Count') - plt.xlabel('Normalized residuals') - plt.title(key+": Histogram of normalized residuals") - - # Normality (Shapiro-Wilk) test of the residuals - ax = plt.gca() - _,p=stats.shapiro(residuals) - if p<0.01: - annText= "The residuals seem to come from Gaussian process." - else: - annText= "The normality assumption may not hold." - from matplotlib.offsetbox import AnchoredText - at = AnchoredText(annText, - prop=dict(size=30), frameon=True, - loc='upper left', - ) - at.patch.set_boxstyle("round,pad=0.,rounding_size=0.2") - ax.add_artist(at) - - plt.show() - - if SaveFig: - # save the current figure - fig3.savefig('./'+newpath+'/Hist_NormResiduals.pdf', bbox_inches='tight') - - # Destroy the current plot - plt.clf() - - # ------ Q-Q plot of the normalized residuals ------ - from statsmodels.graphics.gofplots import qqplot - plt.figure() - fig4=qqplot(resid_pearson,line='45',fit='True') - plt.xticks() - plt.yticks() - plt.xlabel("Theoretical quantiles") - plt.ylabel("Sample quantiles") - plt.title(key+": Q-Q plot of normalized residuals") - plt.grid(True) - plt.show() - - if SaveFig: - # save the current figure - fig4.savefig('./'+newpath+'/QQPlot_NormResiduals.pdf', bbox_inches='tight') - - # Destroy the current plot - plt.clf() - - # ------------------------------------------------------------------------- - def plotValidationMulti(self, x_values=[], x_axis="x [m]", SaveFig=True): - - Model = self.PCEModel.ModelObj - - if SaveFig: - newpath = (r'Outputs_PostProcessing_{0}/'.format(self.Name)) - if not os.path.exists(newpath): os.makedirs(newpath) - - # create a PdfPages object - pdf = PdfPages('./'+newpath+'/Model_vs_PCEModel.pdf') + """ + PCEModel = self.MetaModel + n_init_samples = PCEModel.ExpDesign.n_init_samples + n_total_samples = PCEModel.ExpDesign.X.shape[0] - # List of markers and colors - color = cycle((['b', 'g', 'r', 'y', 'k'])) - marker = cycle(('x', 'd', '+', 'o', '*')) - - - Y_PC_Val = self.PCEOutputs - Y_PC_Val_std = self.PCEOutputs_std - - Y_Val = self.ModelOutputs - - OutNames = list(Y_Val.keys()) - if len(OutNames) == 1: OutNames.insert(0, x_axis) - try: - x_values = Y_Val[OutNames[0]] - except: - x_values = x_values - - fig = plt.figure(figsize=(24, 16)) - - # Plot the model vs PCE model - for keyIdx, key in enumerate(OutNames[1:]): + if save_fig: + newpath = f'Outputs_PostProcessing_{self.name}/' + if not os.path.exists(newpath): + os.makedirs(newpath) - Y_PC_Val_ = Y_PC_Val[key] - Y_PC_Val_std_ = Y_PC_Val_std[key] - Y_Val_ = Y_Val[key] - try: - x = x_values[key] - except: - x = x_values - - for idx in range(Y_Val_.shape[0]): - Color = next(color) - Marker = next(marker) - - plt.plot(x, Y_Val_[idx,:], color=Color, marker=Marker, label='$Y_{%s}^{M}$'%(idx+1)) - - plt.plot(x, Y_PC_Val_[idx,:], color=Color, marker=Marker, linestyle='--', label='$Y_{%s}^{PCE}$'%(idx+1)) - plt.fill_between(x, Y_PC_Val_[idx,:]-1.96*Y_PC_Val_std_[idx,:], - Y_PC_Val_[idx,:]+1.96*Y_PC_Val_std_[idx,:], color=Color,alpha=0.15) - - # Calculate the RMSE - RMSE = mean_squared_error(Y_PC_Val_, Y_Val_, squared=False) - R2 = r2_score(Y_PC_Val_[idx,:].reshape(-1,1), Y_Val_[idx,:].reshape(-1,1)) - - plt.annotate('RMSE = '+ str(round(RMSE, 3)) + 
'\n' + r'$R^2$ = '+ str(round(R2, 3)), - xy=(0.2, 0.75), xycoords='axes fraction') - - - plt.ylabel(key) - plt.xlabel(x_axis) - plt.legend(loc='best') - plt.grid() - - #plt.show() - - if SaveFig: - # save the current figure - pdf.savefig(fig, bbox_inches='tight') - - # Destroy the current plot - plt.clf() - - pdf.close() - - # Cleanup - #Zip the subdirectories - try: - dir_name = Model.name + 'valid' - key = dir_name + '_' - Model.zip_subdirs(dir_name, key) - except: - pass - - # ------------------------------------------------------------------------- - def seqDesignDiagnosticPlots(self, refBME_KLD=None, SaveFig=True): - """ - Plot the Kullback-Leibler divergence in the sequential design. - - """ - PCEModel = self.PCEModel - NrofInitSamples = PCEModel.ExpDesign.n_init_samples - totalNSamples = PCEModel.ExpDesign.X.shape[0] - - if SaveFig: - newpath = (r'Outputs_PostProcessing_{0}/'.format(self.Name)) - if not os.path.exists(newpath): os.makedirs(newpath) - # create a PdfPages object - pdf = PdfPages('./'+newpath+'/seqPCEModelDiagnostics.pdf') - - plotList = ['Modified LOO error', 'Validation error', 'KLD', 'BME', + pdf = PdfPages(f'./{newpath}/seqPCEModelDiagnostics.pdf') + + plotList = ['Modified LOO error', 'Validation error', 'KLD', 'BME', 'RMSEMean', 'RMSEStd', 'Hellinger distance'] - seqList = [PCEModel.SeqModifiedLOO,PCEModel.seqValidError, PCEModel.SeqKLD, - PCEModel.SeqBME, PCEModel.seqRMSEMean, PCEModel.seqRMSEStd, - PCEModel.SeqDistHellinger] - + seqList = [PCEModel.SeqModifiedLOO, PCEModel.seqValidError, + PCEModel.SeqKLD, PCEModel.SeqBME, PCEModel.seqRMSEMean, + PCEModel.seqRMSEStd, PCEModel.SeqDistHellinger] + markers = ('x', 'o', 'd', '*', '+') colors = ('k', 'darkgreen', 'b', 'navy', 'darkred') - - # Plot the evolution of the diagnostic criteria of the Sequential Experimental Design. - for plotidx, plot in enumerate(plotList): + + # Plot the evolution of the diagnostic criteria of the + # Sequential Experimental Design. + for plotidx, plot in enumerate(plotList): fig, ax = plt.subplots() - SeqDict = seqList[plotidx] - nameUtil = list(SeqDict.keys()) - - if len(nameUtil) == 0: + seq_dict = seqList[plotidx] + name_util = list(seq_dict.keys()) + + if len(name_util) == 0: continue - + # Box plot when Replications have been detected. 
-            if any(int(name.split("rep_",1)[1])>1 for name in nameUtil):
+            if any(int(name.split("rep_", 1)[1]) > 1 for name in name_util):
                # Extract the values from dict
-                sortedSeqOpt = {}
+                sorted_seq_opt = {}
                # Number of replications
-                nReps = PCEModel.ExpDesign.nReprications
-
+                n_reps = PCEModel.ExpDesign.nReprications
+
                # Get the list of utility function names
                # Handle if only one UtilityFunction is provided
-                if not isinstance(PCEModel.ExpDesign.UtilityFunction, list):
-                    UtilFuncs = [PCEModel.ExpDesign.UtilityFunction]
+                if not isinstance(PCEModel.ExpDesign.UtilityFunction, list):
+                    util_funcs = [PCEModel.ExpDesign.UtilityFunction]
                else:
-                    UtilFuncs = PCEModel.ExpDesign.UtilityFunction
-
-                for keyOpt in UtilFuncs:
+                    util_funcs = PCEModel.ExpDesign.UtilityFunction
+
+                for util in util_funcs:
                    sortedSeq = {}
                    # min number of runs available from reps
-                    nRuns = min([SeqDict[keyOpt +'_rep_'+str(i+1)].shape[0] for i in range(nReps)])
-
-                    for runIdx in range(nRuns):
+                    n_runs = min([seq_dict[f'{util}_rep_{i+1}'].shape[0]
+                                  for i in range(n_reps)])
+
+                    for runIdx in range(n_runs):
                        values = []
-                        for key in SeqDict.keys():
-                            if keyOpt in key:
-                                values.append(SeqDict[key][runIdx].mean())
-                        sortedSeq['SeqItr_'+str(runIdx)] = np.array(values)
-                    sortedSeqOpt[keyOpt] = sortedSeq
-
+                        for key in seq_dict.keys():
+                            if util in key:
+                                values.append(seq_dict[key][runIdx].mean())
+                        sortedSeq['SeqItr_'+str(runIdx)] = np.array(values)
+                    sorted_seq_opt[util] = sortedSeq
+
                # BoxPlot
                def draw_plot(data, labels, edge_color, fill_color, idx):
                    pos = labels - (idx-1)
                    bp = plt.boxplot(data, positions=pos, labels=labels,
-                                     patch_artist=True , sym='', widths=0.75)
-
-                    for element in ['boxes', 'whiskers', 'fliers', 'means', 'medians', 'caps']:
+                                     patch_artist=True, sym='', widths=0.75)
+                    elements = ['boxes', 'whiskers', 'fliers', 'means',
+                                'medians', 'caps']
+                    for element in elements:
                        plt.setp(bp[element], color=edge_color[idx])
-
+
                    for patch in bp['boxes']:
                        patch.set(facecolor=fill_color[idx])
-
-
-                step1 = PCEModel.ExpDesign.n_new_samples if PCEModel.ExpDesign.n_new_samples!=1 else 5
-                step2 = 1 if PCEModel.ExpDesign.n_new_samples!=1 else 5
+
+                if PCEModel.ExpDesign.n_new_samples != 1:
+                    step1 = PCEModel.ExpDesign.n_new_samples
+                    step2 = 1
+                else:
+                    step1 = 5
+                    step2 = 5
                edge_color = ['red', 'blue', 'green']
                fill_color = ['tan', 'cyan', 'lightgreen']
-
-                plotLabel = plot
+                plot_label = plot
                # Plot for different Utility Functions
-                for idx, util in enumerate(UtilFuncs):
-                    allErrors = np.empty((nReps, 0))
-
-                    for key in list(sortedSeqOpt[util].keys()):
-                        allErrors = np.hstack((allErrors, sortedSeqOpt.get(util, {}).get(key)[:,None]))
-
+                for idx, util in enumerate(util_funcs):
+                    all_errors = np.empty((n_reps, 0))
+
+                    for key in list(sorted_seq_opt[util].keys()):
+                        errors = sorted_seq_opt.get(util, {}).get(key)[:, None]
+                        all_errors = np.hstack((all_errors, errors))
+
                    # Special cases for BME and KLD
                    if plot == 'KLD' or plot == 'BME':
                        # BME convergence if refBME is provided
-                        if refBME_KLD is not None:
-                            if plot == 'BME': refValue, plotLabel = refBME_KLD[0], r'$BME/BME^{Ref.}$'
-                            if plot == 'KLD': refValue, plotLabel = refBME_KLD[1], r'$D_{KL}[p(\theta|y_*),p(\theta)] / D_{KL}^{Ref.}[p(\theta|y_*), p(\theta)]$'
-
+                        if ref_BME_KLD is not None:
+                            if plot == 'BME':
+                                refValue = ref_BME_KLD[0]
+                                plot_label = r'$BME/BME^{Ref.}$'
+                            if plot == 'KLD':
+                                refValue = ref_BME_KLD[1]
+                                plot_label = r'$D_{KL}[p(\theta|y_*),p(\theta)]'\
+                                    r' / D_{KL}^{Ref.}[p(\theta|y_*), '\
+                                    r'p(\theta)]$'
+
                            # Difference between BME/KLD and the ref. values
-                            allErrors = np.divide(allErrors,np.full((allErrors.shape), refValue))
+                            all_errors = np.divide(all_errors,
+                                                   np.full((all_errors.shape),
+                                                           refValue))
                            # Plot baseline for zero, i.e. no difference
-                            plt.axhline(y=1.0, xmin=0, xmax=1, c='green', ls='--', lw=2)
-
+                            plt.axhline(y=1.0, xmin=0, xmax=1, c='green',
+                                        ls='--', lw=2)
+
                    # Plot each UtilFuncs
-                    labels = np.arange(NrofInitSamples, totalNSamples+1, step1)
-                    draw_plot(allErrors[:,::step2], labels, edge_color, fill_color, idx)
-
+                    labels = np.arange(n_init_samples, n_total_samples+1, step1)
+                    draw_plot(all_errors[:, ::step2], labels, edge_color,
+                              fill_color, idx)
+
                plt.xticks(labels, labels)
                # Set the major and minor locators
                ax.xaxis.set_major_locator(ticker.AutoLocator())
                ax.xaxis.set_minor_locator(ticker.AutoMinorLocator())
                ax.xaxis.grid(True, which='major', linestyle='-')
                ax.xaxis.grid(True, which='minor', linestyle='--')
-
-                from matplotlib.patches import Patch
-                legend_elements = [Patch(facecolor=fill_color[idx], edgecolor=edge_color[idx],
-                                         label=util) for idx,util in enumerate(UtilFuncs)]
+
+                # Legend
+                legend_elements = []
+                for idx, util in enumerate(util_funcs):
+                    legend_elements.append(Patch(facecolor=fill_color[idx],
+                                                 edgecolor=edge_color[idx],
+                                                 label=util))
                plt.legend(handles=legend_elements[::-1], loc='best')
-
-                if plot != 'BME' and plot != 'KLD': plt.yscale('log')
+
+                if plot != 'BME' and plot != 'KLD':
+                    plt.yscale('log')
                plt.autoscale(True)
-                plt.xlabel('\# of training samples')
-                plt.ylabel(plotLabel)
+                plt.xlabel('\\# of training samples')
+                plt.ylabel(plot_label)
                plt.title(plot)
-
-                if SaveFig:
+
+                if save_fig:
                    # save the current figure
                    pdf.savefig(fig, bbox_inches='tight')
-
                    # Destroy the current plot
                    plt.clf()

                # Save arrays into files
-                #np.savetxt('./'+newpath+'/Seq'+plot+'.txt', sortedSeqOpt)
-                f = open('./'+newpath+'/Seq'+plot+'.txt',"w")
-                f.write( str(sortedSeqOpt) )
+                f = open(f'./{newpath}/Seq{plot}.txt', 'w')
+                f.write(str(sorted_seq_opt))
                f.close()
-
-
-            else:
-                # fig,ax = plt.subplots()
-
-                for idx, name in enumerate(nameUtil):
-                    SeqValues = SeqDict[name]
-                    step = PCEModel.ExpDesign.n_new_samples if PCEModel.ExpDesign.n_new_samples!=1 else 1
-                    x_idx = np.arange(NrofInitSamples, totalNSamples+1, step)
-                    x_idx = np.hstack((x_idx, totalNSamples)) if totalNSamples not in x_idx else x_idx
+            else:
+                for idx, name in enumerate(name_util):
+                    seq_values = seq_dict[name]
+                    if PCEModel.ExpDesign.n_new_samples != 1:
+                        step = PCEModel.ExpDesign.n_new_samples
+                    else:
+                        step = 1
+                    x_idx = np.arange(n_init_samples, n_total_samples+1, step)
+                    if n_total_samples not in x_idx:
+                        x_idx = np.hstack((x_idx, n_total_samples))

                    if plot == 'KLD' or plot == 'BME':
                        # BME convergence if refBME is provided
-                        if refBME_KLD is not None:
-                            if plot == 'BME': refValue, plotLabel = refBME_KLD[0], r'$BME/BME^{Ref.}$'
-                            if plot == 'KLD': refValue, plotLabel = refBME_KLD[1], r'$D_{KL}[p(\theta|y_*),p(\theta)] / D_{KL}^{Ref.}[p(\theta|y_*), p(\theta)]$'
-
+                        if ref_BME_KLD is not None:
+                            if plot == 'BME':
+                                refValue = ref_BME_KLD[0]
+                                plot_label = r'$BME/BME^{Ref.}$'
+                            if plot == 'KLD':
+                                refValue = ref_BME_KLD[1]
+                                plot_label = r'$D_{KL}[p(\theta|y_*),p(\theta)]'\
+                                    r' / D_{KL}^{Ref.}[p(\theta|y_*), '\
+                                    r'p(\theta)]$'
+
                            # Difference between BME/KLD and the ref. values
-                            values = np.divide(SeqValues,np.full((SeqValues.shape), refValue))
-
+                            values = np.divide(seq_values,
+                                               np.full((seq_values.shape),
+                                                       refValue))
+
+                            # Plot baseline for zero, i.e.
no difference - plt.axhline(y=1.0, xmin=0, xmax=1, c='green', ls='--', lw=2) - + plt.axhline(y=1.0, xmin=0, xmax=1, c='green', + ls='--', lw=2) + # Set the limits plt.ylim([1e-1, 1e1]) - + # Create the plots - plt.semilogy(x_idx, values, marker=markers[idx], color=colors[idx], - ls='--', lw=2, label=name.split("_rep",1)[0]) + plt.semilogy(x_idx, values, marker=markers[idx], + color=colors[idx], ls='--', lw=2, + label=name.split("_rep", 1)[0]) else: - plotLabel = plot - + plot_label = plot + # Create the plots - plt.plot(x_idx, SeqValues, marker=markers[idx], color=colors[idx], - ls='--', lw=2, label=name.split("_rep",1)[0]) - - + plt.plot(x_idx, seq_values, marker=markers[idx], + color=colors[idx], ls='--', lw=2, + label=name.split("_rep", 1)[0]) + else: - plotLabel = plot - SeqValues = np.nan_to_num(SeqValues) - + plot_label = plot + seq_values = np.nan_to_num(seq_values) + # Plot the error evolution for each output - for i in range(SeqValues.shape[1]): - plt.semilogy(x_idx, SeqValues[:,i], marker=markers[idx], - ls='--', lw=2, color=colors[idx], alpha=0.15) - - plt.semilogy(x_idx, SeqValues, marker=markers[idx], - ls='--', lw=2, color=colors[idx], label=name.split("_rep",1)[0]) - - + for i in range(seq_values.shape[1]): + plt.semilogy(x_idx, seq_values[:, i], ls='--', + lw=2, marker=markers[idx], + color=colors[idx], alpha=0.15) + + plt.semilogy(x_idx, seq_values, marker=markers[idx], + ls='--', lw=2, color=colors[idx], + label=name.split("_rep", 1)[0]) + # Set the major and minor locators ax.xaxis.set_major_locator(ticker.AutoLocator()) ax.xaxis.set_minor_locator(ticker.AutoMinorLocator()) ax.xaxis.grid(True, which='major', linestyle='-') ax.xaxis.grid(True, which='minor', linestyle='--') - - ax.tick_params(axis='both', which='major', direction='in', width=3, length=10) - ax.tick_params(axis='both', which='minor', direction='in', width=2, length=8) - + + ax.tick_params(axis='both', which='major', direction='in', + width=3, length=10) + ax.tick_params(axis='both', which='minor', direction='in', + width=2, length=8) plt.xlabel('Number of runs') - plt.ylabel(plotLabel) + plt.ylabel(plot_label) plt.title(plot) plt.legend(frameon=True) - - - if SaveFig: + + if save_fig: # save the current figure pdf.savefig(fig, bbox_inches='tight') - # Destroy the current plot plt.clf() # ---------------- Saving arrays into files --------------- - np.save('./'+newpath+'/Seq'+plot+'.npy', SeqValues) - + np.save(f'./{newpath}/Seq{plot}.npy', seq_values) + # Close the pdf pdf.close() return - + # ------------------------------------------------------------------------- - def sobolIndicesPCE(self, xlabel='Time [s]', plotType=None, SaveFig=True): + def sobol_indices(self, xlabel='Time [s]', plot_type=None, save_fig=True): """ Provides Sobol indices as a sensitivity measure to infer the importance of the input parameters. See Eq. 27 in [1] for more details. @@ -893,13 +549,32 @@ class PostProcessing: sensitivity analysis and model calibration: Application to urban drainage simulation. Reliability Engineering & System Safety, 195, p.106737. + + Parameters + ---------- + xlabel : TYPE, optional + DESCRIPTION. The default is 'Time [s]'. + plot_type : TYPE, optional + DESCRIPTION. The default is None. + save_fig : TYPE, optional + DESCRIPTION. The default is True. + + Returns + ------- + sobol_cell: dict + Sobol indices. + total_sobol: dict + Total Sobol indices. 
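For an orthonormal PCE basis, the Sobol indices assembled in the following lines reduce to ratios of summed squared coefficients. A toy sketch with a hypothetical multi-index set (rows = terms, columns = parameters) and matching coefficient vector:

```python
import numpy as np

basis = np.array([[0, 0], [1, 0], [0, 1], [1, 1]])  # constant, x1, x2, x1*x2
coeffs = np.array([2.0, 0.5, 0.3, 0.1])             # hypothetical PCE coeffs

nz = basis.sum(axis=1) > 0                # exclude the constant term
total_var = np.sum(coeffs[nz]**2)         # total variance of the expansion

for p in range(basis.shape[1]):
    # Total Sobol index: every term in which parameter p participates
    idx = nz & (basis[:, p] > 0)
    print(f'S_T[{p}] = {np.sum(coeffs[idx]**2) / total_var:.3f}')
```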
+ """ # Extract the necessary variables - PCEModel = self.PCEModel + PCEModel = self.MetaModel basis_dict = PCEModel.basis_dict coeffs_dict = PCEModel.coeffs_dict - NofPa = PCEModel.n_params + n_params = PCEModel.n_params max_order = np.max(PCEModel.pce_deg) + self.sobol_cell = {} + self.total_sobol = {} for Output in PCEModel.ModelObj.Output.names: @@ -910,11 +585,11 @@ class PostProcessing: sobol_cell_array = dict.fromkeys(range(1, max_order+1), []) for i_order in range(1, max_order+1): - n_comb = math.comb(NofPa, i_order) + n_comb = math.comb(n_params, i_order) sobol_cell_array[i_order] = np.zeros((n_comb, n_meas_points)) - total_sobol_array = np.zeros((NofPa, n_meas_points)) + total_sobol_array = np.zeros((n_params, n_meas_points)) # Initialize the cell to store the names of the variables TotalVariance = np.zeros((n_meas_points)) @@ -948,7 +623,7 @@ class PostProcessing: for i_order in range(1, max_order+1): idx = np.where(np.sum(nz_basis > 0, axis=1) == i_order) subbasis = nz_basis[idx] - Z = np.array(list(combinations(range(NofPa), i_order))) + Z = np.array(list(combinations(range(n_params), i_order))) for q in range(Z.shape[0]): Zq = Z[q] @@ -963,7 +638,7 @@ class PostProcessing: sobol_cell_array[i_order][q, pIdx] = sobol # Compute the TOTAL Sobol indices. - for ParIdx in range(NofPa): + for ParIdx in range(n_params): idx = nz_basis[:, ParIdx] > 0 sum_ind = nzidx[idx] @@ -976,7 +651,7 @@ class PostProcessing: # ----- if PCA selected: Compute covariance ----- if PCEModel.dim_red_method.lower() == 'pca': - cov_Z_p_q = np.zeros((NofPa)) + cov_Z_p_q = np.zeros((n_params)) # Extract the basis indices (alpha) and coefficients for # next component if pIdx < n_meas_points-1: @@ -992,7 +667,7 @@ class PostProcessing: mask = (Basis[:, None] == nextBasis).all(-1).any(-1) similar_basis = Basis[mask] # Compute the TOTAL Sobol indices. - for ParIdx in range(NofPa): + for ParIdx in range(n_params): idx = similar_basis[:, ParIdx] > 0 try: sum_is = nzidx[idx] @@ -1011,9 +686,9 @@ class PostProcessing: # Extract the sobol index of the components for i_order in range(1, max_order+1): - n_comb = math.comb(NofPa, i_order) + n_comb = math.comb(n_params, i_order) sobol_array[i_order] = np.zeros((n_comb, n_c_points)) - Z = np.array(list(combinations(range(NofPa), i_order))) + Z = np.array(list(combinations(range(n_params), i_order))) for q in range(Z.shape[0]): S_Z_i = sobol_cell_array[i_order][q] @@ -1034,8 +709,8 @@ class PostProcessing: sobol_array[i_order][q, tIdx] = term1 #+ term2 # Compute the TOTAL Sobol indices. 
-            total_sobol = np.zeros((NofPa, n_c_points))
-            for ParIdx in range(NofPa):
+            total_sobol = np.zeros((n_params, n_c_points))
+            for ParIdx in range(n_params):
                S_Z_i = total_sobol_array[ParIdx]

                for tIdx in range(n_c_points):
@@ -1073,11 +748,11 @@ class PostProcessing:
            cases = ['']

        for case in cases:
-            newpath = (f'Outputs_PostProcessing_{self.Name}/')
+            newpath = (f'Outputs_PostProcessing_{self.name}/')
            if not os.path.exists(newpath):
                os.makedirs(newpath)

-            if SaveFig:
+            if save_fig:
                # create a PdfPages object
                name = case+'_' if 'Valid' in cases else ''
                pdf = PdfPages('./'+newpath+name+'Sobol_indices.pdf')
@@ -1095,7 +770,7 @@ class PostProcessing:
                    else:
                        x = x_values_orig

-                    if plotType == 'bar':
+                    if plot_type == 'bar':
                        ax = fig.add_axes([0, 0, 1, 1])
                        dict1 = {xlabel: x}
                        dict2 = {param: sobolIndices for param, sobolIndices
@@ -1115,7 +790,7 @@ class PostProcessing:
                    plt.xlabel(xlabel)
                    plt.title(f'Sensitivity analysis of {Output}')
-                    if plotType != 'bar':
+                    if plot_type != 'bar':
                        plt.legend(loc='best', frameon=True)

                    # Save indices
@@ -1124,7 +799,7 @@ class PostProcessing:
                               total_sobol.T, delimiter=',',
                               header=','.join(par_names), comments='')

-                    if SaveFig:
+                    if save_fig:
                        # save the current figure
                        pdf.savefig(fig, bbox_inches='tight')

@@ -1134,3 +809,498 @@ class PostProcessing:
            pdf.close()

        return self.sobol_cell, self.total_sobol
+
+    # -------------------------------------------------------------------------
+    def check_reg_quality(self, n_samples=1000, samples=None, save_fig=True):
+        """
+        Checks the quality of the metamodel for single output models.
+        https://towardsdatascience.com/how-do-you-check-the-quality-of-your-regression-model-in-python-fa61759ff685
+
+
+        Parameters
+        ----------
+        n_samples : int, optional
+            Number of parameter sets to use for the check. The default is 1000.
+        samples : array of shape (n_samples, n_params), optional
+            Parameter sets to use for the check. The default is None.
+        save_fig : bool, optional
+            Whether to save the figures. The default is True.
+
+        Returns
+        -------
+        None.
+
+        """
+        MetaModel = self.MetaModel
+
+        if samples is None:
+            self.n_samples = n_samples
+            samples = self._get_sample()
+        else:
+            self.n_samples = samples.shape[0]
+
+        # Evaluate the original and the surrogate model
+        y_val = self._eval_model(samples, key_str='valid')
+        y_pce_val, _ = MetaModel.eval_metamodel(samples=samples,
+                                                name=self.name)
+
+        # Open a pdf for the plots
+        if save_fig:
+            newpath = (r'Outputs_PostProcessing_{0}/'.format(self.name))
+            if not os.path.exists(newpath):
+                os.makedirs(newpath)
+
+        # Fit the data (train the model)
+        for key in y_pce_val.keys():
+
+            y_pce_val_ = y_pce_val[key]
+            y_val_ = y_val[key]
+
+            # ------ Residuals vs. predicting variables ------
+            # Check the assumptions of linearity and independence
+            fig1 = plt.figure()
+            plt.title(key+": Residuals vs. predicting variables")
+            residuals = y_val_ - y_pce_val_
+            plt.scatter(x=y_val_, y=residuals, color='blue', edgecolor='k')
+            plt.grid(True)
+            xmin, xmax = min(y_val_), max(y_val_)
+            plt.hlines(y=0, xmin=xmin*0.9, xmax=xmax*1.1, color='red', lw=3,
+                       linestyle='--')
+            plt.xlabel(key)
+            plt.ylabel('Residuals')
+            plt.show()
+
+            if save_fig:
+                # save the current figure
+                fig1.savefig(f'./{newpath}/Residuals_vs_PredVariables.pdf',
+                             bbox_inches='tight')
+                # Destroy the current plot
+                plt.clf()
+
+            # ------ Fitted vs. residuals ------
+            # Check the assumptions of linearity and independence
+            fig2 = plt.figure()
+            plt.title(key+": Fitted vs. residuals")
+            plt.scatter(x=y_pce_val_, y=residuals, color='blue', edgecolor='k')
+            plt.grid(True)
+            xmin, xmax = min(y_val_), max(y_val_)
+            plt.hlines(y=0, xmin=xmin*0.9, xmax=xmax*1.1, color='red', lw=3,
+                       linestyle='--')
+            plt.xlabel(key)
+            plt.ylabel('Residuals')
+            plt.show()
+
+            if save_fig:
+                # save the current figure
+                fig2.savefig(f'./{newpath}/Fitted_vs_Residuals.pdf',
+                             bbox_inches='tight')
+                # Destroy the current plot
+                plt.clf()
+
+            # ------ Histogram of normalized residuals ------
+            fig3 = plt.figure()
+            resid_pearson = residuals / (max(residuals)-min(residuals))
+            plt.hist(resid_pearson, bins=20, edgecolor='k')
+            plt.ylabel('Count')
+            plt.xlabel('Normalized residuals')
+            plt.title(f"{key}: Histogram of normalized residuals")
+
+            # Normality (Shapiro-Wilk) test of the residuals
+            ax = plt.gca()
+            _, p = stats.shapiro(residuals)
+            if p < 0.01:
+                annText = "The normality assumption may not hold."
+            else:
+                annText = "The residuals seem to come from a Gaussian process."
+            at = AnchoredText(annText, prop=dict(size=30), frameon=True,
+                              loc='upper left')
+            at.patch.set_boxstyle("round,pad=0.,rounding_size=0.2")
+            ax.add_artist(at)
+
+            plt.show()
+
+            if save_fig:
+                # save the current figure
+                fig3.savefig(f'./{newpath}/Hist_NormResiduals.pdf',
+                             bbox_inches='tight')
+                # Destroy the current plot
+                plt.clf()
+
+            # ------ Q-Q plot of the normalized residuals ------
+            plt.figure()
+            fig4 = qqplot(resid_pearson, line='45', fit=True)
+            plt.xticks()
+            plt.yticks()
+            plt.xlabel("Theoretical quantiles")
+            plt.ylabel("Sample quantiles")
+            plt.title(key+": Q-Q plot of normalized residuals")
+            plt.grid(True)
+            plt.show()
+
+            if save_fig:
+                # save the current figure
+                fig4.savefig(f'./{newpath}/QQPlot_NormResiduals.pdf',
+                             bbox_inches='tight')
+                # Destroy the current plot
+                plt.clf()
+
+    # -------------------------------------------------------------------------
+    def eval_pce_model_3d(self, save_fig=True):
+
+        self.n_samples = 1000
+
+        PCEModel = self.MetaModel
+        Model = self.MetaModel.ModelObj
+        n_samples = self.n_samples
+
+        # Create 3D-Grid
+        # TODO: Make it general
+        x = np.linspace(-5, 10, n_samples)
+        y = np.linspace(0, 15, n_samples)
+
+        X, Y = np.meshgrid(x, y)
+        PCE_Z = np.zeros((self.n_samples, self.n_samples))
+        Model_Z = np.zeros((self.n_samples, self.n_samples))
+
+        for idxMesh in range(self.n_samples):
+            sample_mesh = np.vstack((X[:, idxMesh], Y[:, idxMesh])).T
+
+            univ_p_val = PCEModel.univ_basis_vals(sample_mesh)
+
+            for Outkey, ValuesDict in PCEModel.coeffs_dict.items():
+
+                pce_out_mean = np.zeros((len(sample_mesh), len(ValuesDict)))
+                pce_out_std = np.zeros((len(sample_mesh), len(ValuesDict)))
+                model_outs = np.zeros((len(sample_mesh), len(ValuesDict)))
+
+                for Inkey, InIdxValues in ValuesDict.items():
+                    idx = int(Inkey.split('_')[1]) - 1
+                    basis_deg_ind = PCEModel.basis_dict[Outkey][Inkey]
+                    clf_poly = PCEModel.clf_poly[Outkey][Inkey]
+
+                    PSI_Val = PCEModel.create_psi(basis_deg_ind, univ_p_val)
+
+                    # Prediction with error bar
+                    y_mean, y_std = clf_poly.predict(PSI_Val, return_std=True)
+
+                    pce_out_mean[:, idx] = y_mean
+                    pce_out_std[:, idx] = y_std
+
+                    # Model evaluation
+                    model_out_dict, _ = Model.run_model_parallel(sample_mesh,
+                                                                 key_str='Valid3D')
+                    model_outs[:, idx] = model_out_dict[Outkey].T
+
+                PCE_Z[:, idxMesh] = y_mean
+                Model_Z[:, idxMesh] = model_outs[:, 0]
+
+        # ---------------- 3D plot for PCEModel -----------------------
+        fig_PCE = plt.figure()
+        ax = plt.axes(projection='3d')
+        ax.plot_surface(X, Y, PCE_Z, rstride=1, cstride=1,
+                        cmap='viridis',
edgecolor='none')
+        ax.set_title('PCEModel')
+        ax.set_xlabel('$x_1$')
+        ax.set_ylabel('$x_2$')
+        ax.set_zlabel('$f(x_1,x_2)$')
+
+        plt.grid()
+        plt.show()
+
+        if save_fig:
+            # Saving the figure
+            newpath = f'Outputs_PostProcessing_{self.name}/'
+            if not os.path.exists(newpath):
+                os.makedirs(newpath)
+
+            # save the figure to file
+            fig_PCE.savefig(f'./{newpath}/3DPlot_PCEModel.pdf', format="pdf",
+                            bbox_inches='tight')
+            plt.close(fig_PCE)
+
+        # ---------------- 3D plot for Model -----------------------
+        fig_Model = plt.figure()
+        ax = plt.axes(projection='3d')
+        ax.plot_surface(X, Y, Model_Z, rstride=1, cstride=1,
+                        cmap='viridis', edgecolor='none')
+        ax.set_title('Model')
+        ax.set_xlabel('$x_1$')
+        ax.set_ylabel('$x_2$')
+        ax.set_zlabel('$f(x_1,x_2)$')
+
+        plt.grid()
+        plt.show()
+
+        if save_fig:
+            # Save the figure
+            fig_Model.savefig(f'./{newpath}/3DPlot_Model.pdf', format="pdf",
+                              bbox_inches='tight')
+            plt.close(fig_Model)
+
+        return
+
+    # -------------------------------------------------------------------------
+    def _compute_pce_moments(self):
+        """
+        Computes the first two moments using the PCE-based meta-model.
+
+        Returns
+        -------
+        None.
+
+        """
+
+        MetaModel = self.MetaModel
+        self.pce_means = {}
+        self.pce_stds = {}
+
+        for Outkey, ValuesDict in MetaModel.coeffs_dict.items():
+
+            pce_mean = np.zeros((len(ValuesDict)))
+            pce_var = np.zeros((len(ValuesDict)))
+
+            for Inkey, InIdxValues in ValuesDict.items():
+                idx = int(Inkey.split('_')[1]) - 1
+                coeffs = MetaModel.coeffs_dict[Outkey][Inkey]
+
+                # Mean = c_0
+                if coeffs[0] != 0:
+                    pce_mean[idx] = coeffs[0]
+                else:
+                    pce_mean[idx] = MetaModel.clf_poly[Outkey][Inkey].intercept_
+
+                # Var = sum(coeffs[1:]**2)
+                pce_var[idx] = np.sum(np.square(coeffs[1:]))
+
+            # Back transformation if PCA is selected.
+            if MetaModel.dim_red_method.lower() == 'pca':
+                PCA = MetaModel.pca[Outkey]
+                self.pce_means[Outkey] = PCA.mean_
+                self.pce_means[Outkey] += np.dot(pce_mean, PCA.components_)
+                self.pce_stds[Outkey] = np.sqrt(np.dot(pce_var,
+                                                       PCA.components_**2))
+            else:
+                self.pce_means[Outkey] = pce_mean
+                self.pce_stds[Outkey] = np.sqrt(pce_var)
+
+            # Print a report table
+            print("\n>>>>> Moments of {} <<<<<".format(Outkey))
+            print("\nIndex | Mean | Std. deviation")
+            print('-'*35)
+            print('\n'.join(f'{i+1} | {k:.3e} | {j:.3e}' for i, (k, j)
+                            in enumerate(zip(self.pce_means[Outkey],
+                                             self.pce_stds[Outkey]))))
+            print('-'*40)
+
+    # -------------------------------------------------------------------------
+    def _get_sample(self, n_samples=None):
+        """
+        Generates random samples taken from the input parameter space.
+
+        Returns
+        -------
+        samples : array of shape (n_samples, n_params)
+            Generated samples.
+
+        """
+        if n_samples is None:
+            n_samples = self.n_samples
+        PCEModel = self.MetaModel
+        self.samples = PCEModel.ExpDesign.generate_samples(n_samples, 'random')
+        return self.samples
+
+    # -------------------------------------------------------------------------
+    def _eval_model(self, samples=None, key_str='Valid'):
+        """
+        Evaluates Forward Model for the given number of self.samples or given
+        samples.
+
+        Parameters
+        ----------
+        samples : array of shape (n_samples, n_params), optional
+            Samples to evaluate the model at. The default is None.
+        key_str : string, optional
+            Key string passed to the model. The default is 'Valid'.
+
+        Returns
+        -------
+        model_outs : dict
+            Dictionary of results.
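A quick numeric check of the identities used in `_compute_pce_moments` above (mean = constant coefficient, variance = sum of squared higher-order coefficients), with hypothetical coefficients:

```python
import numpy as np

coeffs = np.array([1.2, 0.4, -0.3, 0.05])  # hypothetical PCE coefficients

mean = coeffs[0]                           # c_0
std = np.sqrt(np.sum(coeffs[1:]**2))       # sqrt(sum c_i^2), i >= 1
print(mean, std)                           # 1.2 0.5025 (approx.)
```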
+ + """ + Model = self.MetaModel.ModelObj + + if samples is None: + samples = self._get_sample() + self.samples = samples + else: + self.n_samples = len(samples) + + model_outs, _ = Model.run_model_parallel(samples, key_str=key_str) + + return model_outs + + # ------------------------------------------------------------------------- + def _plot_validation(self, save_fig=True): + """ + Plots outputs for visual comparison of metamodel outputs with that of + the (full) original model. + + Parameters + ---------- + save_fig : bool, optional + Save the plots. The default is True. + + Returns + ------- + None. + + """ + PCEModel = self.MetaModel + + # get the samples + x_val = self.samples + y_pce_val = self.pce_out_mean + y_val = self.model_out_dict + + # Open a pdf for the plots + if save_fig: + newpath = f'Outputs_PostProcessing_{self.name}/' + if not os.path.exists(newpath): + os.makedirs(newpath) + + # create a PdfPages object + pdf1 = PdfPages(f'./{newpath}/Model_vs_PCEModel.pdf') + + fig = plt.figure() + # Fit the data(train the model) + for key in y_pce_val.keys(): + + y_pce_val_ = y_pce_val[key] + y_val_ = y_val[key] + + regression_model = LinearRegression() + regression_model.fit(y_pce_val_, y_val_) + + # Predict + x_new = np.linspace(np.min(y_pce_val_), np.max(y_val_), 100) + y_predicted = regression_model.predict(x_new[:, np.newaxis]) + + plt.scatter(y_pce_val_, y_val_, color='gold', linewidth=2) + plt.plot(x_new, y_predicted, color='k') + + # Calculate the adjusted R_squared and RMSE + # the total number of explanatory variables in the model + # (not including the constant term) + length_list = [] + for key, value in PCEModel.basis_dict[key].items(): + length_list.append(len(value)) + n_predictors = min(length_list) + n_samples = x_val.shape[0] + + R2 = r2_score(y_pce_val_, y_val_) + AdjR2 = 1 - (1 - R2) * (n_samples - 1) / \ + (n_samples - n_predictors - 1) + rmse = mean_squared_error(y_pce_val_, y_val_, squared=False) + + plt.annotate(f'RMSE = {rmse:.3f}\n Adjusted $R^2$ = {AdjR2:.3f}', + xy=(0.05, 0.85), xycoords='axes fraction') + + plt.ylabel("Original Model") + plt.xlabel("PCE Model") + plt.grid() + plt.show() + + if save_fig: + # save the current figure + pdf1.savefig(fig, bbox_inches='tight') + + # Destroy the current plot + plt.clf() + + # Close the pdfs + pdf1.close() + + # ------------------------------------------------------------------------- + def _plot_validation_multi(self, x_values=[], x_axis="x [m]", save_fig=True): + """ + Plots outputs for visual comparison of metamodel outputs with that of + the (full) multioutput original model + + Parameters + ---------- + x_values : list or array, optional + List of x values. The default is []. + x_axis : string, optional + Label of the x axis. The default is "x [m]". + save_fig : bool, optional + Whether to save the figures. The default is True. + + Returns + ------- + None. 
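`_plot_validation` above annotates each scatter plot with the adjusted R², which penalises the plain R² by the number of regressors. A small worked example with hypothetical sample and predictor counts:

```python
from sklearn.metrics import r2_score

y_true = [3.1, 2.9, 4.2, 5.0]
y_pred = [3.0, 3.0, 4.0, 5.1]

R2 = r2_score(y_true, y_pred)
n, p = 100, 5            # assumed sample size and number of predictors
adj_R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1)
print(f'{adj_R2:.3f}')
```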
+
+        """
+        Model = self.MetaModel.ModelObj
+
+        if save_fig:
+            newpath = f'Outputs_PostProcessing_{self.name}/'
+            if not os.path.exists(newpath):
+                os.makedirs(newpath)
+
+            # create a PdfPages object
+            pdf = PdfPages(f'./{newpath}/Model_vs_PCEModel.pdf')
+
+        # List of markers and colors
+        color = cycle((['b', 'g', 'r', 'y', 'k']))
+        marker = cycle(('x', 'd', '+', 'o', '*'))
+
+        fig = plt.figure()
+        # Plot the model vs PCE model
+        for keyIdx, key in enumerate(Model.Output.names):
+
+            y_pce_val = self.pce_out_mean[key]
+            y_pce_val_std = self.pce_out_std[key]
+            y_val = self.model_out_dict[key]
+            try:
+                x = self.model_out_dict['x_values'][key]
+            except KeyError:
+                x = x_values
+
+            for idx in range(y_val.shape[0]):
+                Color = next(color)
+                Marker = next(marker)
+
+                plt.plot(x, y_val[idx], color=Color, marker=Marker,
+                         label='$Y_{%s}^{M}$' % (idx+1))
+
+                plt.plot(x, y_pce_val[idx], color=Color, marker=Marker,
+                         linestyle='--',
+                         label='$Y_{%s}^{PCE}$' % (idx+1))
+                plt.fill_between(x, y_pce_val[idx]-1.96*y_pce_val_std[idx],
+                                 y_pce_val[idx]+1.96*y_pce_val_std[idx],
+                                 color=Color, alpha=0.15)
+
+            # Calculate the RMSE
+            rmse = mean_squared_error(y_pce_val, y_val, squared=False)
+            R2 = r2_score(y_pce_val[idx].reshape(-1, 1),
+                          y_val[idx].reshape(-1, 1))
+
+            plt.annotate(f'RMSE = {rmse:.3f}\n $R^2$ = {R2:.3f}', xy=(0.2, 0.75),
+                         xycoords='axes fraction')
+
+            plt.ylabel(key)
+            plt.xlabel(x_axis)
+            plt.legend(loc='best')
+            plt.grid()
+
+            if save_fig:
+                # save the current figure
+                pdf.savefig(fig, bbox_inches='tight')
+                # Destroy the current plot
+                plt.clf()
+
+        pdf.close()
+
+        # Zip the subdirectories
+        Model.zip_subdirs(f'{Model.name}valid', f'{Model.name}valid_')
diff --git a/src/bayesvalidrox/pylink/pylink.py b/src/bayesvalidrox/pylink/pylink.py
index 69d272656..0e0d450ec 100644
--- a/src/bayesvalidrox/pylink/pylink.py
+++ b/src/bayesvalidrox/pylink/pylink.py
@@ -97,24 +97,22 @@ class PyLinkForwardModel(object):
        """
        if case.lower() == 'calib':
-            if hasattr(self, 'observations') and \
-               isinstance(self.observations, dict):
+            if bool(self.observations):
                obs = pd.DataFrame.from_dict(self.observations)
-            elif hasattr(self, 'observations'):
-                FilePath = os.path.join(os.getcwd(), self.meas_file)
-                obs = pd.read_csv(FilePath, delimiter=',')
+            elif self.meas_file is not None:
+                file_path = os.path.join(os.getcwd(), self.meas_file)
+                obs = pd.read_csv(file_path, delimiter=',')
            else:
                raise Exception("Please provide the observation data as a "
                                "dictionary via observations attribute or pass"
                                " the csv-file path to MeasurementFile "
                                "attribute")
        elif
case.lower() == 'valid': self.observations_valid = obs - self.nObsValid = nObs + self.n_obs_valid = n_obs return self.observations_valid # ------------------------------------------------------------------------- @@ -147,11 +145,11 @@ class PyLinkForwardModel(object): if self.mc_ref_file is None: return elif hasattr(self, 'mc_reference') and \ - isinstance(self.mc_reference, dict): + isinstance(self.mc_reference, dict): self.mc_reference = pd.DataFrame.from_dict(self.mc_reference) elif hasattr(self, 'mc_reference'): - FilePath = os.path.join(os.getcwd(), self.mc_ref_file) - self.mc_reference = pd.read_csv(FilePath, delimiter=',') + file_path = os.path.join(os.getcwd(), self.mc_ref_file) + self.mc_reference = pd.read_csv(file_path, delimiter=',') else: raise Exception("Please provide the MC reference data as a " "dictionary via mc_reference attribute or pass the" @@ -268,7 +266,7 @@ class PyLinkForwardModel(object): necessary files to this directory and renames them. Next, it executes the given command. """ - CollocationPoints, Run_No, keyString = xx + c_points, run_no, key_str = xx # Handle if only one imput file is provided if not isinstance(self.input_template, list): @@ -276,58 +274,57 @@ class PyLinkForwardModel(object): if not isinstance(self.input_file, list): self.input_file = [self.input_file] - NewInputfile = [] + new_input_file = [] # Loop over the InputTemplates: - for InputTemplate in self.input_template: - if '/' in InputTemplate: - InputTemplate = InputTemplate.split('/')[-1] - NewInputfile.append(InputTemplate.split('.tpl')[0] + keyString + - f"_{Run_No+1}" + - InputTemplate.split('.tpl')[1]) + for in_temp in self.input_template: + if '/' in in_temp: + in_temp = in_temp.split('/')[-1] + new_input_file.append(in_temp.split('.tpl')[0] + key_str + + f"_{run_no+1}" + in_temp.split('.tpl')[1]) # Create directories - newpath = self.name + keyString + f'_{Run_No+1}' + newpath = self.name + key_str + f'_{run_no+1}' if not os.path.exists(newpath): os.makedirs(newpath) # Copy the necessary files to the directories - for InputTemplate in self.input_template: + for in_temp in self.input_template: # Input file(s) of the model - shutil.copy2(InputTemplate, newpath) + shutil.copy2(in_temp, newpath) # Auxiliary file if self.aux_file is not None: shutil.copy2(self.aux_file, newpath) # Auxiliary file # Rename the Inputfile and/or auxiliary file os.chdir(newpath) - for input_tem, input_file in zip(self.input_template, NewInputfile): - if '/' in InputTemplate: + for input_tem, input_file in zip(self.input_template, new_input_file): + if '/' in input_tem: input_tem = input_tem.split('/')[-1] os.rename(input_tem, input_file) # Update the parametrs in Input file - self.update_input_params(NewInputfile, CollocationPoints) + self.update_input_params(new_input_file, c_points) # Update the user defined command and the execution path try: - NewCommand = self.shell_command .replace(self.input_file[0], - NewInputfile[0]).replace( - self.input_file[1], - NewInputfile[1]) + new_command = self.shell_command.replace(self.input_file[0], + new_input_file[0]) + new_command = new_command.replace(self.input_file[1], + new_input_file[1]) except: - NewCommand = self.shell_command.replace(self.input_file[0], - NewInputfile[0]) - - OutputFileNames = self.Output.file_names - self.exe_path = os.getcwd() + new_command = self.shell_command.replace(self.input_file[0], + new_input_file[0]) + # Set the exe path if not provided + if not bool(self.exe_path): + self.exe_path = os.getcwd() # Run the model - Output = 
 
     # -------------------------------------------------------------------------
-    def run_model_parallel(self, c_points, prevRun_No=0, keyString='',
+    def run_model_parallel(self, c_points, prevRun_No=0, key_str='',
                            mp=True):
         """
         Runs model simulations. If mp is true (default), then the simulations
@@ -340,7 +337,7 @@ class PyLinkForwardModel(object):
         prevRun_No : int, optional
             Previous run number, in case the sequential design is selected.
             The default is 0.
-        keyString : string, optional
+        key_str : string, optional
             A descriptive string for validation runs. The default is ''.
         mp : bool, optional
             Multiprocessing. The default is True.
@@ -357,14 +354,13 @@ class PyLinkForwardModel(object):
         """
         # Create hdf5 metadata
-        hdf5file = 'ExpDesign'+'_'+self.name+'.hdf5'
+        hdf5file = f'ExpDesign_{self.name}.hdf5'
         hdf5_exist = os.path.exists(hdf5file)
         file = h5py.File(hdf5file, 'a')
 
-        # Initialization
-        P = len(c_points)
-        OutputNames = self.Output.names
-        self.NofVar = len(OutputNames)
+        # Initialization
+        n_c_points = len(c_points)
+        self.n_outputs = len(self.Output.names)
         all_outputs = {}
 
         # Extract the function
@@ -376,27 +372,28 @@ class PyLinkForwardModel(object):
         # ---------------------------------------------------------------
         # Start a pool with the number of CPUs
         if self.n_cpus is None:
-            nrCPUs = multiprocessing.cpu_count()
+            n_cpus = multiprocessing.cpu_count()
         else:
-            nrCPUs = self.n_cpus
+            n_cpus = self.n_cpus
 
         # Run forward model either normal or with multiprocessing
         if not self.multi_process:
             group_results = list([self.run_forwardmodel((c_points, prevRun_No,
-                                                          keyString))])
+                                                          key_str))])
         else:
-            with multiprocessing.Pool(nrCPUs) as p:
-                desc = f'Running forward model {keyString}'
+            with multiprocessing.Pool(n_cpus) as p:
+                desc = f'Running forward model {key_str}'
                 if self.link_type.lower() == 'function':
                     imap_var = p.imap(Function, c_points[:, np.newaxis])
                 else:
-                    imap_var = p.imap(self.run_forwardmodel,
-                                      zip(c_points,
-                                          [prevRun_No+i for i in range(P)],
-                                          [keyString]*P))
+                    args = zip(c_points,
+                               [prevRun_No+i for i in range(n_c_points)],
+                               [key_str]*n_c_points)
+                    imap_var = p.imap(self.run_forwardmodel, args)
 
-                group_results = list(tqdm.tqdm(imap_var, total=P, desc=desc))
+                group_results = list(tqdm.tqdm(imap_var, total=n_c_points,
+                                               desc=desc))
 
         # Save time steps or x-values
         x_values = group_results[0][0]
@@ -404,14 +401,14 @@ class PyLinkForwardModel(object):
         if not hdf5_exist:
             if type(x_values) is dict:
                 grp_x_values = file.create_group("x_values/")
-                for varIdx, var in enumerate(OutputNames):
+                for varIdx, var in enumerate(self.Output.names):
                     grp_x_values.create_dataset(var, data=x_values[var])
             else:
                 file.create_dataset("x_values", data=x_values)
 
         # save each output in its corresponding array
         NaN_idx = []
-        for varIdx, var in enumerate(OutputNames):
+        for varIdx, var in enumerate(self.Output.names):
 
             if not hdf5_exist:
                 grpY = file.create_group("EDY/"+var)
 
             Outputs = np.asarray([item[varIdx+1] for item in group_results],
                                  dtype=np.float64)
 
-            if prevRun_No == 0 and keyString == '':
-                grpY.create_dataset(f'init_{keyString}', data=Outputs)
+            if prevRun_No == 0 and key_str == '':
+                grpY.create_dataset(f'init_{key_str}', data=Outputs)
             else:
                 try:
-                    oldEDY = np.array(file[f'EDY/{var}/adaptive_{keyString}'])
-                    del file[f'EDY/{var}/adaptive_{keyString}']
+                    oldEDY = np.array(file[f'EDY/{var}/adaptive_{key_str}'])
+                    del file[f'EDY/{var}/adaptive_{key_str}']
                     data = np.vstack((oldEDY, Outputs))
                 except KeyError:
                     data = Outputs
-                grpY.create_dataset('adaptive_'+keyString, data=data)
+                grpY.create_dataset('adaptive_'+key_str, data=data)
 
             NaN_idx = np.unique(np.argwhere(np.isnan(Outputs))[:, 0])
             all_outputs[var] = np.delete(Outputs, NaN_idx, axis=0)
 
-            if prevRun_No == 0 and keyString == '':
-                grpY.create_dataset(f"New_init_{keyString}",
+            if prevRun_No == 0 and key_str == '':
+                grpY.create_dataset(f"New_init_{key_str}",
                                     data=all_outputs[var])
             else:
                 try:
-                    name = f'EDY/{var}/New_adaptive_{keyString}'
+                    name = f'EDY/{var}/New_adaptive_{key_str}'
                     oldEDY = np.array(file[name])
-                    del file[f'EDY/{var}/New_adaptive_{keyString}']
+                    del file[f'EDY/{var}/New_adaptive_{key_str}']
                     data = np.vstack((oldEDY, all_outputs[var]))
                 except KeyError:
                     data = all_outputs[var]
-                grpY.create_dataset(f'New_adaptive_{keyString}', data=data)
+                grpY.create_dataset(f'New_adaptive_{key_str}', data=data)
 
         # Print the collocation points whose simulations crashed
         if len(NaN_idx) != 0:
@@ -463,30 +460,30 @@ class PyLinkForwardModel(object):
 
         # Save CollocationPoints
         grpX = file.create_group("EDX") if not hdf5_exist else file.get("EDX")
-        if prevRun_No == 0 and keyString == '':
-            grpX.create_dataset("init_"+keyString, data=c_points)
+        if prevRun_No == 0 and key_str == '':
+            grpX.create_dataset("init_"+key_str, data=c_points)
             if len(NaN_idx) != 0:
-                grpX.create_dataset("New_init_"+keyString, data=new_c_points)
+                grpX.create_dataset("New_init_"+key_str, data=new_c_points)
 
         else:
             try:
-                name = f'EDX/adaptive_{keyString}'
+                name = f'EDX/adaptive_{key_str}'
                 oldCollocationPoints = np.array(file[name])
-                del file[f'EDX/adaptive_{keyString}']
+                del file[f'EDX/adaptive_{key_str}']
                 data = np.vstack((oldCollocationPoints, new_c_points))
             except KeyError:
                 data = new_c_points
-            grpX.create_dataset('adaptive_'+keyString, data=data)
+            grpX.create_dataset('adaptive_'+key_str, data=data)
 
             if len(NaN_idx) != 0:
                 try:
-                    name = f'EDX/New_adaptive_{keyString}'
+                    name = f'EDX/New_adaptive_{key_str}'
                     oldCollocationPoints = np.array(file[name])
-                    del file[f'EDX/New_adaptive_{keyString}']
+                    del file[f'EDX/New_adaptive_{key_str}']
                     data = np.vstack((oldCollocationPoints, new_c_points))
                 except KeyError:
                     data = new_c_points
-                grpX.create_dataset('New_adaptive_'+keyString, data=data)
+                grpX.create_dataset('New_adaptive_'+key_str, data=data)
 
         # Close h5py file
         file.close()
@@ -511,38 +508,38 @@ class PyLinkForwardModel(object):
         """
         # set up file path variables
-        DirectoryList = []
-        filePaths = []
+        dir_list = []
+        file_paths = []
 
         # Read all directories, subdirectories and file lists
-        dirName = os.getcwd()
+        dir_path = os.getcwd()
 
-        for root, directories, files in os.walk(dirName):
+        for root, directories, files in os.walk(dir_path):
             for directory in directories:
                 # Create the full filepath by using os module.
                 if key in directory:
-                    folderPath = os.path.join(dirName, directory)
-                    DirectoryList.append(folderPath)
+                    folderPath = os.path.join(dir_path, directory)
+                    dir_list.append(folderPath)
 
         # Loop over the identified directories to store the file paths
-        for directName in DirectoryList:
-            for root, directories, files in os.walk(directName):
+        for direct_name in dir_list:
+            for root, directories, files in os.walk(direct_name):
                 for filename in files:
                     # Create the full filepath by using os module.
                     filePath = os.path.join(root, filename)
-                    filePaths.append('.'+filePath.split(dirName)[1])
+                    file_paths.append('.'+filePath.split(dir_path)[1])
 
         # writing files to a zipfile
-        if len(filePaths) != 0:
+        if len(file_paths) != 0:
             zip_file = zipfile.ZipFile(dir_name+'.zip', 'w')
             with zip_file:
                 # writing each file one by one
-                for file in filePaths:
+                for file in file_paths:
                     zip_file.write(file)
 
-            FilePaths = [path for path in os.listdir('.') if key in path]
+            file_paths = [path for path in os.listdir('.') if key in path]
 
-            for path in FilePaths:
+            for path in file_paths:
                 shutil.rmtree(path)
 
             print("\n")
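`zip_subdirs` above walks the working directory, collects every directory whose name contains `key`, writes the contained files into one archive, and then deletes the original run directories. The same pattern reduced to a standard-library sketch (function and parameter names are illustrative):

```python
import os
import shutil
import zipfile

def zip_matching_dirs(archive_name, key):
    """Archive all directories whose names contain `key`, then remove them."""
    with zipfile.ZipFile(archive_name + '.zip', 'w') as zf:
        for entry in os.listdir('.'):
            if key in entry and os.path.isdir(entry):
                for root, _, files in os.walk(entry):
                    for fname in files:
                        zf.write(os.path.join(root, fname))
    # remove the run directories only after the archive has been written
    for entry in os.listdir('.'):
        if key in entry and os.path.isdir(entry):
            shutil.rmtree(entry)
```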
diff --git a/src/bayesvalidrox/surrogate_models/exp_designs.py b/src/bayesvalidrox/surrogate_models/exp_designs.py
index 49a56e1ce..1dfe48fd6 100644
--- a/src/bayesvalidrox/surrogate_models/exp_designs.py
+++ b/src/bayesvalidrox/surrogate_models/exp_designs.py
@@ -71,10 +71,11 @@ class ExpDesigns:
         """
         try:
-            samples = chaospy.generate_samples(n_samples, domain=self.JDist,
-                                               rule=sampling_method)
+            samples = chaospy.generate_samples(
+                int(n_samples), domain=self.JDist, rule=sampling_method
+                )
         except:
-            samples = self.JDist.resample(n_samples)
+            samples = self.JDist.resample(int(n_samples))
 
         # Transform samples to the original space
         if transform:
@@ -84,7 +85,7 @@ class ExpDesigns:
         return samples.T
 
     # -------------------------------------------------------------------------
-    def generate_ED(self, n_samples, sample_method='random', transform=False,
+    def generate_ED(self, n_samples, sampling_method='random', transform=False,
                     max_pce_deg=None):
         """
         Generates experimental designs (training set) with the given method.
@@ -93,7 +94,7 @@ class ExpDesigns:
         ----------
         n_samples : int
             Number of requested training points.
-        sample_method : string, optional
+        sampling_method : string, optional
             Sampling method. The default is 'random'.
         transform : bool, optional
             Isoprobabilistic transformation. The default is False.
@@ -129,25 +130,25 @@ class ExpDesigns:
             for i in range(self.ndim):
                 low_bound = np.min(Inputs.Marginals[i].input_data)
                 up_bound = np.max(Inputs.Marginals[i].input_data)
-                Inputs.Marginals[i].Parameters = [low_bound, up_bound]
+                Inputs.Marginals[i].parameters = [low_bound, up_bound]
 
         # Generate the samples based on requested method
         self.raw_data, self.bound_tuples = self.init_param_space(max_pce_deg)
 
         # Pass user-defined samples as ED
-        if self.sampling_method == 'user':
+        if sampling_method == 'user':
             samples = self.X
-            self.NrSamples = len(samples)
+            self.n_samples = len(samples)
 
         # Sample the distribution of parameters
         elif self.input_data_given:
             # Case II: Input values are directly given by the user.
- if self.sampling_method == 'random': + if sampling_method == 'random': samples = self.random_sampler(n_samples) - elif self.sampling_method == 'PCM' or \ - self.sampling_method == 'LSCM': + elif sampling_method == 'PCM' or \ + sampling_method == 'LSCM': samples = self.pcm_sampler(max_pce_deg) else: @@ -155,14 +156,14 @@ class ExpDesigns: try: samples = chaospy.generate_samples(n_samples, domain=self.JDist, - rule=sample_method).T + rule=sampling_method).T except: samples = self.JDist.sample(n_samples) elif not self.input_data_given: # Case I = User passed known distributions samples = chaospy.generate_samples(n_samples, domain=self.JDist, - rule=sample_method).T + rule=sampling_method).T # Transform samples to the original space if transform: @@ -284,16 +285,11 @@ class ExpDesigns: if Inputs.Marginals[parIdx].dist_type is None: data = Inputs.Marginals[parIdx].input_data all_data.append(data) - dist_type, params = self.FitDist(data) - Inputs.Marginals[parIdx].dist_type = dist_type - Inputs.Marginals[parIdx].parameters = params + dist_type = None else: dist_type = Inputs.Marginals[parIdx].dist_type params = Inputs.Marginals[parIdx].parameters - # Make disttpe lower case for consistancy - dist_type = dist_type.lower() - if rosenblatt: polytype = 'hermite' dist = chaospy.Normal() @@ -302,36 +298,36 @@ class ExpDesigns: polytype = 'arbitrary' dist = None - elif 'unif' in dist_type: + elif 'unif' in dist_type.lower(): polytype = 'legendre' dist = chaospy.Uniform(lower=params[0], upper=params[1]) - elif 'norm' in dist_type: + elif 'norm' in dist_type.lower(): polytype = 'hermite' dist = chaospy.Normal(mu=params[0], sigma=params[1]) - elif 'gamma' in dist_type: + elif 'gamma' in dist_type.lower(): polytype = 'laguerre' dist = chaospy.Gamma(shape=params[0], scale=params[1], shift=params[2]) - elif 'beta' in dist_type: + elif 'beta' in dist_type.lower(): polytype = 'jacobi' dist = chaospy.Beta(alpha=params[0], beta=params[1], lower=params[2], upper=params[3]) - elif 'lognorm' in dist_type: + elif 'lognorm' in dist_type.lower(): polytype = 'hermite' Mu = np.log(params[0]**2/np.sqrt(params[0]**2 + params[1]**2)) Sigma = np.sqrt(np.log(1 + params[1]**2 / params[0]**2)) dist = chaospy.LogNormal(mu=Mu, sigma=Sigma) - elif 'expon' in dist_type: + elif 'expon' in dist_type.lower(): polytype = 'arbitrary' dist = chaospy.Exponential(scale=params[0], shift=params[1]) - elif 'weibull' in dist_type: + elif 'weibull' in dist_type.lower(): polytype = 'arbitrary' dist = chaospy.Weibull(shape=params[0], scale=params[1], shift=params[2]) @@ -354,10 +350,10 @@ class ExpDesigns: # Naive approach: Fit a gaussian kernel to the provided data Data = np.asarray(all_data) orig_space_dist = st.gaussian_kde(Data) - self.priorSpace = orig_space_dist + self.prior_space = orig_space_dist else: orig_space_dist = chaospy.J(*orig_joints) - self.priorSpace = st.gaussian_kde(orig_space_dist.sample(10000)) + self.prior_space = st.gaussian_kde(orig_space_dist.sample(10000)) return orig_space_dist, poly_types @@ -407,7 +403,7 @@ class ExpDesigns: raw_data = self.raw_data - # Guess the closest degree to self.NrSamples + # Guess the closest degree to self.n_samples def M_uptoMax(deg): result = [] for d in range(1, deg+1): @@ -415,7 +411,7 @@ class ExpDesigns: (math.factorial(self.ndim) * math.factorial(d))) return np.array(result) - guess_Deg = np.where(M_uptoMax(max_deg) > self.NrSamples)[0][0] + guess_Deg = np.where(M_uptoMax(max_deg) > self.n_samples)[0][0] c_points = np.zeros((guess_Deg+1, self.ndim)) @@ -458,7 +454,7 @@ class 
ExpDesigns:
         if self.sampling_method.lower() == 'lscm':
             opt_col_points = sort_unique_combos
         else:
-            opt_col_points = sort_unique_combos[0:self.NrSamples]
+            opt_col_points = sort_unique_combos[0:self.n_samples]
 
         return opt_col_points
@@ -539,7 +535,7 @@ class ExpDesigns:
         return tr_X
 
     # -------------------------------------------------------------------------
-    def FitDist(self, y):
+    def fit_dist(self, y):
         """
         Fits the known distributions to the data.
diff --git a/src/bayesvalidrox/surrogate_models/inputs.py b/src/bayesvalidrox/surrogate_models/inputs.py
index b02ec0730..c98cb78b5 100644
--- a/src/bayesvalidrox/surrogate_models/inputs.py
+++ b/src/bayesvalidrox/surrogate_models/inputs.py
@@ -30,4 +30,4 @@ class Marginal:
         self.dist_type = None
         self.moments = None
         self.input_data = []
-        self.parameters = [0, 1]
+        self.parameters = None
diff --git a/src/bayesvalidrox/surrogate_models/sequential_design.py b/src/bayesvalidrox/surrogate_models/sequential_design.py
index ccf6a35f2..4f33e9015 100644
--- a/src/bayesvalidrox/surrogate_models/sequential_design.py
+++ b/src/bayesvalidrox/surrogate_models/sequential_design.py
@@ -84,6 +84,9 @@ class SeqDesign():
         if not isinstance(MetaModel.ExpDesign.util_func, list):
             util_func = [MetaModel.ExpDesign.util_func]
 
+        # Read observations or MCReference
+        if len(Model.observations) != 0:
+            self.observations = Model.read_observation()
         # ---------- Initial PCEModel ----------
         PCEModel = MetaModel.train_norm_design(Model)
         initPCEModel = deepcopy(PCEModel)
@@ -92,7 +95,7 @@ class SeqDesign():
         OutputName = Model.Output.names
 
         # Estimation of the integral via Monte Carlo integration
-        obs_data = PCEModel.Observations
+        obs_data = self.observations
 
         # Check if data is provided
         TotalSigma2 = np.empty((0, 1))
@@ -472,8 +475,8 @@ class SeqDesign():
         Y_mean_can, Y_std_can = self.eval_metamodel(samples=np.array([X_can]))
 
         # Get the data
-        obs_data = self.Observations
-        nObs = self.Model.nObs
+        obs_data = self.observations
+        nObs = self.Model.n_obs
         # TODO: Analytical DKL
         # Sample a distribution for a normal dist
         # with Y_mean_can as the mean and Y_std_can as std.
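A note on the `TODO: Analytical DKL` above: since the candidate's predictive distribution is Gaussian, the Kullback-Leibler divergence against another Gaussian needs no sampling at all. For univariate normals, D_KL(N(mu0, sig0^2) || N(mu1, sig1^2)) = log(sig1/sig0) + (sig0^2 + (mu0 - mu1)^2) / (2 sig1^2) - 1/2. A sketch of that closed form (not part of the package):

```python
import numpy as np

def kl_normal(mu0, sig0, mu1, sig1):
    """KL( N(mu0, sig0^2) || N(mu1, sig1^2) ) in closed form."""
    return (np.log(sig1 / sig0)
            + (sig0**2 + (mu0 - mu1)**2) / (2.0 * sig1**2) - 0.5)

print(kl_normal(0.0, 1.0, 0.0, 1.0))  # identical densities -> 0.0
print(kl_normal(0.0, 1.0, 1.0, 2.0))  # positive for differing densities
```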
@@ -716,7 +719,7 @@ class SeqDesign():
             PCE_SparseBayes_can = PCEModel
 
         # Get the data
-        obs_data = self.MetaModel.Observations
+        obs_data = self.observations
 
         # ---------- Inner MC simulation for computing Utility Value ----------
         # Estimation of the integral via Monte Carlo integration
diff --git a/src/bayesvalidrox/surrogate_models/surrogate_models.py b/src/bayesvalidrox/surrogate_models/surrogate_models.py
index e8620e1bb..aa8cb2137 100644
--- a/src/bayesvalidrox/surrogate_models/surrogate_models.py
+++ b/src/bayesvalidrox/surrogate_models/surrogate_models.py
@@ -34,7 +34,9 @@ from .reg_fast_ard import RegressionFastARD
 from .reg_fast_laplace import RegressionFastLaplace
 from .bayes_linear import VBLinearRegression, EBLinearRegression
 warnings.filterwarnings("ignore")
-#plt.style.use('../bayesvalidrox.mplstyle')
+# Load the mplstyle
+plt.style.use(os.path.join(os.path.split(__file__)[0],
+                           '../', 'bayesvalidrox.mplstyle'))
 
 
 class MetaModel:
@@ -85,12 +87,12 @@ class MetaModel:
         if type(self.pce_deg) is not np.ndarray:
             self.pce_deg = np.array(self.pce_deg)
 
-        if self.ExpDesign.Method == 'sequential':
+        if self.ExpDesign.method == 'sequential':
             from .sequential_design import SeqDesign
             seq_design = SeqDesign(self)
             metamodel = seq_design.train_seq_design(Model)
 
-        elif self.ExpDesign.Method == 'normal':
+        elif self.ExpDesign.method == 'normal':
             self.ExpDesignFlag = 'normal'
             metamodel = self.train_norm_design(Model)
 
@@ -139,11 +141,6 @@ class MetaModel:
         self.LCerror = self.auto_vivification()
         self.x_scaler = {}
 
-        # Read observations or MCReference
-        if self.ExpDesignFlag != 'sequential':
-            if len(Model.observations) != 0:
-                self.Observations = Model.read_observation()
-
         # Define the DegreeArray
         nSamples, ndim = CollocationPoints.shape
         self.DegreeArray = self.__select_degree(ndim, nSamples)
@@ -337,7 +334,7 @@ class MetaModel:
         self.bound_tuples = ExpDesign.bound_tuples
 
         # ---- Run simulations at X ----
-        if not hasattr(ExpDesign, 'Y'):
+        if not hasattr(ExpDesign, 'Y') or ExpDesign.Y is None:
             print('\n Now the forward model needs to be run!\n')
             ED_Y, up_ED_X = Model.run_model_parallel(ED_X)
             ExpDesign.X = up_ED_X
-- 
GitLab
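One caveat on the style-loading hunk in surrogate_models.py: `plt.style.use` now runs at import time and fails if the .mplstyle file is not shipped with the installed package. A defensive variant (the existence check is an assumption added here, not something the patch itself does):

```python
import os
import matplotlib.pyplot as plt

style_path = os.path.join(os.path.split(__file__)[0], '..',
                          'bayesvalidrox.mplstyle')
if os.path.exists(style_path):
    plt.style.use(style_path)  # apply the package style only when present
```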