diff --git a/src/bayesvalidrox/bayes_inference/bayes_inference.py b/src/bayesvalidrox/bayes_inference/bayes_inference.py index c8073ba76ebeb3249646e17b7f395f8a2472e2f6..22af7eea1f4d06b4f1f4ce7d808e57c69fdd93ba 100644 --- a/src/bayesvalidrox/bayes_inference/bayes_inference.py +++ b/src/bayesvalidrox/bayes_inference/bayes_inference.py @@ -331,7 +331,7 @@ class BayesInference: for itr_idx, data in tqdm( enumerate(self.perturbed_data), total=self.n_bootstrap_itrs, - desc="Boostraping the BME calculations", ascii=True + desc="Bootstrapping the BME calculations", ascii=True ): # ---------------- Likelihood calculation ---------------- diff --git a/src/bayesvalidrox/bayes_inference/mcmc.py b/src/bayesvalidrox/bayes_inference/mcmc.py index 6b9ca94823fb7064baa2f0588d0f97fb4c9d1d44..cc632c2de57140ab0ba4bd4c81a49316406dacd1 100755 --- a/src/bayesvalidrox/bayes_inference/mcmc.py +++ b/src/bayesvalidrox/bayes_inference/mcmc.py @@ -251,7 +251,7 @@ class MCMC: # output current autocorrelation estimate if self.verbose: - print(f"Mean autocorr time estimate: {np.nanmean(tau):.3f}") + print(f"Mean autocorr. time estimate: {np.nanmean(tau):.3f}") list_gr = np.round(self.gelman_rubin(sampler.chain), 3) print("Gelman-Rubin Test*: ", list_gr) @@ -282,15 +282,15 @@ class MCMC: # Print summary print('\n') print('-'*15 + 'Posterior diagnostics' + '-'*15) - print(f"mean auto-correlation time: {np.nanmean(tau):.3f}") - print(f"thin: {thin}") - print(f"burn-in: {burnin}") - print(f"flat chain shape: {finalsamples.shape}") - print(f"Mean acceptance fraction: {acc_fr:.3f}") - print("Gelman-Rubin Test*: ", list_gr) + print(f"Mean auto-correlation time: {np.nanmean(tau):.3f}") + print(f"Thin: {thin}") + print(f"Burn-in: {burnin}") + print(f"Flat chain shape: {finalsamples.shape}") + print(f"Mean acceptance fraction*: {acc_fr:.3f}") + print("Gelman-Rubin Test**: ", list_gr) print("\n* This value must lay between 0.234 and 0.5.") - print("* These values must be smaller than 1.1.") + print("** These values must be smaller than 1.1.") print('-'*50) print(f"\n>>>> Bayesian inference with MCMC for {self.BayesOpts.name} " @@ -777,7 +777,7 @@ class MCMC: ) if ~np.isfinite(tmp['logml']): warnings.warn( - "logml could not be estimated within maxiter, rerunning with " + "Logml could not be estimated within maxiter, rerunning with " "adjusted starting value. Estimate might be more variable than" " usual.") # use geometric mean as starting value diff --git a/src/bayesvalidrox/post_processing/post_processing.py b/src/bayesvalidrox/post_processing/post_processing.py index 4db354bbd541affdc50a8dce37eb293188091f73..e1795a74ce7a7b7a7c9519b30b53e1ea11d4eab8 100644 --- a/src/bayesvalidrox/post_processing/post_processing.py +++ b/src/bayesvalidrox/post_processing/post_processing.py @@ -237,8 +237,8 @@ class PostProcessing: elif samples is not None: self.n_samples = samples.shape[0] else: - raise Exception("Please provide either samples or pass number of " - "samples!") + raise Exception("Please provide either samples or pass the number" + " of samples!") # Generate random samples if necessary Samples = self._get_sample() if samples is None else samples @@ -922,7 +922,7 @@ class PostProcessing: ax = plt.gca() _, p = stats.shapiro(residuals) if p < 0.01: - annText = "The residuals seem to come from Gaussian process." + annText = "The residuals seem to come from a Gaussian Process." else: annText = "The normality assumption may not hold." at = AnchoredText(annText, prop=dict(size=30), frameon=True, diff --git a/src/bayesvalidrox/pylink/pylink.py b/src/bayesvalidrox/pylink/pylink.py index 7af4a14b8f8acb62c21862d5df4a527dc7fe57b9..011c692f1784b35b151d49ddaa5c6b31dce6124d 100644 --- a/src/bayesvalidrox/pylink/pylink.py +++ b/src/bayesvalidrox/pylink/pylink.py @@ -170,9 +170,9 @@ class PyLinkForwardModel(object): obs = self.observations else: raise Exception("Please provide the observation data as a " - "dictionary via observations attribute or pass" - " the csv-file path to MeasurementFile " - "attribute") + "dictionary via the observations attribute or " + "pass the csv-file path to the MeasurementFile" + " attribute") elif case.lower() == 'valid': if isinstance(self.observations_valid, dict) and \ bool(self.observations_valid): @@ -184,9 +184,9 @@ class PyLinkForwardModel(object): obs = self.observations_valid else: raise Exception("Please provide the observation data as a " - "dictionary via Observations attribute or pass" - " the csv-file path to MeasurementFile " - "attribute") + "dictionary via the observations attribute or " + "pass the csv-file path to the MeasurementFile" + " attribute") # Compute the number of observation n_obs = obs[self.Output.names].notnull().sum().values.sum() @@ -316,8 +316,8 @@ class PyLinkForwardModel(object): Process = os.system(f'./../{command}') if Process != 0: print('\nMessage 1:') - print(f'\tIf value of \'{Process}\' is a non-zero value, ' - 'then compilation problems \n' % Process) + print(f'\tIf the value of \'{Process}\' is a non-zero value' + ', then compilation problems \n' % Process) # TODO: occur? os.chdir("..") @@ -499,7 +499,7 @@ class PyLinkForwardModel(object): if len(NaN_idx) != 0: print('\n') print('*'*20) - print("\nThe following parametersets have been removed:\n", + print("\nThe following parameter sets have been removed:\n", c_points[NaN_idx]) print("\n") print('*'*20) @@ -656,6 +656,6 @@ class PyLinkForwardModel(object): shutil.rmtree(path) print("\n") - print(f'{dir_name}.zip file has been created successfully!\n') + print(f'{dir_name}.zip has been created successfully!\n') return diff --git a/src/bayesvalidrox/surrogate_models/bayes_linear.py b/src/bayesvalidrox/surrogate_models/bayes_linear.py index a7d6b5929a83fc89d15d7ab8f369187d0542923c..767e6fff73df0d524dcf94e620b0a67ae0742d83 100644 --- a/src/bayesvalidrox/surrogate_models/bayes_linear.py +++ b/src/bayesvalidrox/surrogate_models/bayes_linear.py @@ -138,7 +138,7 @@ class EBLinearRegression(BayesianLinearRegression): normalize=True, perfect_fit_tol = 1e-6, alpha = 1, copy_X = True, verbose = False): super(EBLinearRegression,self).__init__(n_iter, tol, fit_intercept, copy_X, verbose) if optimizer not in ['em','fp']: - raise ValueError('Optimizer can be either "em" of "fp" ') + raise ValueError('Optimizer can be either "em" or "fp" ') self.optimizer = optimizer self.alpha = alpha self.perfect_fit = False diff --git a/src/bayesvalidrox/surrogate_models/meta_model_engine.py b/src/bayesvalidrox/surrogate_models/meta_model_engine.py index 2df2dee5390ae4e6dc7eb88343c2469dbd88aad6..fb13076e92876fffa2445f992cb65e31bedf883e 100644 --- a/src/bayesvalidrox/surrogate_models/meta_model_engine.py +++ b/src/bayesvalidrox/surrogate_models/meta_model_engine.py @@ -14,11 +14,16 @@ import scipy.optimize as opt from sklearn.metrics import mean_squared_error import multiprocessing import matplotlib.pyplot as plt +import pandas as pd import sys import os import gc import seaborn as sns from joblib import Parallel, delayed + + +from bayes_inference.bayes_inference import BayesInference +from bayes_inference.discrepancy import Discrepancy from .exploration import Exploration @@ -1067,7 +1072,7 @@ class MetaModelEngine(): maxfun=max_func_itr) if verbose: - print(f"global minimum: xmin = {Res_Global.x}, " + print(f"Global minimum: xmin = {Res_Global.x}, " f"f(xmin) = {Res_Global.fun:.6f}, nfev = {Res_Global.nfev}") return (Run_No, Res_Global.x) @@ -1237,7 +1242,7 @@ class MetaModelEngine(): elapsed_time = time.time() - start_time print("\n") - print(f"elapsed_time: {round(elapsed_time,2)} sec.") + print(f"Elapsed_time: {round(elapsed_time,2)} sec.") print('-'*20) elif explore_method == 'LOOCV': @@ -1376,6 +1381,31 @@ class MetaModelEngine(): # ------- Calculate Total score ------- # ------- Trade off between EXPLORATION & EXPLOITATION ------- + # Accumulate the samples + finalCandidates = np.concatenate((allCandidates, candidates), axis = 0) + finalCandidates = np.unique(finalCandidates, axis = 0) + + # Calculations take into account both exploration and exploitation + # samples without duplicates + totalScore = np.zeros(finalCandidates.shape[0]) + #self.totalScore = totalScore + + for cand_idx in range(finalCandidates.shape[0]): + # find candidate indices + idx1 = np.where(allCandidates == finalCandidates[cand_idx])[0] + idx2 = np.where(candidates == finalCandidates[cand_idx])[0] + + # exploration + if idx1 != []: + idx1 = idx1[0] + totalScore[cand_idx] += explore_w * scoreExploration[idx1] + + # exploitation + if idx2 != []: + idx2 = idx2[0] + totalScore[cand_idx] += exploit_w * norm_U_J_d[idx2] + + # Total score totalScore = exploit_w * norm_U_J_d totalScore += explore_w * scoreExploration @@ -1405,7 +1435,10 @@ class MetaModelEngine(): # select the requested number of samples Xnew[i] = newSamples[np.argmax(maxminScore)] else: - Xnew = allCandidates[sorted_idxtotalScore[:n_new_samples]] + # Changed this from allCandiates to full set of candidates + # TODO: still not changed for e.g. 'Voronoi' + Xnew = finalCandidates[sorted_idxtotalScore[:n_new_samples]] + elif exploit_method == 'VarOptDesign': # ------- EXPLOITATION: VarOptDesign ------- @@ -1944,7 +1977,8 @@ class MetaModelEngine(): # If post_snapshot is True, plot likelihood vs refrence if post_snapshot or valid_likelihoods: # Hellinger distance - ref_like = np.log(valid_likelihoods[valid_likelihoods > 0]) + valid_likelihoods = np.array(valid_likelihoods) + ref_like = np.log(valid_likelihoods[(valid_likelihoods > 0)]) est_like = np.log(Likelihoods[Likelihoods > 0]) distHellinger = self.__hellinger_distance(ref_like, est_like) @@ -1976,9 +2010,6 @@ class MetaModelEngine(): # Bayesian inference with Emulator only for 2D problem if post_snapshot and MetaModel.n_params == 2 and not idx % 5: - from bayes_inference.bayes_inference import BayesInference - from bayes_inference.discrepancy import Discrepancy - import pandas as pd BayesOpts = BayesInference(MetaModel) BayesOpts.emulator = True BayesOpts.plot_post_pred = False diff --git a/src/bayesvalidrox/surrogate_models/reg_fast_ard.py b/src/bayesvalidrox/surrogate_models/reg_fast_ard.py index 44073da8e78642ba3b3914f6ce55a2d01986b1f1..e6883a3edd6d247c219b8be328f5206b75780fbb 100755 --- a/src/bayesvalidrox/surrogate_models/reg_fast_ard.py +++ b/src/bayesvalidrox/surrogate_models/reg_fast_ard.py @@ -286,9 +286,9 @@ class RegressionFastARD(LinearModel, RegressorMixin): # raise warning in case cholesky fails if warning_flag == 1: warnings.warn(("Cholesky decomposition failed! Algorithm uses " - "pinvh, which is significantly slower, if you " + "pinvh, which is significantly slower. If you " "use RVR it is advised to change parameters of " - "kernel")) + "the kernel!")) # compute quality & sparsity parameters s, q, S, Q = self._sparsity_quality(XX, XXd, XY, XYa, Aa, Ri, @@ -319,7 +319,7 @@ class RegressionFastARD(LinearModel, RegressorMixin): if converged or i == self.n_iter - 1: if converged and self.verbose: - print('Algorithm converged !') + print('Algorithm converged!') break # after last update of alpha & beta update parameters diff --git a/src/bayesvalidrox/surrogate_models/reg_fast_laplace.py b/src/bayesvalidrox/surrogate_models/reg_fast_laplace.py index bdff324ede818a42d226e9aa55aaf01666ca8fc8..84faa6026e18f9013ed04b1bb08e2740671d3af3 100644 --- a/src/bayesvalidrox/surrogate_models/reg_fast_laplace.py +++ b/src/bayesvalidrox/surrogate_models/reg_fast_laplace.py @@ -307,7 +307,7 @@ class RegressionFastLaplace(): # recomputation # zero if regressor has not been chosen yet if not ind_global_to_local[ind_L_max]: - raise Exception('cannot recompute index{0} -- not yet\ + raise Exception('Cannot recompute index{0} -- not yet\ part of the model!'.format(ind_L_max)) Sigma = np.atleast_2d(Sigma) mu = np.atleast_2d(mu) diff --git a/src/bayesvalidrox/surrogate_models/sequential_design.py b/src/bayesvalidrox/surrogate_models/sequential_design.py index fc81dcd4529ca0708dfba47385aef4415992eb3e..1e0935deed32ad5c398a82879d65723b22f03080 100644 --- a/src/bayesvalidrox/surrogate_models/sequential_design.py +++ b/src/bayesvalidrox/surrogate_models/sequential_design.py @@ -1099,7 +1099,7 @@ class SeqDesign(): maxfun=max_func_itr) if verbose: - print(f"global minimum: xmin = {Res_Global.x}, " + print(f"Global minimum: xmin = {Res_Global.x}, " f"f(xmin) = {Res_Global.fun:.6f}, nfev = {Res_Global.nfev}") return (Run_No, Res_Global.x) @@ -1269,7 +1269,7 @@ class SeqDesign(): elapsed_time = time.time() - start_time print("\n") - print(f"elapsed_time: {round(elapsed_time,2)} sec.") + print(f"Elapsed_time: {round(elapsed_time,2)} sec.") print('-'*20) elif explore_method == 'LOOCV': diff --git a/src/bayesvalidrox/surrogate_models/surrogate_models.py b/src/bayesvalidrox/surrogate_models/surrogate_models.py index c51ea8c6679aaeec685da069154e8f460c7c4450..f71acbdc305feb146319c430f579cb5090daf64c 100644 --- a/src/bayesvalidrox/surrogate_models/surrogate_models.py +++ b/src/bayesvalidrox/surrogate_models/surrogate_models.py @@ -136,7 +136,7 @@ class MetaModel(): if self.ExpDesign.method == 'sequential': raise Exception( - "Please use MetaModelEngine class for the sequential design!" + "Please use the MetaModelEngine class for sequential design!" ) elif self.ExpDesign.method == 'normal': @@ -238,7 +238,7 @@ class MetaModel(): if verbose and self.n_bootstrap_itrs > 1: enum_obj = tqdm(range(self.n_bootstrap_itrs), total=self.n_bootstrap_itrs, - desc="Boostraping the metamodel", + desc="Bootstrapping the metamodel", ascii=True) else: enum_obj = range(self.n_bootstrap_itrs) @@ -516,7 +516,7 @@ class MetaModel(): self.ModelOutputDict = ExpDesign.Y else: raise Exception('Please provide either a dictionary or a hdf5' - 'file to ExpDesign.hdf5_file argument.') + 'file as the ExpDesign.hdf5_file argument.') return ED_X_tr, self.ModelOutputDict @@ -882,9 +882,9 @@ class MetaModel(): print(' '*10 + ' Summary of results ') print('='*50) - print("scores:\n", scores) - print("Best score's degree:", self.deg_array[best_deg-1]) - print("NO. of terms:", len(basis_indices)) + print("Scores:\n", scores) + print("Degree of best score:", self.deg_array[best_deg-1]) + print("No. of terms:", len(basis_indices)) print("Sparsity index:", round(len(basis_indices)/P, 3)) print("Best Indices:\n", basis_indices)