diff --git a/examples/principal_component_analysis/example_principalcomponentanalysis.py b/examples/principal_component_analysis/example_principalcomponentanalysis.py
index 89ee5b30782dfaab8b56ad8fef465b2fd2b30b4e..53f9c304ebe2b2e02af0e928c583c3c72a9495e2 100644
--- a/examples/principal_component_analysis/example_principalcomponentanalysis.py
+++ b/examples/principal_component_analysis/example_principalcomponentanalysis.py
@@ -109,8 +109,8 @@ if __name__ == "__main__":
     # Select if you want to preserve the spatial/temporal depencencies
     MetaModelOpts.dim_red_method = 'PCA'
     MetaModelOpts.var_pca_threshold = 99.999
-    MetaModelOpts.n_pca_components = 5#10
-    MetaModelOpts.n_bootstrap_itrs = 2
+    MetaModelOpts.n_pca_components = 10  # previously 5
+    # MetaModelOpts.n_bootstrap_itrs = 2
 
     # Select your metamodel method
     # 1) PCE (Polynomial Chaos Expansion) 2) aPCE (arbitrary PCE)
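For context on the toggled options: `n_pca_components = 10` truncates the time-series output to ten principal components before the surrogate is fitted. A minimal, self-contained sketch of what that truncation does, using plain scikit-learn rather than the bayesvalidrox internals; the shapes and random data are made up for illustration:

```python
import numpy as np
from sklearn.decomposition import PCA

# Hypothetical model output: 30 runs of a 100-point time series
rng = np.random.default_rng(0)
outputs = rng.normal(size=(30, 100))

# Keep 10 components, as n_pca_components = 10 requests above
pca = PCA(n_components=10)
reduced = pca.fit_transform(outputs)            # (30, 10): surrogate targets
reconstructed = pca.inverse_transform(reduced)  # (30, 100): back-mapped output

print(reduced.shape, reconstructed.shape)
print("captured variance:", pca.explained_variance_ratio_.sum())
```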
diff --git a/examples/principal_component_analysis/test_principalcomponentanalysis.py b/examples/principal_component_analysis/test_principalcomponentanalysis.py
deleted file mode 100644
index 89ee5b30782dfaab8b56ad8fef465b2fd2b30b4e..0000000000000000000000000000000000000000
--- a/examples/principal_component_analysis/test_principalcomponentanalysis.py
+++ /dev/null
@@ -1,209 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-This test shows a surrogate-assisted Bayesian calibration of a time dependent
-analytical function with and wihout applying principal component analysis.
-
-Author: Farid Mohammadi, M.Sc.
-E-Mail: farid.mohammadi@iws.uni-stuttgart.de
-Department of Hydromechanics and Modelling of Hydrosystems (LH2)
-Institute for Modelling Hydraulic and Environmental Systems (IWS), University
-of Stuttgart, www.iws.uni-stuttgart.de/lh2/
-Pfaffenwaldring 61
-70569 Stuttgart
-
-Created on Fri Aug 9 2019
-
-"""
-import numpy as np
-import pandas as pd
-import scipy.io as io
-import sys
-import joblib
-# import bayesvalidrox
-# Add BayesValidRox path
-sys.path.append("../../src/")
-
-from bayesvalidrox.pylink.pylink import PyLinkForwardModel
-from bayesvalidrox.surrogate_models.inputs import Input
-from bayesvalidrox.surrogate_models.engine import Engine
-from bayesvalidrox.surrogate_models.surrogate_models import MetaModel
-from bayesvalidrox.surrogate_models.exp_designs import ExpDesigns
-from bayesvalidrox.bayes_inference.bayes_inference import BayesInference
-from bayesvalidrox.bayes_inference.discrepancy import Discrepancy
-from bayesvalidrox.post_processing.post_processing import PostProcessing
-from bayesvalidrox.bayes_inference.bayes_model_comparison import BayesModelComparison
-import matplotlib
-matplotlib.use('agg')
-
-
-if __name__ == "__main__":
-
-    # Number of parameters
-    ndim = 10  # 2, 10
-
-    # =====================================================
-    # ============= COMPUTATIONAL MODEL ================
-    # =====================================================
-    Model = PyLinkForwardModel()
-
-    Model.link_type = 'Function'
-    Model.py_file = 'analytical_function'
-    Model.name = 'AnalyticFunc'
-
-    Model.Output.names = ['Z']
-
-    # For Bayesian inversion synthetic data with X=[0,0]
-    Model.observations = {}
-    Model.observations['Time [s]'] = np.arange(0, 10, 1.) / 9
-    Model.observations['Z'] = np.repeat([2.], 10)
-
-    # For Checking with the MonteCarlo refrence
-    Model.mc_reference = {}
-    Model.mc_reference['Time [s]'] = np.arange(0, 10, 1.) / 9
-    Model.mc_reference['mean'] = np.load(f"data/mean_{ndim}.npy")
-    Model.mc_reference['std'] = np.load(f"data/std_{ndim}.npy")
-
-    # =====================================================
-    # ========= PROBABILISTIC INPUT MODEL ==============
-    # =====================================================
-    # Define the uncertain parameters with their mean and
-    # standard deviation
-    Inputs = Input()
-
-    for i in range(ndim):
-        Inputs.add_marginals()
-        Inputs.Marginals[i].name = "$\\theta_{"+str(i+1)+"}$"
-        Inputs.Marginals[i].dist_type = 'uniform'
-        Inputs.Marginals[i].parameters = [-5, 5]
-
-    # ------------------------------------------------
-    # ------ Experimental Design Configuration -------
-    # ------------------------------------------------
-    ExpDesign = ExpDesigns(Inputs)
-
-    # One-shot (normal) or Sequential Adaptive (sequential) Design
-    ExpDesign.method = 'normal'
-    ExpDesign.n_init_samples = 3*ndim
-
-    # Sampling methods
-    # 1) random 2) latin_hypercube 3) sobol 4) halton 5) hammersley
-    # 6) chebyshev(FT) 7) grid(FT) 8)user
-    ExpDesign.sampling_method = 'latin_hypercube'
-
-    # Provide the experimental design object with a hdf5 file
-    # ExpDesign.hdf5_file = 'ExpDesign_AnalyticFunc.hdf5'
-
-    # Defining the measurement error, if it's known a priori
-    obsData = pd.DataFrame(Model.observations, columns=Model.Output.names)
-    DiscrepancyOpts = Discrepancy('')
-    DiscrepancyOpts.type = 'Gaussian'
-    DiscrepancyOpts.parameters = obsData**2
-
-
-    # ------------------------------------------------
-    # ------------- PCE Specification ----------------
-    # ------------------------------------------------
-    MetaModelOpts = MetaModel(Inputs)
-
-    # Select if you want to preserve the spatial/temporal depencencies
-    MetaModelOpts.dim_red_method = 'PCA'
-    MetaModelOpts.var_pca_threshold = 99.999
-    MetaModelOpts.n_pca_components = 5#10
-    MetaModelOpts.n_bootstrap_itrs = 2
-
-    # Select your metamodel method
-    # 1) PCE (Polynomial Chaos Expansion) 2) aPCE (arbitrary PCE)
-    # 3) GPE (Gaussian Process Emulator)
-    MetaModelOpts.meta_model_type = 'aPCE'
-
-    # Select the sparse least-square minimization method for
-    # the PCE coefficients calculation:
-    # 1)OLS: Ordinary Least Square 2)BRR: Bayesian Ridge Regression
-    # 3)LARS: Least angle regression 4)ARD: Bayesian ARD Regression
-    # 5)FastARD: Fast Bayesian ARD Regression
-    # 6)BCS: Bayesian Compressive Sensing
-    # 7)OMP: Orthogonal Matching Pursuit
-    # 8)VBL: Variational Bayesian Learning
-    # 9)EBL: Emperical Bayesian Learning
-    MetaModelOpts.pce_reg_method = 'FastARD'
-
-    # Bootstraping
-    # 1) normal 2) fast
-    #MetaModelOpts.bootstrap_method = 'fast'
-    #MetaModelOpts.n_bootstrap_itrs = 1
-
-    # Specify the max degree to be compared by the adaptive algorithm:
-    # The degree with the lowest Leave-One-Out cross-validation (LOO)
-    # error (or the highest score=1-LOO)estimator is chosen as the final
-    # metamodel. pce_deg accepts degree as a scalar or a range.
-    MetaModelOpts.pce_deg = 12
-
-    # q-quasi-norm 0<q<1 (default=1)
-    MetaModelOpts.pce_q_norm = 0.85 if ndim < 5 else 0.5
-
-    # Create the engine
-    engine = Engine(MetaModelOpts, Model, ExpDesign)
-    engine.train_normal()
-    print('Surrogate has been trained')
-
-    # =====================================================
-    # ======== PostProcessing on the Model only ==========
-    # =====================================================
-
-    PostPCE = PostProcessing(engine)
-
-    # Plot to check validation visually.
-    PostPCE.valid_metamodel(n_samples=1)
-
-    # Compute and print RMSE error
-    PostPCE.check_accuracy(n_samples=300)
-
-    # Compute the moments and compare with the Monte-Carlo reference
-    if MetaModelOpts.meta_model_type != 'GPE':
-        PostPCE.plot_moments()
-
-    # Plot the evolution of the KLD,BME, and Modified LOOCV error
-    if engine.ExpDesign.method == 'sequential':
-        refBME_KLD = np.load("data/refBME_KLD_"+str(ndim)+".npy")
-        PostPCE.plot_seq_design_diagnostics(refBME_KLD)
-
-    # Plot the sobol indices
-    if MetaModelOpts.meta_model_type != 'GPE':
-        total_sobol = PostPCE.sobol_indices()
-
-
-
-    # =====================================================
-    # ======== Bayesian inference without Emulator ==========
-    # =====================================================
-    BayesOpts = BayesInference(engine)
-    BayesOpts.emulator = False
-    BayesOpts.plot_post_pred = True
-
-    # Select the inference method
-    import emcee
-    BayesOpts.inference_method = "MCMC"
-    # Set the MCMC parameters passed to self.mcmc_params
-    BayesOpts.mcmc_params = {
-        'n_steps': 1e3,
-        'n_walkers': 30,
-        'moves': emcee.moves.KDEMove(),
-        'multiprocessing': False,
-        'verbose': False
-        }
-
-    # ----- Define the discrepancy model -------
-    BayesOpts.measurement_error = obsData
-    DiscrepancyOpts = Discrepancy('')
-    DiscrepancyOpts.type = 'Gaussian'
-    DiscrepancyOpts.parameters = obsData**2
-    BayesOpts.Discrepancy = DiscrepancyOpts
-
-    # Start the calibration/inference
-    Bayes_PCE = BayesOpts.create_inference()
-
-    # Save class objects
-    with open(f'Bayes_{Model.name}.pkl', 'wb') as output:
-        joblib.dump(Bayes_PCE, output, 2)
-
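The removed test duplicated the surviving example one-to-one (note the identical pre-image blob `89ee5b30…` in both file headers), including the Gaussian discrepancy whose variance is the squared observation (`DiscrepancyOpts.parameters = obsData**2`). A minimal sketch of the independent-Gaussian likelihood such a discrepancy model typically implies, in plain NumPy/SciPy rather than the bayesvalidrox implementation; the model response values are made up:

```python
import numpy as np
from scipy import stats

obs = np.repeat(2.0, 10)       # synthetic observations, as in the test
variance = obs**2              # Gaussian discrepancy: variance = obsData**2
model_out = np.full(10, 1.8)   # hypothetical model response at some parameter set

# Independent Gaussian log-likelihood of the observations given the response
log_like = stats.norm.logpdf(obs, loc=model_out, scale=np.sqrt(variance)).sum()
print(log_like)
```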
diff --git a/src/bayesvalidrox/surrogate_models/surrogate_models.py b/src/bayesvalidrox/surrogate_models/surrogate_models.py
index 91abbe1492ec48e1d0289268e0184db295305525..adac29fcdf60c6a23184fbc324da18283ee7133e 100644
--- a/src/bayesvalidrox/surrogate_models/surrogate_models.py
+++ b/src/bayesvalidrox/surrogate_models/surrogate_models.py
@@ -504,6 +504,7 @@ class MetaModel:
         None.
 
         """
+        # Check size of inputs
         X = np.array(X)
         for key in y.keys():
             y_val = np.array(y[key])
@@ -524,6 +525,8 @@ class MetaModel:
         # Evaluate the univariate polynomials on InputSpace
         if self.meta_model_type.lower() != 'gpe':
             self.univ_p_val = self.univ_basis_vals(self.CollocationPoints)
+            # Store the original ones for later use
+            orig_univ_p_val = copy.deepcopy(self.univ_p_val)
 
         # --- Loop through data points and fit the surrogate ---
         if verbose:
@@ -612,7 +615,8 @@ class MetaModel:
                             self.gp_poly[f'b_{b_i + 1}'][key][f"y_{idx + 1}"] = out[idx]
 
                 else:
-                    self.univ_p_val = self.univ_p_val[b_indices]
+                    # Bootstrap the univariate polynomials for use during training
+                    self.univ_p_val = orig_univ_p_val[b_indices]
                     if parallel and (not fast_bootstrap or b_i == 0):
                         out = Parallel(n_jobs=-1, backend='multiprocessing')(
                             delayed(self.adaptive_regression)(  # X_train_b,
@@ -645,6 +649,9 @@ class MetaModel:
                             self.score_dict[f'b_{b_i + 1}'][key][f"y_{i + 1}"] = out[i]['LOOCVScore']
                             self.clf_poly[f'b_{b_i + 1}'][key][f"y_{i + 1}"] = out[i]['clf_poly']
                             # self.LCerror[f'b_{b_i+1}'][key][f"y_{i+1}"] = out[i]['LCerror']
+
+        # Restore the univariate polynomials
+        self.univ_p_val = orig_univ_p_val
 
         if verbose:
             print(f"\n>>>> Training the {self.meta_model_type} metamodel"
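The hunks above fix a compounding-slice bug in the bootstrap loop: `self.univ_p_val` was re-indexed with each iteration's `b_indices` after having already been sliced in the previous iteration, so from the second bootstrap draw on the rows no longer matched the resampled collocation points. Indexing a pristine copy (`orig_univ_p_val`) and restoring it after the loop avoids this. A toy NumPy sketch of the difference, with illustrative names and shapes rather than the library's:

```python
import numpy as np

rng = np.random.default_rng(1)
# Stand-in for univ_p_val: one row of basis evaluations per collocation point
orig = np.arange(15).reshape(5, 3).astype(float)

current = orig.copy()
for b_i in range(3):
    b_indices = rng.integers(0, 5, size=5)  # bootstrap resample of the points
    buggy = current[b_indices]  # old code: slices the previous iteration's slice
    fixed = orig[b_indices]     # new code: slices the original evaluations
    current = buggy
    # Identical in the first iteration, generally divergent afterwards
    print(b_i, np.array_equal(buggy, fixed))
```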
""" - # Transform via Principal Component Analysis - if self.n_pca_components is not None: - n_pca_components = self.n_pca_components - else: - n_samples, n_features = target.shape - + # Get current shape of the outputs + n_samples, n_features = target.shape + + # Use the given n_pca_components + n_pca_components = self.n_pca_components + + # Switch to var_pca if n_pca_components is too large + if n_pca_components >= n_features: + n_pca_components = None + print('') + print('WARNING: Too many components are set for PCA. The transformation will proceed based on explainable variance.') + + # Calculate n_pca_components based on decomposition of the variance + if n_pca_components is None: if self.var_pca_threshold is not None: var_pca_threshold = self.var_pca_threshold else: - var_pca_threshold = 100.0 + var_pca_threshold = 99#100.0 # Instantiate and fit sklearnPCA object covar_matrix = sklearnPCA(n_components=None) covar_matrix.fit(target)