diff --git a/examples/analytical-function/example_analytical_function_gp.py b/examples/analytical-function/example_analytical_function_gp.py
new file mode 100644
index 0000000000000000000000000000000000000000..9788ac7011d4f2b54f653c0631a617dd316fa98f
--- /dev/null
+++ b/examples/analytical-function/example_analytical_function_gp.py
@@ -0,0 +1,309 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+This example shows a surrogate-assisted Bayesian calibration of a
+time-dependent analytical function.
+
+Author: Farid Mohammadi, M.Sc.
+E-Mail: farid.mohammadi@iws.uni-stuttgart.de
+Department of Hydromechanics and Modelling of Hydrosystems (LH2)
+Institute for Modelling Hydraulic and Environmental Systems (IWS), University
+of Stuttgart, www.iws.uni-stuttgart.de/lh2/
+Pfaffenwaldring 61
+70569 Stuttgart
+
+Created on Fri Aug 9 2019
+
+"""
+
+import numpy as np
+import pandas as pd
+import sys
+import joblib
+import matplotlib
+
+matplotlib.use('agg')
+
+# import bayesvalidrox
+# Add BayesValidRox path
+sys.path.append("../../src/")
+print(sys.path)
+
+from bayesvalidrox import PyLinkForwardModel, Input, ExpDesigns, GPESkl, PostProcessing, BayesInference, Discrepancy, \
+    Engine
+
+if __name__ == "__main__":
+
+    # Number of parameters
+    ndim = 10  # 2, 10
+
+    # =====================================================
+    # ============= COMPUTATIONAL MODEL ================
+    # =====================================================
+    Model = PyLinkForwardModel()
+
+    Model.link_type = 'Function'
+    Model.py_file = 'analytical_function'
+    Model.name = 'AnalyticFunc'
+
+    Model.Output.names = ['Z']
+
+    # For Bayesian inversion: synthetic data generated with X = [0, 0]
+    Model.observations = {}
+    Model.observations['Time [s]'] = np.arange(0, 10, 1.) / 9
+    Model.observations['Z'] = np.repeat([2.], 10)
+
+    # For checking against the Monte Carlo reference
+    Model.mc_reference = {}
+    Model.mc_reference['Time [s]'] = np.arange(0, 10, 1.) / 9
+    Model.mc_reference['mean'] = np.load(f"data/mean_{ndim}.npy")
+    Model.mc_reference['std'] = np.load(f"data/std_{ndim}.npy")
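+
+    # Editorial note: np.arange(0, 10, 1.) / 9 gives 10 equidistant time points
+    # on [0, 1], i.e. the same grid as np.linspace(0, 1, 10).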
+
+    # =====================================================
+    # ========= PROBABILISTIC INPUT MODEL ==============
+    # =====================================================
+    # Define the uncertain parameters and their distributions
+    Inputs = Input()
+
+    # Assuming dependent input variables
+    # Inputs.Rosenblatt = True
+
+    for i in range(ndim):
+        Inputs.add_marginals()
+        Inputs.Marginals[i].name = "$\\theta_{" + str(i + 1) + "}$"
+        Inputs.Marginals[i].dist_type = 'uniform'
+        Inputs.Marginals[i].parameters = [-5, 5]
+
+    # arbitrary polynomial chaos
+    # inputParams = np.load('data/InputParameters_{}.npy'.format(ndim))
+    # for i in range(ndim):
+    #     Inputs.add_marginals()
+    #     Inputs.Marginals[i].name = f'$X_{i+1}$'
+    #     Inputs.Marginals[i].input_data = inputParams[:, i]
+
+    # =====================================================
+    # ========== DEFINITION OF THE METAMODEL ============
+    # =====================================================
+    MetaModelOpts = GPESkl(Inputs)  # , Model)
+
+    # Select if you want to preserve the spatial/temporal dependencies
+    # MetaModelOpts.dim_red_method = 'PCA'
+    # MetaModelOpts.var_pca_threshold = 99.999
+    # MetaModelOpts.n_pca_components = 10
+
+    # ToDo: This should not be an option for GPEs
+    # Select your metamodel method
+    # 1) PCE (Polynomial Chaos Expansion) 2) aPCE (arbitrary PCE)
+    # 3) GPE (Gaussian Process Emulator)
+    MetaModelOpts.meta_model_type = 'GPE'
+
+    # ------------------------------------------------
+    # ------------- GPE Specification ----------------
+    # ------------------------------------------------
+    # Select the solver used to estimate the GP hyperparameters via maximum likelihood
+    # ToDo: Remove this as a user-defined parameter, since only one is available?
+    # 1) LBFGS: the only option available with Scikit-Learn
+    MetaModelOpts._gpe_reg_method = 'LBFGS'
+
+    # Kernel options ----------------------------
+    # Loop over different kernels:
+    # 1) True to loop over the available kernel types and select the best one
+    MetaModelOpts._autoSelect = False
+
+    # Select kernel type:
+    # 1) RBF: Gaussian/squared exponential kernel
+    # 2) Matern kernel
+    # 3) RQ: Rational Quadratic kernel
+    MetaModelOpts._kernel_type = 'RBF'
+
+    MetaModelOpts._kernel_isotropy = False  # Kernel isotropy
+    MetaModelOpts._nugget = 1e-9  # Set regularization parameter (constant)
+    MetaModelOpts._kernel_noise = False  # Optimize regularization parameter
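+
+    # Editorial sketch (not part of the original example): with the settings
+    # above, GPESkl.build_kernels() constructs the scikit-learn kernel roughly as
+    #   1.0 * kernels.RBF(length_scale=[1.0] * ndim,
+    #                     length_scale_bounds=[(1e-5, 1e5)] * ndim)
+    # i.e. an anisotropic RBF (one length scale per input), while the constant
+    # _nugget is passed to GaussianProcessRegressor as alpha for regularization.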
+
+    # Bootstrapping
+    # 1) normal 2) fast
+    # MetaModelOpts.bootstrap_method = 'fast'  # ToDo: Remove this as a user-defined parameter, not used for GP?
+    MetaModelOpts.n_bootstrap_itrs = 1
+
+    # Print summary of the regression results
+    # MetaModelOpts.verbose = True
+
+    # ------------------------------------------------
+    # ------ Experimental Design Configuration -------
+    # ------------------------------------------------
+    ExpDesign = ExpDesigns(Inputs)
+
+    # One-shot (normal) or Sequential Adaptive (sequential) Design
+    ExpDesign.method = 'sequential'
+    ExpDesign.n_init_samples = 140  # 3*ndim
+
+    # Sampling methods
+    # 1) random 2) latin_hypercube 3) sobol 4) halton 5) hammersley
+    # 6) chebyshev(FT) 7) grid(FT) 8) user
+    ExpDesign.sampling_method = 'sobol'
+
+    # Provide the experimental design object with a hdf5 file
+    # ExpDesign.hdf5_file = 'ExpDesign_AnalyticFunc.hdf5'
+
+    # Set the sampling parameters
+    ExpDesign.n_new_samples = 100
+    ExpDesign.n_max_samples = 141  # 150 # sum of init + sequential
+    ExpDesign.mod_LOO_threshold = 1e-16
+
+    # ExpDesign.adapt_verbose = True
+    # 1) None 2) 'equal' 3) 'epsilon-decreasing' 4) 'adaptive'
+    ExpDesign.tradeoff_scheme = None
+    # MetaModelOpts.ExpDesign.n_replication = 5
+
+    # -------- Exploration ------
+    # 1) 'Voronoi' 2) 'random' 3) 'latin_hypercube' 4) 'LOOCV' 5) 'dual annealing'
+    ExpDesign.explore_method = 'random'
+
+    # Use when 'dual annealing' chosen
+    ExpDesign.max_func_itr = 1000
+
+    # Use when 'Voronoi' or 'random' or 'latin_hypercube' chosen
+    ExpDesign.n_canddidate = 1000
+    ExpDesign.n_cand_groups = 4
+
+    # -------- Exploitation ------
+    # 1) 'BayesOptDesign' 2) 'BayesActDesign' 3) 'VarOptDesign' 4) 'alphabetic'
+    # 5) 'Space-filling'
+    # ExpDesign.exploit_method = 'Space-filling'
+    ExpDesign.exploit_method = 'BayesActDesign'
+    ExpDesign.util_func = 'DKL'
+
+    # BayesOptDesign/BayesActDesign -> when data is available
+    # 1) MI (Mutual information) 2) ALC (Active learning McKay)
+    # 3) DKL (Kullback-Leibler divergence) 4) DPP (D-posterior-precision)
+    # 5) APP (A-posterior-precision)  # ['DKL', 'BME', 'infEntropy']
+    # ExpDesign.util_func = 'DKL'
+
+    # BayesActDesign -> when data is available
+    # 1) BME (Bayesian model evidence) 2) infEntropy (Information entropy)
+    # 3) DKL (Kullback-Leibler divergence)
+    # ExpDesign.util_func = 'DKL'
+
+    # VarBasedOptDesign -> when data is not available
+    # 1) ALM 2) EIGF 3) LOOCV
+    # or a combination as a list
+    # ExpDesign.util_func = 'EIGF'
+
+    # alphabetic
+    # 1) D-Opt (D-Optimality) 2) A-Opt (A-Optimality)
+    # 3) K-Opt (K-Optimality) or a combination as a list
+    # ExpDesign.util_func = 'D-Opt'
+
+    # Define the measurement error, if it is known a priori
+    obsData = pd.DataFrame(Model.observations, columns=Model.Output.names)
+    DiscrepancyOpts = Discrepancy('')
+    DiscrepancyOpts.type = 'Gaussian'
+    DiscrepancyOpts.parameters = obsData ** 2
+    MetaModelOpts.Discrepancy = DiscrepancyOpts
+
+    # Plot the posterior snapshots for SeqDesign
+    ExpDesign.post_snapshot = False
+    ExpDesign.step_snapshot = 1
+    ExpDesign.max_a_post = [0] * ndim
+
+    # For calculation of the validation error for SeqDesign
+    prior = np.load(f"data/Prior_{ndim}.npy")
+    prior_outputs = np.load(f"data/origModelOutput_{ndim}.npy")
+    likelihood = np.load(f"data/validLikelihoods_{ndim}.npy")
+    ExpDesign.valid_samples = prior[:500]
+    ExpDesign.valid_model_runs = {'Z': prior_outputs[:500]}
+
+    # Run using the engine
+    engine = Engine(MetaModelOpts, Model, ExpDesign)
+    # engine.train_sequential()
+    engine.train_normal()
+
+    # Load the objects
+    # with open(f"PCEModel_{Model.name}.pkl", "rb") as input:
+    #     PCEModel = joblib.load(input)
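+
+    # Editorial sketch (not part of the original example): once trained, the GP
+    # surrogate can also be queried directly; GPESkl.eval_metamodel() returns
+    # dicts of means and standard deviations per output key. The attribute names
+    # used below (engine.MetaModel, ExpDesign.generate_samples) are assumptions.
+    #   test_samples = ExpDesign.generate_samples(10, 'random')
+    #   gp_mean, gp_std = engine.MetaModel.eval_metamodel(test_samples)
+    #   print(gp_mean['Z'].shape, gp_std['Z'].shape)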
+
+    # =====================================================
+    # ========= POST PROCESSING OF METAMODELS ===========
+    # =====================================================
+    PostPCE = PostProcessing(engine)
+
+    # Plot to check validation visually.
+    PostPCE.valid_metamodel(n_samples=1)
+
+    # Compute and print RMSE error
+    PostPCE.check_accuracy(n_samples=300)
+
+    # =====================================================
+    # ======== Bayesian inference with Emulator ==========
+    # =====================================================
+    BayesOpts = BayesInference(engine)
+    BayesOpts.emulator = True
+    BayesOpts.plot_post_pred = True
+
+    # BayesOpts.selected_indices = [0, 3, 5, 7, 9]
+    # BME Bootstrap
+    BayesOpts.bootstrap = True
+    BayesOpts.n_bootstrap_itrs = 500
+    BayesOpts.bootstrap_noise = 100
+
+    # Bayesian cross validation
+    BayesOpts.bayes_loocv = True  # TODO: test what this does
+
+    # Select the inference method
+    import emcee
+
+    BayesOpts.inference_method = "MCMC"
+    # Set the MCMC parameters passed to self.mcmc_params
+    BayesOpts.mcmc_params = {
+        'n_steps': 1e4,  # 5,
+        'n_walkers': 30,
+        'moves': emcee.moves.KDEMove(),
+        'multiprocessing': False,
+        'verbose': False
+    }
+
+    # ----- Define the discrepancy model -------
+    obsData = pd.DataFrame(Model.observations, columns=Model.Output.names)
+    BayesOpts.measurement_error = obsData
+
+    # # -- (Option B) --
+    DiscrepancyOpts = Discrepancy('')
+    DiscrepancyOpts.type = 'Gaussian'
+    DiscrepancyOpts.parameters = obsData ** 2
+    BayesOpts.Discrepancy = DiscrepancyOpts
+
+    # -- (Option C) --
+    if 0:
+        DiscOutputOpts = Input()
+        # # # OutputName = 'Z'
+        DiscOutputOpts.add_marginals()
+        DiscOutputOpts.Marginals[0].name = '$\\sigma^2_{\\epsilon}$'
+        DiscOutputOpts.Marginals[0].dist_type = 'uniform'
+        DiscOutputOpts.Marginals[0].parameters = [0, 10]
+        # BayesOpts.Discrepancy = {'known': DiscrepancyOpts,
+        #                          'infer': Discrepancy(DiscOutputOpts)}
+
+    BayesOpts.bias_inputs = {'Z': np.arange(0, 10, 1.).reshape(-1, 1) / 9}
+
+    DiscOutputOpts = Input()
+    # OutputName = 'lambda'
+    DiscOutputOpts.add_marginals()
+    DiscOutputOpts.Marginals[0].name = '$\\lambda$'
+    DiscOutputOpts.Marginals[0].dist_type = 'uniform'
+    DiscOutputOpts.Marginals[0].parameters = [0, 1]
+
+    # # OutputName = 'sigma_f'
+    DiscOutputOpts.add_marginals()
+    DiscOutputOpts.Marginals[1].name = '$\\sigma_f$'
+    DiscOutputOpts.Marginals[1].dist_type = 'uniform'
+    DiscOutputOpts.Marginals[1].parameters = [0, 1e-4]
+    # BayesOpts.Discrepancy = Discrepancy(DiscOutputOpts)
+    BayesOpts.Discrepancy = {'known': DiscrepancyOpts,
+                             'infer': Discrepancy(DiscOutputOpts)}
+
+    # Start the calibration/inference
+    Bayes_PCE = BayesOpts.create_inference()
+
+    # Save class objects
+    with open(f'Bayes_{Model.name}.pkl', 'wb') as output:
+        joblib.dump(Bayes_PCE, output, 2)
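+
+    # Editorial aside (not part of the original example): the saved results can
+    # later be reloaded with joblib, e.g.
+    #   with open(f'Bayes_{Model.name}.pkl', 'rb') as f:
+    #       Bayes_PCE = joblib.load(f)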
diff --git a/src/bayesvalidrox/__init__.py b/src/bayesvalidrox/__init__.py
index 55cfa8ed17ae44c3be074d37d2b93a305dd0fe64..a824212ed6421c15ef2f2e9deb239f1dfead0ecd 100644
--- a/src/bayesvalidrox/__init__.py
+++ b/src/bayesvalidrox/__init__.py
@@ -4,6 +4,7 @@ __version__ = "1.1.0"
 from .pylink.pylink import PyLinkForwardModel
 from .surrogate_models.surrogate_models import MetaModel
 from .surrogate_models.polynomial_chaos import PCE
+from .surrogate_models.gaussian_process_sklearn import GPESkl
 from .surrogate_models.engine import Engine
 from .surrogate_models.inputs import Input
 from .surrogate_models.exp_designs import ExpDesigns
@@ -23,5 +24,6 @@ __all__ = [
     "ExpDesigns",
     "PostProcessing",
     "BayesInference",
-    "BayesModelComparison"
+    "BayesModelComparison",
+    "GPESkl"
 ]
diff --git a/src/bayesvalidrox/surrogate_models/gaussian_process_sklearn.py b/src/bayesvalidrox/surrogate_models/gaussian_process_sklearn.py
new file mode 100644
index 0000000000000000000000000000000000000000..e50cc20add44bc4a5d5bc44dabbe00e006b0f6ef
--- /dev/null
+++ b/src/bayesvalidrox/surrogate_models/gaussian_process_sklearn.py
@@ -0,0 +1,466 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Implementation of a Gaussian process emulator (GPE) metamodel using the
+Scikit-Learn library.
+"""
+
+import copy
+import math
+import os
+import warnings
+
+import functools
+import matplotlib.pyplot as plt
+import numpy as np
+import sklearn.gaussian_process.kernels as kernels
+from joblib import Parallel, delayed
+from sklearn.gaussian_process import GaussianProcessRegressor
+from sklearn.preprocessing import MinMaxScaler, StandardScaler
+from tqdm import tqdm
+
+from .surrogate_models import MetaModel, _preprocessing_fit, _bootstrap_fit, _preprocessing_eval, _bootstrap_eval
+
+warnings.filterwarnings("ignore")
+# warnings.simplefilter('default')
+# Load the mplstyle
+# noinspection SpellCheckingInspection
+plt.style.use(os.path.join(os.path.split(__file__)[0],
+                           '../', 'bayesvalidrox.mplstyle'))
+
+
+class MySklGPE(GaussianProcessRegressor):
+    """
+    Wrapper around the Scikit-Learn GP regressor that changes the default
+    values for the maximum number of iterations and the optimization tolerance.
+    """
+    def __init__(self, max_iter=10_000, gtol=1e-6, **kwargs):
+        super().__init__(**kwargs)
+        self.max_iter = max_iter
+        self.gtol = gtol
+
+
+class GPESkl(MetaModel):
+    """
+    GP MetaModel using the Scikit-Learn library.
+
+    This class trains a surrogate model of type Gaussian Process Regression.
+    It accepts an input object (input_obj) containing the specification of the
+    distributions for uncertain parameters and a model object with
+    instructions on how to run the computational model.
+
+    Attributes
+    ----------
+    input_obj : obj
+        Input object with the information on the model input parameters.
+    _meta_model_type : str
+        Surrogate model type, in this case GPE.
+        Default is GPE.
+    gpe_reg_method : str
+        GPE regression method used to compute the kernel hyperparameters.
+        The following regression method is available with the Scikit-Learn
+        library:
+        1. LBFGS
+        Default is `LBFGS`.
+    autoSelect : bool
+        Flag to loop through the available kernels and select the best one
+        based on the BME criterion.
+        Default is False.
+    kernel_type : str
+        Type of kernel to use and train for. The following Scikit-Learn
+        kernels are available:
+        1. RBF: Squared exponential kernel
+        2. Matern: Matern kernel
+        3. RQ: Rational quadratic kernel
+        Default is `'RBF'`.
+    isotropy : bool
+        Flag to train an isotropic kernel (one length scale for all input
+        parameters) or an anisotropic kernel (one length scale per dimension).
+        True for isotropy, False for an anisotropic kernel.
+        Default is True.
+    noisy : bool
+        Add a WhiteKernel for regularization purposes and optimize the noise
+        hyperparameter.
+        Default is False.
+    nugget : float
+        Constant value added to the kernel matrix for regularization purposes
+        (not optimized).
+        Default is 1e-9.
+    bootstrap_method : str
+        Bootstrapping method. Options are `'normal'` and `'fast'`. The default
+        is `'fast'`. It means that in each iteration except the first one, only
+        the coefficients are recalculated with the ordinary least squares
+        method.
+    n_bootstrap_itrs : int
+        Number of iterations for the bootstrap sampling. The default is `1`.
+    dim_red_method : str
+        Dimensionality reduction method for the output space.
+        The available method is based on principal component analysis (PCA).
+        The default is `'no'`. There are two ways to select the number of
+        components: a percentage threshold of the explained variance (between
+        0 and 100) (Option A) or a direct prescription of the number of
+        components (Option B):
+        >>> MetaModelOpts = MetaModel()
+        >>> MetaModelOpts.dim_red_method = 'PCA'
+        >>> MetaModelOpts.var_pca_threshold = 99.999  # Option A
+        >>> MetaModelOpts.n_pca_components = 12  # Option B
+    verbose : bool
+        Prints summary of the regression results. Default is `False`.
+    """
+
+    def __init__(self, input_obj, meta_model_type='GPE', gpe_reg_method='lbfgs',
+                 autoSelect=False, kernel_type='RBF', isotropy=True, noisy=False, nugget=1e-9,
+                 bootstrap_method='fast',  # ToDo: Remove this as a user-defined parameter, not used for GP?
+                 n_bootstrap_itrs=1,
+                 dim_red_method='no', verbose=False):
+
+        # Check if the surrogate outputs Gaussian results: always True for GPs
+        is_gaussian = True  # self.check_is_gaussian(n_bootstrap_itrs)
+
+        # Use parent init
+        super().__init__(input_obj, meta_model_type, bootstrap_method,
+                         n_bootstrap_itrs, dim_red_method, is_gaussian, verbose)
+
+        # Additional inputs
+        # Parameters that are not needed from the outside are denoted with '_'
+        self.meta_model_type = meta_model_type
+        self._gpe_reg_method = gpe_reg_method
+        self.regression_dict = {}
+        self._gpe_reg_options = {}
+
+        self._autoSelect = autoSelect
+        self._kernel_isotropy = isotropy
+        self._kernel_noise = noisy
+        self._kernel_type = kernel_type
+        self._nugget = nugget
+
+        # Currently needed as input for the 'engine', but not by the GPE class:
+        self._pce_deg = 1
+
+        # Other params
+        self._gp_poly = None
+        self._x_scaler = None
+        self._bme_score = None
+        self._kernel_name_dict = None
+        # self._LCerror = None
+
+        # Initialize the regression options as a dictionary
+
+    def check_is_gaussian(self, n_bootstrap_itrs) -> bool:
+        """
+        Check if the metamodel returns a mean and standard deviation.
+
+        Returns
+        -------
+        bool
+            Always True for GPs.
+
+        """
+        return True
+
+    def build_metamodel(self) -> None:
+        """
+        Builds the parts of the metamodel that are needed before fitting.
+        This is executed outside any loops related to e.g. bootstrap or
+        transformations such as PCA.
+
+        Returns
+        -------
+        None
+
+        """
+        # Initialize the nested dictionaries
+        self._gp_poly = self.auto_vivification()
+        self._x_scaler = self.auto_vivification()
+        self._bme_score = self.auto_vivification()
+        self._kernel_name_dict = self.auto_vivification()
+        # self._LCerror = self.auto_vivification()
+
+    def build_kernels(self):
+        """
+        Initializes the possible kernels and selects the ones to train,
+        depending on the input options.
+
+        ToDo: Add additional kernels
+        ToDo: Add option to include user-defined kernel
+
+        Returns
+        -------
+        list
+            The kernels to iterate over.
+        list
+            The names of the kernels to iterate over.
+        """
+        _ndim = self.InputSpace.ndim
+
+        # Set boundaries for length scales:
+        value = np.empty((), dtype=object)
+        value[()] = [1e-5, 1e5]
+
+        if self._kernel_isotropy:
+            # Isotropic kernel
+            ls_bounds = list(np.full(1, value, dtype=object))
+            ls_values = 1
+        else:
+            ls_bounds = list(np.full(_ndim, value, dtype=object))
+            ls_values = list(np.full(_ndim, 1))
+
+        # Generate list of probable kernels:
+        rbf_kernel = 1*kernels.RBF(length_scale=ls_values,
+                                   length_scale_bounds=ls_bounds)
+        matern_kernel = 1*kernels.Matern(length_scale=ls_values,
+                                         length_scale_bounds=ls_bounds,
+                                         nu=1.5)
+        rq_kernel = 1*kernels.RationalQuadratic(length_scale=ls_values,
+                                                length_scale_bounds=ls_bounds,
+                                                alpha=1)
+        kernel_dict = {'RBF': rbf_kernel,
+                       'Matern': matern_kernel,
+                       'RQ': rq_kernel}
+
+        if self._autoSelect:
+            kernel_list = list(kernel_dict.values())
+            kernel_names = list(kernel_dict.keys())
+        else:
+            try:
+                kernel_list = [kernel_dict[self._kernel_type]]
+                kernel_names = [self._kernel_type]
+            except KeyError:
+                print(f'The kernel option {self._kernel_type} is not available. An RBF kernel was chosen instead')
+                kernel_list = [kernel_dict['RBF']]
+                kernel_names = ['RBF']
+
+        return kernel_list, kernel_names
+
+    @staticmethod
+    def transform_x(X: np.array, transform_type='norm'):
+        """
+        Scales the inputs (X) during training, either by normalizing to [0, 1]
+        or by standardizing to N(0, 1). If None, the inputs are not scaled.
+
+        Parameters
+        ----------
+        X : 2D list or np.array of shape (#samples, #dim)
+            The parameter value combinations to train the model with.
+        transform_type : str
+            Transformation to apply to the input parameters.
+            Default is 'norm'.
+
+        Returns
+        -------
+        np.array : (#samples, #dim)
+            Transformed input parameters.
+        obj : Scaler object
+            Transformation object, for future transformations during surrogate
+            evaluation.
+
+        """
+        if transform_type is None:
+            scaler = None
+            X_S = X
+        else:
+            if transform_type.lower() == 'norm':
+                scaler = MinMaxScaler()
+                X_S = scaler.fit_transform(X)
+            elif transform_type.lower() == 'standard':
+                scaler = StandardScaler()
+                X_S = scaler.fit_transform(X)
+            else:
+                print(f'No scaler {transform_type} found. No scaling was done')
+                scaler = None
+                X_S = X
+
+        return X_S, scaler
+
+    @_preprocessing_fit
+    @_bootstrap_fit
+    def fit(self, X: np.array, y: dict, parallel=False, verbose=False, b_i=0):
+        """
+        Fits the surrogate to the given data (samples X, outputs y).
+        Note here that the samples X should be the transformed samples provided
+        by the experimental design if the transformation is used there.
+
+        Parameters
+        ----------
+        X : 2D list or np.array of shape (#samples, #dim)
+            The parameter value combinations that the model was evaluated at.
+        y : dict of 2D lists or arrays of shape (#samples, #timesteps)
+            The respective model evaluations.
+        parallel : bool
+            Set to True to run the training in parallel for various keys.
+            The default is False.
+        verbose : bool
+            Set to True to obtain more information during runtime.
+            The default is False.
+
+        Returns
+        -------
+        None.
+
+        """
+
+        # For loop over the components/outputs
+        if self.verbose and self.n_bootstrap_itrs == 1:
+            items = tqdm(y.items(), desc="Fitting regression")
+        else:
+            items = y.items()
+
+        # Transform inputs:
+        # ToDO: Do we do this although the inputs are already transformed in the preprocessing decorator?
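+        # Editorial note: with the default transform_type='norm', transform_x
+        # rescales each input dimension to [0, 1] with sklearn's MinMaxScaler
+        # (e.g. a column [0, 5, 10] becomes [0, 0.5, 1]); the fitted scaler is
+        # stored in _x_scaler so eval_metamodel can apply the same mapping.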
+        # ToDo: Is this better here or in the adaptive_regression function?
+        X_S, scaler = self.transform_x(X=X)  # , transform_type=None)
+        self._x_scaler[f'b_{b_i + 1}'] = scaler
+
+        for key, output in items:
+            # Parallel fit regression
+            out = None
+            if parallel:  # and (not self.fast_bootstrap or b_i == 0):
+                out = Parallel(n_jobs=-1, backend='multiprocessing')(
+                    delayed(self.adaptive_regression)(X_S,
+                                                      output[:, idx],
+                                                      idx, self.verbose)
+                    for idx in range(output.shape[1]))
+            elif not parallel:  # and (not self.fast_bootstrap or b_i == 0):
+                results = map(functools.partial(self.adaptive_regression, X_S, verbose=self.verbose),
+                              [output[:, idx] for idx in
+                               range(output.shape[1])],
+                              range(output.shape[1]))
+                out = list(results)
+
+            # Create a dict to pass the variables
+            for i in range(output.shape[1]):
+                self._gp_poly[f'b_{b_i + 1}'][key][f"y_{i + 1}"] = out[i]['gp']
+                self._bme_score[f'b_{b_i + 1}'][key][f"y_{i + 1}"] = out[i]['BME']
+                self._kernel_name_dict[f'b_{b_i + 1}'][key][f"y_{i + 1}"] = out[i]['kernel_name']
+
+    # --------------------------------------------------------------------------------------------------------
+    def adaptive_regression(self, X, y, varIdx, verbose=False):
+        """
+        Adaptively fits the GPE model by comparing different kernel options.
+
+        Parameters
+        ----------
+        X : array of shape (n_samples, ndim)
+            Training set. These samples should be already transformed.
+        y : array of shape (n_samples,)
+            Target values, i.e. simulation results for the experimental design.
+        varIdx : int
+            Index of the output.
+        verbose : bool, optional
+            Print out summary. The default is False.
+
+        Returns
+        -------
+        returnVars : dict
+            Fitted estimator, BME score and kernel name.
+
+        """
+
+        # Initialization
+        gp_list = {}
+        BME = []
+
+        # Get kernels:
+        kernel_list, kernel_names = self.build_kernels()
+
+        for i, K in enumerate(kernel_list):
+            if self._kernel_noise:
+                K = K + kernels.WhiteKernel(noise_level=np.std(y)/math.sqrt(2))
+
+            # ToDo: Check if we want to increase the number of iterations/tolerance. Set n_restarts as a variable?
+            # gp_list[i] = MySklGPE(kernel=K,
+            #                       n_restarts_optimizer=20,
+            #                       alpha=self._nugget,
+            #                       normalize_y=True)
+            gp_list[i] = GaussianProcessRegressor(kernel=K,
+                                                  n_restarts_optimizer=10,
+                                                  alpha=self._nugget,
+                                                  normalize_y=True)
+
+            # Fit to data using Maximum Likelihood Estimation
+            gp_list[i].fit(X, y)
+
+            # Store the MLE as BME score
+            BME.append(gp_list[i].log_marginal_likelihood())
+
+        # Select the GP with the highest BME
+        idx_max = np.argmax(BME)
+        gp = gp_list[idx_max]
+
+        if varIdx is not None and verbose:
+            gp_score = gp.score(X, y)
+            print('=' * 50)
+            print(' ' * 10 + ' Summary of results ')
+            print('=' * 50)
+
+            print(f'Output variable {varIdx}:')
+            print('The estimation of GPE coefficients converged,')
+            print(f'with the R^2 score: {gp_score:.3f} ')
+            print(f'using a {kernel_names[idx_max]} Kernel')
+            print('=' * 50)
+
+        # Create a dict to pass the outputs
+        # ToDo: Save the kernel hyperparameters?
+        returnVars = {}
+        returnVars['gp'] = gp
+        # Store the selected kernel's log-marginal likelihood as the BME score
+        returnVars['BME'] = BME[idx_max]
+        returnVars['kernel_name'] = kernel_names[idx_max]
+
+        return returnVars
+
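+    # Editorial note: fit() stores one trained GaussianProcessRegressor per
+    # output key and per time step, e.g. self._gp_poly['b_1']['Z']['y_1'] is
+    # the GP for output 'Z' at the first time index of bootstrap iteration 1;
+    # the matching BME score and kernel name are kept in _bme_score and
+    # _kernel_name_dict under the same keys.
+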
+    # -------------------------------------------------------------------------
+    @staticmethod
+    def scale_x(X: np.array, transform_obj: object):
+        """
+        Transforms the inputs based on the scaling done during training.
+
+        Parameters
+        ----------
+        X : 2D list or np.array of shape (#samples, #dim)
+            The parameter value combinations to evaluate the model with.
+        transform_obj : Scikit-Learn object
+            Class instance to transform the inputs.
+
+        Returns
+        -------
+        np.array : (#samples, #dim)
+            Transformed input sets.
+        """
+        if transform_obj is None:
+            x_t = X
+        else:
+            x_t = transform_obj.transform(X)
+        return x_t
+
+    @_preprocessing_eval
+    @_bootstrap_eval
+    def eval_metamodel(self, samples, b_i=0):
+        """
+        Evaluates the GP metamodel at the requested samples.
+
+        Parameters
+        ----------
+        samples : array of shape (n_samples, ndim)
+            Samples to evaluate the metamodel at.
+
+        Returns
+        -------
+        mean_pred : dict
+            Mean of the predictions.
+        std_pred : dict
+            Standard deviation of the predictions.
+        """
+        # Transform the input parameters
+        samples_sc = self.scale_x(X=samples, transform_obj=self._x_scaler[f'b_{b_i + 1}'])
+
+        # Extract model dictionary
+        model_dict = self._gp_poly[f'b_{b_i + 1}']
+
+        # Loop over outputs
+        mean_pred = {}
+        std_pred = {}
+
+        for output, values in model_dict.items():
+            mean = np.empty((len(samples), len(values)))
+            std = np.empty((len(samples), len(values)))
+            idx = 0
+            for in_key, _ in values.items():
+
+                gp = self._gp_poly[f'b_{b_i + 1}'][output][in_key]
+
+                # Prediction
+                y_mean, y_std = gp.predict(samples_sc, return_std=True)
+
+                mean[:, idx] = y_mean
+                std[:, idx] = y_std
+                idx += 1
+
+            mean_pred[output] = mean
+            std_pred[output] = std
+
+        return mean_pred, std_pred
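+
+# Editorial usage sketch (illustrative only, not part of the original module):
+# in practice the bayesvalidrox Engine drives these calls, but standalone use
+# would look roughly like
+#   mm = GPESkl(input_obj)
+#   mm._kernel_type = 'Matern'       # or 'RBF' / 'RQ'
+#   mm.build_metamodel()
+#   mm.fit(X_train, {'Z': y_train})  # X_train: (n_samples, ndim), y_train: (n_samples, n_steps)
+#   mean, std = mm.eval_metamodel(X_new)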