diff --git a/.coverage b/.coverage index 5a50f3b44fe726ce44b701f18e81b5a7962fe24f..fb40a106ecddff155efa3de1fc357ccf13425598 100644 Binary files a/.coverage and b/.coverage differ diff --git a/examples/analytical-function/test_analytical_function.py b/examples/analytical-function/test_analytical_function.py index edb0cee0c00ca98c64c1f5b7a2ac8b076471f688..e356d37da7c51b12990e4392d87805f6bad06010 100644 --- a/examples/analytical-function/test_analytical_function.py +++ b/examples/analytical-function/test_analytical_function.py @@ -20,20 +20,23 @@ import numpy as np import pandas as pd import sys import joblib +import matplotlib +matplotlib.use('agg') + # import bayesvalidrox # Add BayesValidRox path sys.path.append("src/") from bayesvalidrox.pylink.pylink import PyLinkForwardModel from bayesvalidrox.surrogate_models.inputs import Input +from bayesvalidrox.surrogate_models.exp_designs import ExpDesigns from bayesvalidrox.surrogate_models.surrogate_models import MetaModel from bayesvalidrox.surrogate_models.meta_model_engine import MetaModelEngine from bayesvalidrox.post_processing.post_processing import PostProcessing from bayesvalidrox.bayes_inference.bayes_inference import BayesInference from bayesvalidrox.bayes_inference.discrepancy import Discrepancy -import matplotlib -matplotlib.use('agg') +from bayesvalidrox.surrogate_models.engine import Engine if __name__ == "__main__": @@ -88,7 +91,7 @@ if __name__ == "__main__": # ===================================================== # ========== DEFINITION OF THE METAMODEL ============ # ===================================================== - MetaModelOpts = MetaModel(Inputs, Model) + MetaModelOpts = MetaModel(Inputs)#, Model) # Select if you want to preserve the spatial/temporal depencencies # MetaModelOpts.dim_red_method = 'PCA' @@ -134,24 +137,33 @@ if __name__ == "__main__": # ------------------------------------------------ # ------ Experimental Design Configuration ------- # ------------------------------------------------ - MetaModelOpts.add_ExpDesign() + ExpDesign = ExpDesigns(Inputs) # One-shot (normal) or Sequential Adaptive (sequential) Design - MetaModelOpts.ExpDesign.method = 'normal' - MetaModelOpts.ExpDesign.n_init_samples = 3*ndim + ExpDesign.method = 'normal' + ExpDesign.n_init_samples = 150#3*ndim # Sampling methods # 1) random 2) latin_hypercube 3) sobol 4) halton 5) hammersley # 6) chebyshev(FT) 7) grid(FT) 8)user - MetaModelOpts.ExpDesign.sampling_method = 'latin_hypercube' + ExpDesign.sampling_method = 'latin_hypercube' # Provide the experimental design object with a hdf5 file # MetaModelOpts.ExpDesign.hdf5_file = 'ExpDesign_AnalyticFunc.hdf5' - - # Train the meta model - meta_model_engine = MetaModelEngine(MetaModelOpts) - meta_model_engine.run() - PCEModel = meta_model_engine.MetaModel + + if 0: + # Train the meta model with the old engine + MetaModelOpts.ExpDesign = ExpDesign + meta_model_engine = MetaModelEngine(MetaModelOpts) + meta_model_engine.run() + PCEModel = meta_model_engine.MetaModel + + if 1: + # Test the new engine + engine = Engine(MetaModelOpts, Model, ExpDesign) + engine.start_engine() + engine.train_normal() + PCEModel = engine.MetaModel # Save PCE models with open(f'PCEModel_{Model.name}.pkl', 'wb') as output: @@ -164,7 +176,7 @@ if __name__ == "__main__": # ===================================================== # ========= POST PROCESSING OF METAMODELS =========== # ===================================================== - PostPCE = PostProcessing(PCEModel) + PostPCE = PostProcessing(PCEModel, Model) # Plot to check validation visually. PostPCE.valid_metamodel(n_samples=1) diff --git a/src/bayesvalidrox/post_processing/post_processing.py b/src/bayesvalidrox/post_processing/post_processing.py index 4db354bbd541affdc50a8dce37eb293188091f73..0b77fa3802dcde9399990a8d3dd2f962de444693 100644 --- a/src/bayesvalidrox/post_processing/post_processing.py +++ b/src/bayesvalidrox/post_processing/post_processing.py @@ -32,8 +32,9 @@ class PostProcessing: expected to be performed change this to `'valid'`. """ - def __init__(self, MetaModel, name='calib'): + def __init__(self, MetaModel, Model, name='calib'): self.MetaModel = MetaModel + self.ModelObj = Model self.name = name # ------------------------------------------------------------------------- @@ -60,7 +61,7 @@ class PostProcessing: bar_plot = True if plot_type == 'bar' else False meta_model_type = self.MetaModel.meta_model_type - Model = self.MetaModel.ModelObj + Model = self.ModelObj # Read Monte-Carlo reference self.mc_reference = Model.read_mc_reference() @@ -170,7 +171,7 @@ class PostProcessing: """ MetaModel = self.MetaModel - Model = MetaModel.ModelObj + Model = self.ModelObj if samples is None: self.n_samples = n_samples @@ -229,7 +230,7 @@ class PostProcessing: """ MetaModel = self.MetaModel - Model = MetaModel.ModelObj + Model = self.ModelObj # Set the number of samples if n_samples: @@ -570,7 +571,7 @@ class PostProcessing: sobol_cell_, total_sobol_ = {}, {} - for output in PCEModel.ModelObj.Output.names: + for output in self.ModelObj.Output.names: n_meas_points = len(coeffs_dict[f'b_{b_i+1}'][output]) @@ -758,7 +759,7 @@ class PostProcessing: total_sobol_all[k][i] = v self.total_sobol = {} - for output in PCEModel.ModelObj.Output.names: + for output in self.ModelObj.Output.names: self.total_sobol[output] = np.mean(total_sobol_all[output], axis=0) # ---------------- Plot ----------------------- @@ -771,7 +772,7 @@ class PostProcessing: fig = plt.figure() - for outIdx, output in enumerate(PCEModel.ModelObj.Output.names): + for outIdx, output in enumerate(self.ModelObj.Output.names): # Extract total Sobol indices total_sobol = self.total_sobol[output] @@ -961,7 +962,7 @@ class PostProcessing: self.n_samples = 1000 PCEModel = self.MetaModel - Model = self.MetaModel.ModelObj + Model = self.ModelObj n_samples = self.n_samples # Create 3D-Grid @@ -1063,7 +1064,7 @@ class PostProcessing: """ MetaModel = self.MetaModel - outputs = MetaModel.ModelObj.Output.names + outputs = self.ModelObj.Output.names pce_means_b = {} pce_stds_b = {} @@ -1174,7 +1175,7 @@ class PostProcessing: Dictionary of results. """ - Model = self.MetaModel.ModelObj + Model = self.ModelObj if samples is None: samples = self._get_sample() @@ -1274,7 +1275,7 @@ class PostProcessing: None. """ - Model = self.MetaModel.ModelObj + Model = self.ModelObj newpath = f'Outputs_PostProcessing_{self.name}/' if not os.path.exists(newpath): diff --git a/src/bayesvalidrox/surrogate_models/engine.py b/src/bayesvalidrox/surrogate_models/engine.py new file mode 100644 index 0000000000000000000000000000000000000000..6320b6e7e5d744882ed23d3fe3572fdb1a653372 --- /dev/null +++ b/src/bayesvalidrox/surrogate_models/engine.py @@ -0,0 +1,108 @@ +# -*- coding: utf-8 -*- +""" +Created on Sat Dec 16 10:16:50 2023 +Engine to train the surrogate with + +@author: Rebecca Kohlhaas +""" +import h5py +import numpy as np +import os + +from .inputs import Input +from .exp_designs import ExpDesigns +from .surrogate_models import MetaModel + +class Engine(): + + + def __init__(self, MetaMod, Model, ExpDes): + self.MetaModel = MetaMod + self.Model = Model + self.ExpDesign = ExpDes + + def start_engine(self) -> None: + """ + Do all the preparations that need to be run before the actual training + + Returns + ------- + None + + """ + self.out_names = self.Model.Output.names + self.MetaModel.out_names = self.out_names + + # Add expDesign to MetaModel if it does not have any + if not hasattr(self.MetaModel, 'ExpDesign'): + self.MetaModel.ExpDesign = self.ExpDesign + + def train_normal(self, parallel = False, verbose = False) -> None: + """ + Trains surrogate on static samples only. + Samples are taken from the experimental design and the specified + model is run on them. + Alternatively the samples can be read in from a provided hdf5 file. + + Returns + ------- + None + + """ + + ExpDesign = self.ExpDesign + MetaModel = self.MetaModel + # Read ExpDesign (training and targets) from the provided hdf5 + if ExpDesign.hdf5_file is not None: + # TODO: need to run 'generate_ED' as well after this or not? + ExpDesign.read_from_file(self.out_names) + else: + # Check if an old hdf5 file exists: if yes, rename it + hdf5file = f'ExpDesign_{self.Model.name}.hdf5' + if os.path.exists(hdf5file): + os.rename(hdf5file, 'old_'+hdf5file) + + # ---- Prepare X samples ---- + # For training the surrogate use ExpDesign.X_tr, ExpDesign.X is for the model to run on + ExpDesign.generate_ED(ExpDesign.n_init_samples, + transform=True, + max_pce_deg=np.max(MetaModel.pce_deg)) + + # ---- Run simulations at X ---- + if not hasattr(ExpDesign, 'Y') or ExpDesign.Y is None: + print('\n Now the forward model needs to be run!\n') + ED_Y, up_ED_X = self.Model.run_model_parallel(ExpDesign.X) + ExpDesign.Y = ED_Y + else: + # Check if a dict has been passed. + if not type(ExpDesign.Y) is dict: + raise Exception('Please provide either a dictionary or a hdf5' + 'file to ExpDesign.hdf5_file argument.') + + # Separate output dict and x-values + if 'x_values' in ExpDesign.Y: + ExpDesign.x_values = ExpDesign.Y['x_values'] + del ExpDesign.Y['x_values'] + + MetaModel.ndim = ExpDesign.ndim + + # Build and train the MetaModel on the static samples + MetaModel.CollocationPoints = ExpDesign.X_tr + MetaModel.build_metamodel() + + # Fit the surrogate + MetaModel.fit(ExpDesign.X_tr, ExpDesign.Y, parallel, verbose) + + + def train_sequential(self, parallel = False, verbose = False) -> None: + """ + Train the surrogate in a sequential manner. + First build and train evereything on the static samples, then iterate + choosing more samples and refitting the surrogate on them. + + Returns + ------- + None + + """ + self.train_normal(parallel, verbose) \ No newline at end of file diff --git a/src/bayesvalidrox/surrogate_models/exp_designs.py b/src/bayesvalidrox/surrogate_models/exp_designs.py index 602717cf7e7b3ffa8a2cea7e73c9c235988376a5..3e26c8153fa2dcf2b744f51d878e7de4f176f31b 100644 --- a/src/bayesvalidrox/surrogate_models/exp_designs.py +++ b/src/bayesvalidrox/surrogate_models/exp_designs.py @@ -202,7 +202,7 @@ class ExpDesigns: step_snapshot=1, max_a_post=[], adapt_verbose=False): self.InputObj = Input - self.method = method + self.method = method # TODO: this still doing sth? self.meta_Model_type = meta_Model_type self.sampling_method = sampling_method self.hdf5_file = hdf5_file @@ -344,14 +344,15 @@ class ExpDesigns: Returns ------- - samples : array of shape (n_samples, n_params) - Selected training samples. + None """ - Inputs = self.InputObj - if not hasattr(self, 'n_init_samples'): - self.n_init_samples = self.ndim + 1 + if n_samples <0: + raise ValueError('A negative number of samples cannot be created. Please provide positive n_samples') n_samples = int(n_samples) + + if not hasattr(self, 'n_init_samples'): + self.n_init_samples = n_samples # Generate the samples based on requested method self.init_param_space(max_pce_deg) @@ -398,101 +399,67 @@ class ExpDesigns: method=sampling_method ) if sampling_method == 'user' or not self.apce: - return samples, tr_samples + self.X = samples + self.X_tr = tr_samples else: - return tr_samples, samples + self.X = tr_samples + self.X_tr = samples + #return tr_samples, samples # TODO: why is this swapped here? else: - return samples - - - # ------------------------------------------------------------------------- - def generate_training_data(self, Model, max_deg): + self.X = samples + self.X_tr = None + + def read_from_file(self, out_names): """ - Prepares the experimental design either by reading from the prescribed - data or running simulations. + Reads in the ExpDesign from a provided h5py file and saves the results. Parameters ---------- - Model : obj - Model object. - - Raises - ------ - Exception - If model sumulations are not provided properly. + out_names : list of strings + The keys that are in the outputs (y) saved in the provided file. Returns ------- - ED_X_tr: array of shape (n_samples, n_params) - Training samples transformed by an isoprobabilistic transformation. - ED_Y: dict - Model simulations (target) for all outputs. + None. + """ - if self.ExpDesignFlag != 'sequential': - # Read ExpDesign (training and targets) from the provided hdf5 - if self.hdf5_file is not None: + if self.hdf5_file == None: + raise AttributeError('ExpDesign cannot be read in, please provide hdf5 file first') - # Read hdf5 file - f = h5py.File(self.hdf5_file, 'r+') + # Read hdf5 file + f = h5py.File(self.hdf5_file, 'r+') - # Read EDX and pass it to ExpDesign object - try: - self.X = np.array(f["EDX/New_init_"]) - except KeyError: - self.X = np.array(f["EDX/init_"]) + # Read EDX and pass it to ExpDesign object + try: + self.X = np.array(f["EDX/New_init_"]) + except KeyError: + self.X = np.array(f["EDX/init_"]) - # Update number of initial samples - self.n_init_samples = self.X.shape[0] + # Update number of initial samples + self.n_init_samples = self.X.shape[0] - # Read EDX and pass it to ExpDesign object - out_names = self.ModelObj.Output.names - self.Y = {} + # Read EDX and pass it to ExpDesign object + self.Y = {} - # Extract x values - try: - self.Y["x_values"] = dict() - for varIdx, var in enumerate(out_names): - x = np.array(f[f"x_values/{var}"]) - self.Y["x_values"][var] = x - except KeyError: - self.Y["x_values"] = np.array(f["x_values"]) - - # Store the output - for varIdx, var in enumerate(out_names): - try: - y = np.array(f[f"EDY/{var}/New_init_"]) - except KeyError: - y = np.array(f[f"EDY/{var}/init_"]) - self.Y[var] = y - f.close() - else: - # Check if an old hdf5 file exists: if yes, rename it - hdf5file = f'ExpDesign_{self.ModelObj.name}.hdf5' - if os.path.exists(hdf5file): - os.rename(hdf5file, 'old_'+hdf5file) - - # ---- Prepare X samples ---- - ED_X, ED_X_tr = self.generate_ED(self.n_init_samples, - transform=True, - max_pce_deg=max_deg) - self.X = ED_X - self.collocationPoints = ED_X_tr + # Extract x values + try: + self.Y["x_values"] = dict() + for varIdx, var in enumerate(out_names): + x = np.array(f[f"x_values/{var}"]) + self.Y["x_values"][var] = x + except KeyError: + self.Y["x_values"] = np.array(f["x_values"]) + + # Store the output + for varIdx, var in enumerate(out_names): + try: + y = np.array(f[f"EDY/{var}/New_init_"]) + except KeyError: + y = np.array(f[f"EDY/{var}/init_"]) + self.Y[var] = y + f.close() - # ---- Run simulations at X ---- - if not hasattr(self, 'Y') or self.Y is None: - print('\n Now the forward model needs to be run!\n') - ED_Y, up_ED_X = Model.run_model_parallel(ED_X) - self.X = up_ED_X - self.Y = ED_Y - else: - # Check if a dict has been passed. - if type(self.Y) is dict: - self.Y = self.Y - else: - raise Exception('Please provide either a dictionary or a hdf5' - 'file to ExpDesign.hdf5_file argument.') - self.X = ED_X_tr - return self.X, self.Y + # ------------------------------------------------------------------------- def init_param_space(self, max_deg=None): diff --git a/src/bayesvalidrox/surrogate_models/input_space.py b/src/bayesvalidrox/surrogate_models/input_space.py new file mode 100644 index 0000000000000000000000000000000000000000..adaa807ad913187ec5838896e6d940f0e69b9ff1 --- /dev/null +++ b/src/bayesvalidrox/surrogate_models/input_space.py @@ -0,0 +1,539 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import numpy as np +import math +import itertools +import chaospy +import scipy.stats as st +from tqdm import tqdm +import h5py +import os + +from .apoly_construction import apoly_construction + + +class InputSpace: + """ + This class generates the input space for the metamodel polynomials + using the marginals defined in the `Input` object. + + Attributes + ---------- + Input : obj + Input object containing the parameter marginals, i.e. name, + distribution type and distribution parameters or available raw data. + method : str + Type of the experimental design. The default is `'normal'`. Other + option is `'sequential'`. + meta_Model_type : str + Type of the meta_Model_type. + sampling_method : str + Name of the sampling method for the experimental design. The following + sampling method are supported: + + * random + * latin_hypercube + * sobol + * halton + * hammersley + * chebyshev(FT) + * grid(FT) + * user + hdf5_file : str + Name of the hdf5 file that contains the experimental design. + n_new_samples : int + Number of (initial) training points. + n_max_samples : int + Number of maximum training points. + mod_LOO_threshold : float + The modified leave-one-out cross validation threshold where the + sequential design stops. + tradeoff_scheme : str + Trade-off scheme to assign weights to the exploration and exploitation + scores in the sequential design. + n_canddidate : int + Number of candidate training sets to calculate the scores for. + explore_method : str + Type of the exploration method for the sequential design. The following + methods are supported: + + * Voronoi + * random + * latin_hypercube + * LOOCV + * dual annealing + exploit_method : str + Type of the exploitation method for the sequential design. The + following methods are supported: + + * BayesOptDesign + * BayesActDesign + * VarOptDesign + * alphabetic + * Space-filling + util_func : str or list + The utility function to be specified for the `exploit_method`. For the + available utility functions see Note section. + n_cand_groups : int + Number of candidate groups. Each group of candidate training sets will + be evaulated separately in parallel. + n_replication : int + Number of replications. Only for comparison. The default is 1. + post_snapshot : int + Whether to plot the posterior in the sequential design. The default is + `True`. + step_snapshot : int + The number of steps to plot the posterior in the sequential design. The + default is 1. + max_a_post : list or array + Maximum a posteriori of the posterior distribution, if known. The + default is `[]`. + adapt_verbose : bool + Whether to plot the model response vs that of metamodel for the new + trining point in the sequential design. + + Note + ---------- + The following utiliy functions for the **exploitation** methods are + supported: + + #### BayesOptDesign (when data is available) + - DKL (Kullback-Leibler Divergence) + - DPP (D-Posterior-percision) + - APP (A-Posterior-percision) + + #### VarBasedOptDesign -> when data is not available + - Entropy (Entropy/MMSE/active learning) + - EIGF (Expected Improvement for Global fit) + - LOOCV (Leave-one-out Cross Validation) + + #### alphabetic + - D-Opt (D-Optimality) + - A-Opt (A-Optimality) + - K-Opt (K-Optimality) + """ + + def __init__(self, Input, method='normal', meta_Model_type='pce', + sampling_method='random', hdf5_file=None, + n_new_samples=1, n_max_samples=None, mod_LOO_threshold=1e-16, + tradeoff_scheme=None, n_canddidate=1, explore_method='random', + exploit_method='Space-filling', util_func='Space-filling', + n_cand_groups=4, n_replication=1, post_snapshot=False, + step_snapshot=1, max_a_post=[], adapt_verbose=False): + + self.InputObj = Input + self.method = method # TODO: this still doing sth? + self.meta_Model_type = meta_Model_type + self.sampling_method = sampling_method + self.hdf5_file = hdf5_file + self.n_new_samples = n_new_samples + self.n_max_samples = n_max_samples + self.mod_LOO_threshold = mod_LOO_threshold + self.explore_method = explore_method + self.exploit_method = exploit_method + self.util_func = util_func + self.tradeoff_scheme = tradeoff_scheme + self.n_canddidate = n_canddidate + self.n_cand_groups = n_cand_groups + self.n_replication = n_replication + self.post_snapshot = post_snapshot + self.step_snapshot = step_snapshot + self.max_a_post = max_a_post + self.adapt_verbose = adapt_verbose + + # Other + self.apce = None + self.ndim = None + + # Init + self.check_valid_inputs() + + + def check_valid_inputs(self)-> None: + """ + Check if the given InputObj is valid to use for further calculations: + Has some Marginals + Marginals have valid priors + All Marginals given as the same type (samples vs dist) + + Returns + ------- + None + + """ + Inputs = self.InputObj + self.ndim = len(Inputs.Marginals) + + # Check if PCE or aPCE metamodel is selected. + # TODO: test also for 'pce'?? + if self.meta_Model_type.lower() == 'apce': + self.apce = True + else: + self.apce = False + + # check if marginals given + if not self.ndim >=1: + raise AssertionError('Cannot build distributions if no marginals are given') + + # check that each marginal is valid + for marginals in Inputs.Marginals: + if len(marginals.input_data) == 0: + if marginals.dist_type == None: + raise AssertionError('Not all marginals were provided priors') + break + if np.array(marginals.input_data).shape[0] and (marginals.dist_type != None): + raise AssertionError('Both samples and distribution type are given. Please choose only one.') + break + + # Check if input is given as dist or input_data. + self.input_data_given = -1 + for marg in Inputs.Marginals: + #print(self.input_data_given) + size = np.array(marg.input_data).shape[0] + #print(f'Size: {size}') + if size and abs(self.input_data_given) !=1: + self.input_data_given = 2 + break + if (not size) and self.input_data_given > 0: + self.input_data_given = 2 + break + if not size: + self.input_data_given = 0 + if size: + self.input_data_given = 1 + + if self.input_data_given == 2: + raise AssertionError('Distributions cannot be built as the priors have different types') + + + # Get the bounds if input_data are directly defined by user: + if self.input_data_given: + for i in range(self.ndim): + low_bound = np.min(Inputs.Marginals[i].input_data) + up_bound = np.max(Inputs.Marginals[i].input_data) + Inputs.Marginals[i].parameters = [low_bound, up_bound] + + # ------------------------------------------------------------------------- + def generate_ED(self, n_samples, transform=False, + max_pce_deg=None): + """ + Generates experimental designs (training set) with the given method. + + Parameters + ---------- + n_samples : int + Number of requested training points. + sampling_method : str, optional + Sampling method. The default is `'random'`. + transform : bool, optional + Isoprobabilistic transformation. The default is `False`. + max_pce_deg : int, optional + Maximum PCE polynomial degree. The default is `None`. + + Returns + ------- + None + + """ + if n_samples <0: + raise ValueError('A negative number of samples cannot be created. Please provide positive n_samples') + n_samples = int(n_samples) + + if not hasattr(self, 'n_init_samples'): + self.n_init_samples = n_samples + + # Generate the samples based on requested method + self.init_param_space(max_pce_deg) + + + # ------------------------------------------------------------------------- + def init_param_space(self, max_deg=None): + """ + Initializes parameter space. + + Parameters + ---------- + max_deg : int, optional + Maximum degree. The default is `None`. + + Returns + ------- + raw_data : array of shape (n_params, n_samples) + Raw data. + bound_tuples : list of tuples + A list containing lower and upper bounds of parameters. + + """ + Inputs = self.InputObj + ndim = self.ndim + rosenblatt_flag = Inputs.Rosenblatt + mc_size = 50000 + + # Save parameter names + self.par_names = [] + for parIdx in range(ndim): + self.par_names.append(Inputs.Marginals[parIdx].name) + + # Create a multivariate probability distribution + # TODO: change this to make max_deg obligatory? at least in some specific cases? + if max_deg is not None: + JDist, poly_types = self.build_polytypes(rosenblatt=rosenblatt_flag) + self.JDist, self.poly_types = JDist, poly_types + + if self.input_data_given: + self.MCSize = len(Inputs.Marginals[0].input_data) + self.raw_data = np.zeros((ndim, self.MCSize)) + + for parIdx in range(ndim): + # Save parameter names + try: + self.raw_data[parIdx] = np.array( + Inputs.Marginals[parIdx].input_data) + except: + self.raw_data[parIdx] = self.JDist[parIdx].sample(mc_size) + + else: + # Generate random samples based on parameter distributions + self.raw_data = chaospy.generate_samples(mc_size, + domain=self.JDist) + + # Extract moments + for parIdx in range(ndim): + mu = np.mean(self.raw_data[parIdx]) + std = np.std(self.raw_data[parIdx]) + self.InputObj.Marginals[parIdx].moments = [mu, std] + + # Generate the bounds based on given inputs for marginals + bound_tuples = [] + for i in range(ndim): + if Inputs.Marginals[i].dist_type == 'unif': + low_bound, up_bound = Inputs.Marginals[i].parameters + else: + low_bound = np.min(self.raw_data[i]) + up_bound = np.max(self.raw_data[i]) + + bound_tuples.append((low_bound, up_bound)) + + self.bound_tuples = tuple(bound_tuples) + + # ------------------------------------------------------------------------- + def build_polytypes(self, rosenblatt): + """ + Creates the polynomial types to be passed to univ_basis_vals method of + the MetaModel object. + + Parameters + ---------- + rosenblatt : bool + Rosenblatt transformation flag. + + Returns + ------- + orig_space_dist : object + A chaospy JDist object or a gaussian_kde object. + poly_types : list + List of polynomial types for the parameters. + + """ + Inputs = self.InputObj + + all_data = [] + all_dist_types = [] + orig_joints = [] + poly_types = [] + + for parIdx in range(self.ndim): + + if Inputs.Marginals[parIdx].dist_type is None: + data = Inputs.Marginals[parIdx].input_data + all_data.append(data) + dist_type = None + else: + dist_type = Inputs.Marginals[parIdx].dist_type + params = Inputs.Marginals[parIdx].parameters + + if rosenblatt: + polytype = 'hermite' + dist = chaospy.Normal() + + elif dist_type is None: + polytype = 'arbitrary' + dist = None + + elif 'unif' in dist_type.lower(): + polytype = 'legendre' + if not np.array(params).shape[0]>=2: + raise AssertionError(f'Distribution has too few parameters!') + dist = chaospy.Uniform(lower=params[0], upper=params[1]) + + elif 'norm' in dist_type.lower() and \ + 'log' not in dist_type.lower(): + if not np.array(params).shape[0]>=2: + raise AssertionError(f'Distribution has too few parameters!') + polytype = 'hermite' + dist = chaospy.Normal(mu=params[0], sigma=params[1]) + + elif 'gamma' in dist_type.lower(): + polytype = 'laguerre' + if not np.array(params).shape[0]>=3: + raise AssertionError(f'Distribution has too few parameters!') + dist = chaospy.Gamma(shape=params[0], + scale=params[1], + shift=params[2]) + + elif 'beta' in dist_type.lower(): + if not np.array(params).shape[0]>=4: + raise AssertionError(f'Distribution has too few parameters!') + polytype = 'jacobi' + dist = chaospy.Beta(alpha=params[0], beta=params[1], + lower=params[2], upper=params[3]) + + elif 'lognorm' in dist_type.lower(): + polytype = 'hermite' + if not np.array(params).shape[0]>=2: + raise AssertionError(f'Distribution has too few parameters!') + mu = np.log(params[0]**2/np.sqrt(params[0]**2 + params[1]**2)) + sigma = np.sqrt(np.log(1 + params[1]**2 / params[0]**2)) + dist = chaospy.LogNormal(mu, sigma) + # dist = chaospy.LogNormal(mu=params[0], sigma=params[1]) + + elif 'expon' in dist_type.lower(): + polytype = 'exponential' + if not np.array(params).shape[0]>=2: + raise AssertionError(f'Distribution has too few parameters!') + dist = chaospy.Exponential(scale=params[0], shift=params[1]) + + elif 'weibull' in dist_type.lower(): + polytype = 'weibull' + if not np.array(params).shape[0]>=3: + raise AssertionError(f'Distribution has too few parameters!') + dist = chaospy.Weibull(shape=params[0], scale=params[1], + shift=params[2]) + + else: + message = (f"DistType {dist_type} for parameter" + f"{parIdx+1} is not available.") + raise ValueError(message) + + if self.input_data_given or self.apce: + polytype = 'arbitrary' + + # Store dists and poly_types + orig_joints.append(dist) + poly_types.append(polytype) + all_dist_types.append(dist_type) + + # Prepare final output to return + if None in all_dist_types: + # Naive approach: Fit a gaussian kernel to the provided data + Data = np.asarray(all_data) + try: + orig_space_dist = st.gaussian_kde(Data) + except: + raise ValueError('The samples provided to the Marginals should be 1D only') + self.prior_space = orig_space_dist + else: + orig_space_dist = chaospy.J(*orig_joints) + try: + self.prior_space = st.gaussian_kde(orig_space_dist.sample(10000)) + except: + raise ValueError('Parameter values are not valid, please set differently') + + return orig_space_dist, poly_types + + # ------------------------------------------------------------------------- + def transform(self, X, params=None, method=None): + """ + Transform the samples via either a Rosenblatt or an isoprobabilistic + transformation. + + Parameters + ---------- + X : array of shape (n_samples,n_params) + Samples to be transformed. + method : string + If transformation method is 'user' transform X, else just pass X. + + Returns + ------- + tr_X: array of shape (n_samples,n_params) + Transformed samples. + + """ + # Check for built JDist + if not hasattr(self, 'JDist'): + raise AttributeError('Call function init_param_space first to create JDist') + + # Check if X is 2d + if X.ndim != 2: + raise AttributeError('X should have two dimensions') + + # Check if size of X matches Marginals + if X.shape[1]!= self.ndim: + raise AttributeError('The second dimension of X should be the same size as the number of marginals in the InputObj') + + if self.InputObj.Rosenblatt: + self.origJDist, _ = self.build_polytypes(False) + if method == 'user': + tr_X = self.JDist.inv(self.origJDist.fwd(X.T)).T + else: + # Inverse to original spcace -- generate sample ED + tr_X = self.origJDist.inv(self.JDist.fwd(X.T)).T + else: + # Transform samples via an isoprobabilistic transformation + n_samples, n_params = X.shape + Inputs = self.InputObj + origJDist = self.JDist + poly_types = self.poly_types + + disttypes = [] + for par_i in range(n_params): + disttypes.append(Inputs.Marginals[par_i].dist_type) + + # Pass non-transformed X, if arbitrary PCE is selected. + if None in disttypes or self.input_data_given or self.apce: + return X + + cdfx = np.zeros((X.shape)) + tr_X = np.zeros((X.shape)) + + for par_i in range(n_params): + + # Extract the parameters of the original space + disttype = disttypes[par_i] + if disttype is not None: + dist = origJDist[par_i] + else: + dist = None + polytype = poly_types[par_i] + cdf = np.vectorize(lambda x: dist.cdf(x)) + + # Extract the parameters of the transformation space based on + # polyType + if polytype == 'legendre' or disttype == 'uniform': + # Generate Y_Dists based + params_Y = [-1, 1] + dist_Y = st.uniform(loc=params_Y[0], + scale=params_Y[1]-params_Y[0]) + inv_cdf = np.vectorize(lambda x: dist_Y.ppf(x)) + + elif polytype == 'hermite' or disttype == 'norm': + params_Y = [0, 1] + dist_Y = st.norm(loc=params_Y[0], scale=params_Y[1]) + inv_cdf = np.vectorize(lambda x: dist_Y.ppf(x)) + + elif polytype == 'laguerre' or disttype == 'gamma': + params_Y = [1, params[1]] + dist_Y = st.gamma(loc=params_Y[0], scale=params_Y[1]) + inv_cdf = np.vectorize(lambda x: dist_Y.ppf(x)) + + # Compute CDF_x(X) + cdfx[:, par_i] = cdf(X[:, par_i]) + + # Compute invCDF_y(cdfx) + tr_X[:, par_i] = inv_cdf(cdfx[:, par_i]) + + return tr_X + + diff --git a/src/bayesvalidrox/surrogate_models/surrogate_models.py b/src/bayesvalidrox/surrogate_models/surrogate_models.py index fb579cf6ac0c771ec1af5e8ac7284818d325a855..c3abe2af8a7e30b6f0db38a48253a0c3bdd97ffb 100644 --- a/src/bayesvalidrox/surrogate_models/surrogate_models.py +++ b/src/bayesvalidrox/surrogate_models/surrogate_models.py @@ -99,7 +99,7 @@ class MetaModel(): """ - def __init__(self, input_obj, model_obj, meta_model_type='PCE', + def __init__(self, input_obj, model_obj = None, meta_model_type='PCE', pce_reg_method='OLS', bootstrap_method='fast', n_bootstrap_itrs=1, pce_deg=1, pce_q_norm=1.0, dim_red_method='no', verbose=False): @@ -114,13 +114,61 @@ class MetaModel(): self.pce_q_norm = pce_q_norm self.dim_red_method = dim_red_method self.verbose = verbose - - def check_init(self) -> None: - """ - Checks all the parameter values given to the constructor against - reference lists of what is implemented and can be run + + def build_metamodel(self) -> None: """ + Builds the parts for the metamodel (polynomes,...) that are neede before fitting. + + Returns + ------- None + DESCRIPTION. + + """ + if not hasattr(self, 'CollocationPoints'): + raise AttributeError('Please provide samples to the metamodle before building it.') + + self.n_params = len(self.input_obj.Marginals) + + # Generate polynomials + self.generate_polynomials(np.max(self.pce_deg)) + + # Initialize the nested dictionaries + if self.meta_model_type.lower() == 'gpe': + self.gp_poly = self.auto_vivification() + self.x_scaler = self.auto_vivification() + self.LCerror = self.auto_vivification() + else: + self.deg_dict = self.auto_vivification() + self.q_norm_dict = self.auto_vivification() + self.coeffs_dict = self.auto_vivification() + self.basis_dict = self.auto_vivification() + self.score_dict = self.auto_vivification() + self.clf_poly = self.auto_vivification() + self.LCerror = self.auto_vivification() + if self.dim_red_method.lower() == 'pca': + self.pca = self.auto_vivification() + + # Define an array containing the degrees + self.n_samples, ndim = self.CollocationPoints.shape + self.deg_array = self.__select_degree(ndim, self.n_samples) + + # Generate all basis indices + self.allBasisIndices = self.auto_vivification() + for deg in self.deg_array: + keys = self.allBasisIndices.keys() + if deg not in np.fromiter(keys, dtype=float): + # Generate the polynomial basis indices + for qidx, q in enumerate(self.pce_q_norm): + basis_indices = glexindex(start=0, stop=deg+1, + dimensions=self.n_params, + cross_truncation=q, + reverse=False, graded=True) + self.allBasisIndices[str(deg)][str(q)] = basis_indices + + # Evaluate the univariate polynomials on ExpDesign + if self.meta_model_type.lower() != 'gpe': + self.univ_p_val = self.univ_basis_vals(self.CollocationPoints) # ------------------------------------------------------------------------- @@ -136,24 +184,23 @@ class MetaModel(): """ Model = self.ModelObj - self.n_params = len(self.input_obj.Marginals) self.ExpDesignFlag = 'normal' # --- Prepare pce degree --- if self.meta_model_type.lower() == 'pce': if type(self.pce_deg) is not np.ndarray: self.pce_deg = np.array(self.pce_deg) - if self.ExpDesign.method == 'sequential': - raise Exception( - "Please use MetaModelEngine class for the sequential design!" - ) + #if self.ExpDesign.method == 'sequential': + # raise Exception( + # "Please use MetaModelEngine class for the sequential design!" + # ) - elif self.ExpDesign.method == 'normal': - self.train_norm_design(verbose=True) + #elif self.ExpDesign.method == 'normal': + self.train_norm_design(verbose=True) - else: - raise Exception("The method for experimental design you requested" - " has not been implemented yet.") + #else: + # raise Exception("The method for experimental design you requested" + # " has not been implemented yet.") # Zip the model run directories # TODO: can move this elsewhere? Is this important? @@ -164,7 +211,7 @@ class MetaModel(): return self # ------------------------------------------------------------------------- - def train_norm_design(self, parallel=True, verbose=False): + def train_norm_design(self, n_samples = None, parallel=True, verbose=False): """ This function loops over the outputs and each time step/point and fits the meta model. @@ -182,56 +229,46 @@ class MetaModel(): Meta-model object. """ - Model = self.ModelObj # Get the collocation points to run the forward model - CollocationPoints, OutputDict = self.generate_ExpDesign(Model) + if not hasattr(self.ExpDesign, 'n_init_samples'): + if n_samples == None or n_samples<=0: + raise ValueError('The number of samples provided for static training is not valid, please provide an int>0') + self.CollocationPoints, self.OutputDict = self.generate_ExpDesign(self.ModelObj, n_samples) + else: + self.CollocationPoints, self.OutputDict = self.generate_ExpDesign(self.ModelObj, self.ExpDesign.n_init_samples) + + if 'x_values' in self.OutputDict: + self.ExpDesign.x_values = self.OutputDict['x_values'] + del self.OutputDict['x_values'] - # Generate polynomials - self.generate_polynomials(np.max(self.pce_deg)) self.ndim = self.ExpDesign.ndim + + # Build the metamodel if not done already + self.build_metamodel() + + # Fit the surrogate + self.fit(self.CollocationPoints, self.OutputDict, parallel, verbose) + + + def fit(self, X, y, parallel = True, verbose = False): + """ + Fits the surrogate to the given data (samples X, outputs y). + Note here that the samples X should be the transformed samples provided + by the experimental design if the transformation is used there. + Parameters + ---------- + X : TYPE + DESCRIPTION. + y : TYPE + DESCRIPTION. - # Initialize the nested dictionaries - if self.meta_model_type.lower() == 'gpe': - self.gp_poly = self.auto_vivification() - self.x_scaler = self.auto_vivification() - self.LCerror = self.auto_vivification() - else: - self.deg_dict = self.auto_vivification() - self.q_norm_dict = self.auto_vivification() - self.coeffs_dict = self.auto_vivification() - self.basis_dict = self.auto_vivification() - self.score_dict = self.auto_vivification() - self.clf_poly = self.auto_vivification() - self.LCerror = self.auto_vivification() - if self.dim_red_method.lower() == 'pca': - self.pca = self.auto_vivification() - - # Define an array containing the degrees - n_samples, ndim = CollocationPoints.shape - self.deg_array = self.__select_degree(ndim, n_samples) - - # Generate all basis indices - self.allBasisIndices = self.auto_vivification() - for deg in self.deg_array: - keys = self.allBasisIndices.keys() - if deg not in np.fromiter(keys, dtype=float): - # Generate the polynomial basis indices - for qidx, q in enumerate(self.pce_q_norm): - basis_indices = glexindex(start=0, stop=deg+1, - dimensions=self.n_params, - cross_truncation=q, - reverse=False, graded=True) - self.allBasisIndices[str(deg)][str(q)] = basis_indices - - # Evaluate the univariate polynomials on ExpDesign - if self.meta_model_type.lower() != 'gpe': - univ_p_val = self.univ_basis_vals(CollocationPoints) - - if 'x_values' in OutputDict: - self.ExpDesign.x_values = OutputDict['x_values'] - del OutputDict['x_values'] + Returns + ------- + None. + """ + # --- Loop through data points and fit the surrogate --- if verbose: print(f"\n>>>> Training the {self.meta_model_type} metamodel " @@ -262,16 +299,16 @@ class MetaModel(): # Loop over the bootstrap iterations for b_i in enum_obj: if b_i > 0: - b_indices = np.random.randint(n_samples, size=n_samples) + b_indices = np.random.randint(self.n_samples, size=self.n_samples) else: - b_indices = np.arange(len(CollocationPoints)) + b_indices = np.arange(len(X)) - X_train_b = CollocationPoints[b_indices] + X_train_b = X[b_indices] if verbose and self.n_bootstrap_itrs == 1: - items = tqdm(OutputDict.items(), desc="Fitting regression") + items = tqdm(y.items(), desc="Fitting regression") else: - items = OutputDict.items() + items = y.items() # For loop over the components/outputs for key, Output in items: @@ -318,7 +355,7 @@ class MetaModel(): self.gp_poly[f'b_{b_i+1}'][key][f"y_{idx+1}"] = out[idx] else: - self.univ_p_val = univ_p_val[b_indices] + self.univ_p_val = self.univ_p_val[b_indices] if parallel and (not fast_bootstrap or b_i == 0): out = Parallel(n_jobs=-1, backend='multiprocessing')( delayed(self.adaptive_regression)(X_train_b, @@ -394,7 +431,7 @@ class MetaModel(): psi = self.create_psi(basis_indices, self.univ_p_val) # Calulate the cofficients of surrogate model - updated_out = self.fit( + updated_out = self.regression( psi, y[:, i], basis_indices, reg_method='OLS', sparsity=False ) @@ -418,7 +455,7 @@ class MetaModel(): meta_Model_type=self.meta_model_type) # ------------------------------------------------------------------------- - def generate_ExpDesign(self, Model): + def generate_ExpDesign(self, Model, n_samples): """ Prepares the experimental design either by reading from the prescribed data or running simulations. @@ -445,61 +482,23 @@ class MetaModel(): self.add_ExpDesign() ExpDesign = self.ExpDesign - if self.ExpDesignFlag != 'sequential': - # Read ExpDesign (training and targets) from the provided hdf5 - if ExpDesign.hdf5_file is not None: - - # Read hdf5 file - f = h5py.File(ExpDesign.hdf5_file, 'r+') - - # Read EDX and pass it to ExpDesign object - try: - ExpDesign.X = np.array(f["EDX/New_init_"]) - except KeyError: - ExpDesign.X = np.array(f["EDX/init_"]) - - # Update number of initial samples - ExpDesign.n_init_samples = ExpDesign.X.shape[0] - - # Read EDX and pass it to ExpDesign object - out_names = self.ModelObj.Output.names - ExpDesign.Y = {} - - # Extract x values - try: - ExpDesign.Y["x_values"] = dict() - for varIdx, var in enumerate(out_names): - x = np.array(f[f"x_values/{var}"]) - ExpDesign.Y["x_values"][var] = x - except KeyError: - ExpDesign.Y["x_values"] = np.array(f["x_values"]) - - # Store the output - for varIdx, var in enumerate(out_names): - try: - y = np.array(f[f"EDY/{var}/New_init_"]) - except KeyError: - y = np.array(f[f"EDY/{var}/init_"]) - ExpDesign.Y[var] = y - f.close() - else: - # Check if an old hdf5 file exists: if yes, rename it - hdf5file = f'ExpDesign_{self.ModelObj.name}.hdf5' - if os.path.exists(hdf5file): - os.rename(hdf5file, 'old_'+hdf5file) + # Read ExpDesign (training and targets) from the provided hdf5 + if ExpDesign.hdf5_file is not None: + # TODO: need to run 'generate_ED' as well after this or not? + ExpDesign.read_from_file(self.out_names) + else: + # Check if an old hdf5 file exists: if yes, rename it + hdf5file = f'ExpDesign_{self.ModelObj.name}.hdf5' + if os.path.exists(hdf5file): + os.rename(hdf5file, 'old_'+hdf5file) # ---- Prepare X samples ---- - ED_X, ED_X_tr = ExpDesign.generate_ED(ExpDesign.n_init_samples, - transform=True, - max_pce_deg=np.max(self.pce_deg)) - ExpDesign.X = ED_X - ExpDesign.collocationPoints = ED_X_tr + ExpDesign.generate_ED(n_samples, transform=True, max_pce_deg=np.max(self.pce_deg)) # ---- Run simulations at X ---- if not hasattr(ExpDesign, 'Y') or ExpDesign.Y is None: print('\n Now the forward model needs to be run!\n') - ED_Y, up_ED_X = Model.run_model_parallel(ED_X) - ExpDesign.X = up_ED_X + ED_Y, up_ED_X = Model.run_model_parallel(ExpDesign.X) self.ModelOutputDict = ED_Y ExpDesign.Y = ED_Y else: @@ -510,7 +509,7 @@ class MetaModel(): raise Exception('Please provide either a dictionary or a hdf5' 'file to ExpDesign.hdf5_file argument.') - return ED_X_tr, self.ModelOutputDict + return ExpDesign.X_tr, self.ModelOutputDict # ------------------------------------------------------------------------- def univ_basis_vals(self, samples, n_max=None): @@ -606,7 +605,7 @@ class MetaModel(): return psi # ------------------------------------------------------------------------- - def fit(self, X, y, basis_indices, reg_method=None, sparsity=True): + def regression(self, X, y, basis_indices, reg_method=None, sparsity=True): """ Fit regression using the regression method provided. @@ -783,7 +782,7 @@ class MetaModel(): Psi = self.create_psi(BasisIndices, self.univ_p_val) # Calulate the cofficients of the meta model - outs = self.fit(Psi, ED_Y, BasisIndices) + outs = self.regression(Psi, ED_Y, BasisIndices) # Calculate and save the score of LOOCV score, LCerror = self.corr_loocv_error(outs['clf_poly'], @@ -1176,8 +1175,6 @@ class MetaModel(): std_pred : dict Standard deviatioon of the predictions. """ - outputs = self.ModelObj.Output.names - # Generate or transform (if need be) samples if samples is None: # Generate @@ -1276,7 +1273,7 @@ class MetaModel(): mean_pred_all[k][i] = v # Compute the moments of predictions over the predictions - for output in outputs: + for output in self.out_names: # Only use bootstraps with finite values finite_rows = np.isfinite( mean_pred_all[output]).all(axis=2).all(axis=1) @@ -1295,7 +1292,7 @@ class MetaModel(): return mean_pred, std_pred # ------------------------------------------------------------------------- - def create_model_error(self, X, y, name='Calib'): + def create_model_error(self, X, y, Model, name='Calib'): """ Fits a GPE-based model error. @@ -1315,7 +1312,6 @@ class MetaModel(): Self object. """ - Model = self.ModelObj outputNames = Model.Output.names self.errorRegMethod = 'GPE' self.errorclf_poly = self.auto_vivification() @@ -1473,13 +1469,10 @@ class MetaModel(): n_combo[i] /= math.factorial(ndim) * math.factorial(d) return n_combo - if self.ExpDesignFlag != 'sequential': - deg_new = max_deg - else: - d = nitr if nitr != 0 and self.n_params > 5 else 1 - min_index = np.argmin(abs(M_uptoMax(max_deg)-ndim*n_samples*d)) - deg_new = max_deg - # deg_new = range(1, max_deg+1)[min_index] + deg_new = max_deg + d = nitr if nitr != 0 and self.n_params > 5 else 1 + min_index = np.argmin(abs(M_uptoMax(max_deg)-ndim*n_samples*d)) + # deg_new = range(1, max_deg+1)[min_index] if deg_new > min_Deg and self.pce_reg_method.lower() != 'fastard': deg_array = np.arange(min_Deg, deg_new+1) diff --git a/tests/test_ExpDesign.py b/tests/test_ExpDesign.py index 6a383cae68e6362d36115df841cf7b4a6aa2d078..72a798804165cb1dddec4cc557a96935c518ecf5 100644 --- a/tests/test_ExpDesign.py +++ b/tests/test_ExpDesign.py @@ -9,6 +9,7 @@ Class ExpDesigns: check_valid_inputs - x generate_samples generate_ED + read_from_file init_param_space - x build_polytypes - x random_sampler @@ -595,6 +596,21 @@ def test_generate_ED() -> None: exp = ExpDesigns(inp) samples = exp.generate_ED(4) +def test_generate_ED_negsamplenum(): + """ + Generate ED for neg number of samples + """ + x = np.random.uniform(0,1,1000) + X = np.random.uniform(0,1,(1,10)) + inp = Input() + inp.add_marginals() + inp.Marginals[0].input_data = x + exp = ExpDesigns(inp, sampling_method = 'user') + with pytest.raises(ValueError) as excinfo: + exp.generate_ED(-1) + assert str(excinfo.value) == 'A negative number of samples cannot be created. Please provide positive n_samples' + + def test_generate_ED_usernoX() -> None: """ @@ -637,6 +653,31 @@ def test_generate_ED_userX() -> None: exp = ExpDesigns(inp, sampling_method = 'user') exp.X = X samples = exp.generate_ED(4) + + +#%% Test ExpDesign.read_from_file + +def test_read_from_file(): + """ + No file given to read in + """ + x = np.random.uniform(0,1,1000) + X = np.random.uniform(0,1,(1,10)) + inp = Input() + inp.add_marginals() + inp.Marginals[0].input_data = x + exp = ExpDesigns(inp, sampling_method = 'user') + with pytest.raises(AttributeError) as excinfo: + exp.read_from_file(['Out']) + assert str(excinfo.value) == 'ExpDesign cannot be read in, please provide hdf5 file first' + +# TODO: test with provided file + + +#%% Test ExpDesign.generate_ED(self, n_samples, transform=False, max_pce_deg=None) + + + if __name__ == '__main__': None diff --git a/tests/test_InputSpace.py b/tests/test_InputSpace.py new file mode 100644 index 0000000000000000000000000000000000000000..8c5476be4fbe9e13100dd2c03ae3a7f77ee97979 --- /dev/null +++ b/tests/test_InputSpace.py @@ -0,0 +1,463 @@ +# -*- coding: utf-8 -*- +""" +Test the InputSpace class in bayesvalidrox. +Tests are available for the following functions +Class InputSpace: + check_valid_inputs - x + generate_samples + init_param_space - x + build_polytypes - x + transform - x + +@author: Rebecca Kohlhaas +""" +import sys +sys.path.append("src/") +import pytest +import numpy as np + +from bayesvalidrox.surrogate_models.inputs import Input +import bayesvalidrox.surrogate_models.input_space as exp +from bayesvalidrox.surrogate_models.input_space import InputSpace + +#%% Test ExpDesign.check_valid_input + +def test_check_valid_input_hasmarg() -> None: + """ + Distribution not built if no marginals set + """ + inp = Input() + with pytest.raises(AssertionError) as excinfo: + init = InputSpace(inp) + assert str(excinfo.value) == 'Cannot build distributions if no marginals are given' + +def test_check_valid_input_haspriors() -> None: + """ + Distribution not built if no distribution set for the marginals + """ + inp = Input() + inp.add_marginals() + with pytest.raises(AssertionError) as excinfo: + exp = InputSpace(inp) + assert str(excinfo.value) == 'Not all marginals were provided priors' + +def test_check_valid_input_priorsmatch() -> None: + """ + Distribution not built if dist types do not align + """ + x = np.random.uniform(0,1,1000) + inp = Input() + inp.add_marginals() + inp.Marginals[0].input_data = x + inp.add_marginals() + inp.Marginals[1].dist_type = 'normal' + inp.Marginals[1].parameters = [0,1] + with pytest.raises(AssertionError) as excinfo: + exp = InputSpace(inp) + assert str(excinfo.value) == 'Distributions cannot be built as the priors have different types' + +def test_check_valid_input_samples() -> None: + """ + Design built correctly - samples + """ + x = np.random.uniform(0,1,1000) + inp = Input() + inp.add_marginals() + inp.Marginals[0].input_data = x + inp.add_marginals() + inp.Marginals[1].input_data = x+2 + try: + exp = InputSpace(inp) + except AssertionError: + pytest.fail("ExpDesign raised AssertionError unexpectedly!") + # TODO: check for better options to assert that no error at all occurred + + +def test_check_valid_input_both() -> None: + """ + Design no built - samples and dist type given + """ + x = np.random.uniform(0,1,1000) + inp = Input() + inp.add_marginals() + inp.Marginals[0].input_data = x + inp.Marginals[0].dist_type = 'normal' + with pytest.raises(AssertionError) as excinfo: + exp = InputSpace(inp) + assert str(excinfo.value) == 'Both samples and distribution type are given. Please choose only one.' + +#def test_check_valid_input_distnotok() -> None: +# """ +# Design built incorrectly - dist types without parameters +# """ +# inp = Input() +# inp.add_marginals() +# inp.Marginals[0].dist_type = 'normal' +# inp.add_marginals() +# inp.Marginals[1].dist_type = 'normal' +# with pytest.raises(AssertionError) as excinfo: +# exp = ExpDesigns(inp) +# assert str(excinfo.value) == 'Some distributions do not have characteristic values' + +def test_check_valid_input_distok() -> None: + """ + Design built correctly - dist types + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'normal' + inp.Marginals[0].parameters = [0,1] + inp.add_marginals() + inp.Marginals[1].dist_type = 'normal' + inp.Marginals[1].parameters = [0,1] + try: + exp = InputSpace(inp) + except AssertionError: + pytest.fail("ExpDesign raised AssertionError unexpectedly!") + # TODO: check for better options to assert that no error at all occurred + +#%% Test ExpDesign.build_polytypes +def test_build_polytypes_normalerr() -> None: + """ + Build dist 'normal' - too few params + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'normal' + inp.Marginals[0].parameters = [] + exp = InputSpace(inp) + with pytest.raises(AssertionError) as excinfo: + exp.build_polytypes(False) + assert str(excinfo.value) == 'Distribution has too few parameters!' + +def test_build_polytypes_normal() -> None: + """ + Build dist 'normal' + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'normal' + inp.Marginals[0].parameters = [0,1] + exp = InputSpace(inp) + exp.build_polytypes(False) + +def test_build_polytypes_uniferr() -> None: + """ + Build dist 'unif' - too few params + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'unif' + inp.Marginals[0].parameters = [] + exp = InputSpace(inp) + with pytest.raises(AssertionError) as excinfo: + exp.build_polytypes(False) + assert str(excinfo.value) == 'Distribution has too few parameters!' + +def test_build_polytypes_unif() -> None: + """ + Build dist 'unif' + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'unif' + inp.Marginals[0].parameters = [0,1] + exp = InputSpace(inp) + exp.build_polytypes(False) + +def test_build_polytypes_gammaerr() -> None: + """ + Build dist 'gamma' - too few params + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'gamma' + inp.Marginals[0].parameters = [] + exp = InputSpace(inp) + with pytest.raises(AssertionError) as excinfo: + exp.build_polytypes(False) + assert str(excinfo.value) == 'Distribution has too few parameters!' + +def test_build_polytypes_gamma() -> None: + """ + Build dist 'gamma' + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'gamma' + inp.Marginals[0].parameters = [0,1,0] + exp = InputSpace(inp) + with pytest.raises(ValueError) as excinfo: + exp.build_polytypes(False) + assert str(excinfo.value) == 'Parameter values are not valid, please set differently' + +def test_build_polytypes_betaerr() -> None: + """ + Build dist 'beta' - too few params + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'beta' + inp.Marginals[0].parameters = [] + exp = InputSpace(inp) + with pytest.raises(AssertionError) as excinfo: + exp.build_polytypes(False) + assert str(excinfo.value) == 'Distribution has too few parameters!' + +def test_build_polytypes_beta() -> None: + """ + Build dist 'beta' + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'beta' + inp.Marginals[0].parameters = [0.5,1,2,3] + exp = InputSpace(inp) + exp.build_polytypes(False) + + +def test_build_polytypes_lognormerr() -> None: + """ + Build dist 'lognorm' - too few params + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'lognorm' + inp.Marginals[0].parameters = [] + exp = InputSpace(inp) + with pytest.raises(AssertionError) as excinfo: + exp.build_polytypes(False) + assert str(excinfo.value) == 'Distribution has too few parameters!' + +def test_build_polytypes_lognorm() -> None: + """ + Build dist 'lognorm' + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'lognorm' + inp.Marginals[0].parameters = [0.5,1,2,3] + exp = InputSpace(inp) + exp.build_polytypes(False) + + +def test_build_polytypes_exponerr() -> None: + """ + Build dist 'expon' - too few params + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'beta' + inp.Marginals[0].parameters = [] + exp = InputSpace(inp) + with pytest.raises(AssertionError) as excinfo: + exp.build_polytypes(False) + assert str(excinfo.value) == 'Distribution has too few parameters!' + +def test_build_polytypes_expon() -> None: + """ + Build dist 'expon' + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'expon' + inp.Marginals[0].parameters = [0.5,1,2,3] + exp = InputSpace(inp) + exp.build_polytypes(False) + + +def test_build_polytypes_weibullerr() -> None: + """ + Build dist 'weibull' - too few params + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'weibull' + inp.Marginals[0].parameters = [] + exp = InputSpace(inp) + with pytest.raises(AssertionError) as excinfo: + exp.build_polytypes(False) + assert str(excinfo.value) == 'Distribution has too few parameters!' + +def test_build_polytypes_weibull() -> None: + """ + Build dist 'weibull' + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'weibull' + inp.Marginals[0].parameters = [0.5,1,2,3] + exp = InputSpace(inp) + exp.build_polytypes(False) + + +def test_build_polytypes_arbitrary() -> None: + """ + Build poly 'arbitrary' + """ + x = np.random.uniform(0,1,1000) + inp = Input() + inp.add_marginals() + inp.Marginals[0].input_data = x + exp = InputSpace(inp) + exp.build_polytypes(False) + +def test_build_polytypes_rosenblatt() -> None: + """ + Build dist with rosenblatt + """ + x = np.random.uniform(0,1,1000) + inp = Input() + inp.add_marginals() + inp.Marginals[0].input_data = x + exp = InputSpace(inp) + exp.build_polytypes(True) + +def test_build_polytypes_samples() -> None: + """ + Build dist from samples + """ + x = np.random.uniform(0,1,1000) + inp = Input() + inp.add_marginals() + inp.Marginals[0].input_data = x + exp = InputSpace(inp) + exp.build_polytypes(False) + + +def test_build_polytypes_samples2d() -> None: + """ + Build dist from samples - samples too high dim + """ + x = np.random.uniform(0,1,(2,1000)) + inp = Input() + inp.add_marginals() + inp.Marginals[0].input_data = x + exp = InputSpace(inp) + with pytest.raises(ValueError) as excinfo: + exp.build_polytypes(False) + assert str(excinfo.value) == 'The samples provided to the Marginals should be 1D only' + +#%% Test ExpDesign.init_param_space + +def test_init_param_space_nomaxdegsample() -> None: + """ + Init param space without max_deg for given samples + """ + x = np.random.uniform(0,1,1000) + inp = Input() + inp.add_marginals() + inp.Marginals[0].input_data = x + exp = InputSpace(inp) + exp.init_param_space() + +def test_init_param_space_nomaxdegdist() -> None: + """ + Init param space without max_deg for given dist + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'expon' + inp.Marginals[0].parameters = [0.5,1,2,3] + exp = InputSpace(inp) + exp.init_param_space() + +def test_init_param_space_maxdeg() -> None: + """ + Init param space with max_deg for given samples + """ + x = np.random.uniform(0,1,1000) + inp = Input() + inp.add_marginals() + inp.Marginals[0].input_data = x + exp = InputSpace(inp) + exp.init_param_space(max_deg=2) + +def test_init_param_space_maxdegdist() -> None: + """ + Init param space with max_deg for given dist + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'expon' + inp.Marginals[0].parameters = [0.5,1,2,3] + exp = InputSpace(inp) + exp.init_param_space(max_deg=2) + + +#%% Test ExpDesign.transform + +def test_transform_noparamspace() -> None: + """ + Call transform without a built JDist + """ + x = np.random.uniform(0,1,1000) + y = np.random.uniform(0,1,(2,1000)) + inp = Input() + inp.add_marginals() + inp.Marginals[0].input_data = x + exp = InputSpace(inp) + with pytest.raises(AttributeError) as excinfo: + exp.transform(y) + assert str(excinfo.value) == 'Call function init_param_space first to create JDist' + +def test_transform_dimerrlow() -> None: + """ + Call transform with too few dimensions + """ + x = np.random.uniform(0,1,1000) + inp = Input() + inp.add_marginals() + inp.Marginals[0].input_data = x + exp = InputSpace(inp) + exp.init_param_space(max_deg=2) + with pytest.raises(AttributeError) as excinfo: + exp.transform(x) + assert str(excinfo.value) == 'X should have two dimensions' + +def test_transform_dimerrhigh() -> None: + """ + Call transform with too many dimensions + """ + x = np.random.uniform(0,1,1000) + y = np.random.uniform(0,1,(1,1,1000)) + inp = Input() + inp.add_marginals() + inp.Marginals[0].input_data = x + exp = InputSpace(inp) + exp.init_param_space(max_deg=2) + with pytest.raises(AttributeError) as excinfo: + exp.transform(y) + assert str(excinfo.value) == 'X should have two dimensions' + +def test_transform_dimerr0() -> None: + """ + Call transform with wrong X.shape[0] + """ + x = np.random.uniform(0,1,1000) + y = np.random.uniform(0,1,(2,1000)) + inp = Input() + inp.add_marginals() + inp.Marginals[0].input_data = x + exp = InputSpace(inp) + exp.init_param_space(max_deg=2) + with pytest.raises(AttributeError) as excinfo: + exp.transform(y) + assert str(excinfo.value) == 'The second dimension of X should be the same size as the number of marginals in the InputObj' + +def test_transform_paramspace() -> None: + """ + Transform successfully + """ + x = np.random.uniform(0,1,1000) + y = np.random.uniform(0,1,(1000,1)) + inp = Input() + inp.add_marginals() + inp.Marginals[0].input_data = x + exp = InputSpace(inp) + exp.init_param_space(max_deg=2) + exp.transform(y) + + +if __name__ == '__main__': + None diff --git a/tests/test_MetaModel.py b/tests/test_MetaModel.py index d8bcfe0e1b2d459c560a5502ef2d9cdbbee893d4..7310b3708e0515c0f526f842ab7e3011da2c151e 100644 --- a/tests/test_MetaModel.py +++ b/tests/test_MetaModel.py @@ -3,6 +3,7 @@ Test the MetaModel class in bayesvalidrox. Tests are available for the following functions Class MetaModel: + build_metamodel create_metamodel *not used again train_norm_design update_pce_coeffs @@ -38,6 +39,34 @@ from bayesvalidrox.pylink.pylink import PyLinkForwardModel as PL #%% Test MetaMod constructor on its own def test_metamod() -> None: + """ + Construct MetaModel without inputs + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'normal' + inp.Marginals[0].parameters = [0,1] + mod = PL() + mm = MetaModel(inp, mod) + +#%% Test MetaModel.build_metamodel + +def test_build_metamodel_nosamples() -> None: + """ + Build MetaModel without samples + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'normal' + inp.Marginals[0].parameters = [0,1] + mod = PL() + mm = MetaModel(inp, mod) + with pytest.raises(AttributeError) as excinfo: + mm.build_metamodel() + assert str(excinfo.value) == 'Please provide samples to the metamodle before building it.' + + +def test_build_metamodel() -> None: """ Build MetaModel without inputs """ @@ -47,6 +76,8 @@ def test_metamod() -> None: inp.Marginals[0].parameters = [0,1] mod = PL() mm = MetaModel(inp, mod) + mm.build_metamodel() + 'Please provide samples to the metamodle before building it.' #%% Test MetaMod.generate_polynomials @@ -97,7 +128,7 @@ def test_generate_polynomials_deg() -> None: def test_add_ExpDesign() -> None: """ - Generate ExpDesign in class + Add ExpDesign in MetaModel """ inp = Input() inp.add_marginals() @@ -107,6 +138,23 @@ def test_add_ExpDesign() -> None: mm = MetaModel(inp, mod) mm.add_ExpDesign() +#%% Test MetaModel.generate_ExpDesign + +def test_generate_ExpDesign() -> None: + """ + Generate ExpDesign in MetaModel + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'normal' + inp.Marginals[0].parameters = [0,1] + mod = PL() + mm = MetaModel(inp, mod) + mm.generate_ExpDesign(mod, 10) + # TODO: model not working for this, create a workaround/test model! + + + if __name__ == '__main__': None diff --git a/tests/test_engine.py b/tests/test_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..3a6a7ac43aedf5df4c307210da5dde7fe136143c --- /dev/null +++ b/tests/test_engine.py @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- +""" +Tests the class Engine in bayesvalidrox +Tests are available for the following functions +Engine: + + +@author: Rebecca Kohlhaas +""" +import numpy as np + +import sys +sys.path.append("src/") +import pytest + +from bayesvalidrox.surrogate_models.inputs import Input +from bayesvalidrox.surrogate_models.exp_designs import ExpDesigns +from bayesvalidrox.surrogate_models.surrogate_models import MetaModel +from bayesvalidrox.pylink.pylink import PyLinkForwardModel as PL +from bayesvalidrox.surrogate_models.engine import Engine + + +#%% Test Engine constructor + + +def test_engine() -> None: + """ + Build Engine without inputs + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'normal' + inp.Marginals[0].parameters = [0,1] + mod = PL() + mm = MetaModel(inp) + expdes = ExpDesigns(inp) + engine = Engine(mm, mod, expdes) + +#%% Test Engine.start_engine + +def test_start_engine() -> None: + """ + Build Engine without inputs + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'normal' + inp.Marginals[0].parameters = [0,1] + mod = PL() + mm = MetaModel(inp) + expdes = ExpDesigns(inp) + engine = Engine(mm, mod, expdes) + engine.start_engine() + + +#%% Test Engine.train_normal +# TODO: build mock model to do this? - test again in full-length examples