From 0647d188ae8389107a6eaa66cac22dc1fe5590ff Mon Sep 17 00:00:00 2001 From: kohlhaasrebecca <rebecca.kohlhaas@outlook.com> Date: Mon, 18 Dec 2023 09:12:19 +0100 Subject: [PATCH] Started new engine class New class 'Engine' responsible for both training sequentially and non-sequentially. Currently training with old MetaModEngine and in MetaMod directly still possible, but will be deprecated and removed soon. Also started splitting ExpDesign into InputSpace (for MetaMod, created dists and polynomials) and ExpDesign (for Engine, can also sample) --- .coverage | Bin 53248 -> 53248 bytes .../test_analytical_function.py | 38 +- .../post_processing/post_processing.py | 23 +- src/bayesvalidrox/surrogate_models/engine.py | 108 ++++ .../surrogate_models/exp_designs.py | 137 ++--- .../surrogate_models/input_space.py | 539 ++++++++++++++++++ .../surrogate_models/surrogate_models.py | 259 ++++----- tests/test_ExpDesign.py | 41 ++ tests/test_InputSpace.py | 463 +++++++++++++++ tests/test_MetaModel.py | 50 +- tests/test_engine.py | 57 ++ 11 files changed, 1472 insertions(+), 243 deletions(-) create mode 100644 src/bayesvalidrox/surrogate_models/engine.py create mode 100644 src/bayesvalidrox/surrogate_models/input_space.py create mode 100644 tests/test_InputSpace.py create mode 100644 tests/test_engine.py diff --git a/.coverage b/.coverage index 5a50f3b44fe726ce44b701f18e81b5a7962fe24f..fb40a106ecddff155efa3de1fc357ccf13425598 100644 GIT binary patch delta 1301 zcmY*Z|8Emz7=Q2Xz204~z3bY!ZtIu38w9erPz4f4hFQUN(oWb2T2oOrHCr7qbFc{o zb?qEKY&1HWi^&os<1avF6G9@|Mq3R=L?R*a2V5=2n2^YtVQ5_G`CQwv?aiC#eV^y^ zyq|kN&-3PD7pd4q>N56H3e}}D-tBca=m~lUzK2iar>RBi9M!du_>Uw5iuy%Y4=rOf zDX9ko+trwlS9kkNWIzI7k2p9I9v&IoHhOBmdtHj%5O#rk2u{3GKRhz}{&@e`=$mir zc?X;TD63R9zYZR%VSWc-_Ek)%U^;d4I%;0<MMk>@$A-fr0KL_u*7-w*>MfGny6<qH z$EPxVt}XP>CdTaN#!biZ2b_hwYrMi=;63Iq=_tJu|B2hE9F?Ga3yBp{z4USqtsKCh zu<q3zs|pDj;(LWB0;P2ccG~?826=ykg0_wFT)i07)o836)Hwqf#ML14syN3wTHd@~ zTeE5jz4k;}@J?&D!nPyvp?F`sC&tAoAua3^o)w$|GJk8HF^9}HlYzg@f5(5t*YmtF z$Srbn+%z}NHE=w8o&A^%vmw^cwkPW>|1eQe!2~BO80A{_UW+V576cgrZW%{59cgRW z)KYJfz;BlDxRmM4cQ(1f+X+2J>F4t=mbRAVa$L(u`Pt%>&qWj5q|vqHq0rjMJ<zn5 z$brqv-~YH+G(nt2OU{$7pT>eD`upD#(MONV6VXf3+^-LM5E^R8T_C(pod-E_x+9;p zYw@T>L*5co@Jv)@Ih!iZL<=@8zqOq0E#>W<h1p^jz!yq(Evx0Dg<j49m#lZ-t7oX! zn{~2wcsV)Ov16GmjpQRA7E(o~AfgN~AI#e|efF2)^F^TYOu#k+Y&LDWUnkE*3;iWe z*{-`;2_#!m#o3}=vuPHMz}6=C(v~KC@)R*dR7EGJXU-Fi<XQy~id7*C#MInbccPC; zbZk!SvL-rshrE^uTS$zp2IS<m_$MTDg3%SctM+}`7ESMed>wh0g$ATo5|L>44{tEb zpMQqX>96m8lu9km-)H`MxMD@<{8I?+L#RzV8ve$G5Jm$-#P7cy@6HjelU@TbP9ejR z_Gq!f161jiepG2&Oegm0M%K-S(dw<ZavItnY}^H{T+6DdujYFbdm7c7XAMnNx5@{5 zL(NB&+8ei9t3{fMK~%2|LcgeIW&?(~wB>R{RBz8JHY0&Ku8~E>TPe~A35w`YnR6Yi z_>9;pwulaR!?y&d0h&N4N@bDd$|6aX#qCZ$H-AH>6!Eaiq~Y9AaZnr(55u`Su^#>r R$V|z)k#yq*qrP`D@jn^1cf$Yx delta 834 zcmYjPZAep57(RD9+r4LZyLW!%a@$=)u@AJ(56D7ln;+2(GY~TRk@Q8GvW--jA=?#& zoHKQt9~BY$uU|@Q5Q36}?4Q|>uzp2$iy#PekhoN~&UG95ayjRDpXa^L`|@(cX{9)= zd<C6K0nT9BS&bys6V(Oy56(m*T~N_1;O`AC*4N+N>FKih`diFcY;tivZ+HJKucxP1 zrc5%0J&jtm+X%s+0arDi)wn&rzCE>=s4cOo^PdoDF^lQTf<;T~+^ARk0O`?J%^UUw zYuCM2{ZKtsx#2dPjYhg5Zfd(o2{_Vri87P3UUpZRnjtaIYp=lr4J^tvFv>b=^7|U^ z^mN|!#B5hSpExHl>ww?n|L{NgF@Bhfb8fDh6F5ruPB*H%rYqFa>?*s&F0hAKt)?BV zp=T(BDp47k2r%ohJpBw4<ON6}s{p}TN&SL9>W^|}LRieuUwXZwsI;W8z(n%nCJ4Zq zCDE{JNQWFtQ^-ijLL;0TR<;+bC^Tp*hG1iS?zAK+fZqN#*|9s6q=DoZ2`0jOpFQ=y zbvHbfG&mCWfMi%qhJs7>K*AV4A}uEEDMLC;@|46UN&_RLDqWC{JqkUiEEG5p@eF2i zlo)YYC=n+@Hd~ewa}v4r`&bWT!I>lUvH9?01>Hx>qu<A^;}ti>m#NzTFjhsb1^@{B z<MZ6FGvyTc`*40I5L|S8;g^4Jl>u<79DqpxuBKL32I|OfAik_vs2OyriQ$k!y^-Gf zrp+3~cREBFl35Sql~MIE>S$JxFL_3%3`iVkohf&drSFHBg*juO-q*4yQ*YhNDj9iA znf2YnLle!!9^V)%z|us1KB6E(MGWw7w`xe0z}I4WvW~onO)geD^}(tXcrT{oGINs0 H-j9+0f^yrh diff --git a/examples/analytical-function/test_analytical_function.py b/examples/analytical-function/test_analytical_function.py index edb0cee0c..e356d37da 100644 --- a/examples/analytical-function/test_analytical_function.py +++ b/examples/analytical-function/test_analytical_function.py @@ -20,20 +20,23 @@ import numpy as np import pandas as pd import sys import joblib +import matplotlib +matplotlib.use('agg') + # import bayesvalidrox # Add BayesValidRox path sys.path.append("src/") from bayesvalidrox.pylink.pylink import PyLinkForwardModel from bayesvalidrox.surrogate_models.inputs import Input +from bayesvalidrox.surrogate_models.exp_designs import ExpDesigns from bayesvalidrox.surrogate_models.surrogate_models import MetaModel from bayesvalidrox.surrogate_models.meta_model_engine import MetaModelEngine from bayesvalidrox.post_processing.post_processing import PostProcessing from bayesvalidrox.bayes_inference.bayes_inference import BayesInference from bayesvalidrox.bayes_inference.discrepancy import Discrepancy -import matplotlib -matplotlib.use('agg') +from bayesvalidrox.surrogate_models.engine import Engine if __name__ == "__main__": @@ -88,7 +91,7 @@ if __name__ == "__main__": # ===================================================== # ========== DEFINITION OF THE METAMODEL ============ # ===================================================== - MetaModelOpts = MetaModel(Inputs, Model) + MetaModelOpts = MetaModel(Inputs)#, Model) # Select if you want to preserve the spatial/temporal depencencies # MetaModelOpts.dim_red_method = 'PCA' @@ -134,24 +137,33 @@ if __name__ == "__main__": # ------------------------------------------------ # ------ Experimental Design Configuration ------- # ------------------------------------------------ - MetaModelOpts.add_ExpDesign() + ExpDesign = ExpDesigns(Inputs) # One-shot (normal) or Sequential Adaptive (sequential) Design - MetaModelOpts.ExpDesign.method = 'normal' - MetaModelOpts.ExpDesign.n_init_samples = 3*ndim + ExpDesign.method = 'normal' + ExpDesign.n_init_samples = 150#3*ndim # Sampling methods # 1) random 2) latin_hypercube 3) sobol 4) halton 5) hammersley # 6) chebyshev(FT) 7) grid(FT) 8)user - MetaModelOpts.ExpDesign.sampling_method = 'latin_hypercube' + ExpDesign.sampling_method = 'latin_hypercube' # Provide the experimental design object with a hdf5 file # MetaModelOpts.ExpDesign.hdf5_file = 'ExpDesign_AnalyticFunc.hdf5' - - # Train the meta model - meta_model_engine = MetaModelEngine(MetaModelOpts) - meta_model_engine.run() - PCEModel = meta_model_engine.MetaModel + + if 0: + # Train the meta model with the old engine + MetaModelOpts.ExpDesign = ExpDesign + meta_model_engine = MetaModelEngine(MetaModelOpts) + meta_model_engine.run() + PCEModel = meta_model_engine.MetaModel + + if 1: + # Test the new engine + engine = Engine(MetaModelOpts, Model, ExpDesign) + engine.start_engine() + engine.train_normal() + PCEModel = engine.MetaModel # Save PCE models with open(f'PCEModel_{Model.name}.pkl', 'wb') as output: @@ -164,7 +176,7 @@ if __name__ == "__main__": # ===================================================== # ========= POST PROCESSING OF METAMODELS =========== # ===================================================== - PostPCE = PostProcessing(PCEModel) + PostPCE = PostProcessing(PCEModel, Model) # Plot to check validation visually. PostPCE.valid_metamodel(n_samples=1) diff --git a/src/bayesvalidrox/post_processing/post_processing.py b/src/bayesvalidrox/post_processing/post_processing.py index 4db354bbd..0b77fa380 100644 --- a/src/bayesvalidrox/post_processing/post_processing.py +++ b/src/bayesvalidrox/post_processing/post_processing.py @@ -32,8 +32,9 @@ class PostProcessing: expected to be performed change this to `'valid'`. """ - def __init__(self, MetaModel, name='calib'): + def __init__(self, MetaModel, Model, name='calib'): self.MetaModel = MetaModel + self.ModelObj = Model self.name = name # ------------------------------------------------------------------------- @@ -60,7 +61,7 @@ class PostProcessing: bar_plot = True if plot_type == 'bar' else False meta_model_type = self.MetaModel.meta_model_type - Model = self.MetaModel.ModelObj + Model = self.ModelObj # Read Monte-Carlo reference self.mc_reference = Model.read_mc_reference() @@ -170,7 +171,7 @@ class PostProcessing: """ MetaModel = self.MetaModel - Model = MetaModel.ModelObj + Model = self.ModelObj if samples is None: self.n_samples = n_samples @@ -229,7 +230,7 @@ class PostProcessing: """ MetaModel = self.MetaModel - Model = MetaModel.ModelObj + Model = self.ModelObj # Set the number of samples if n_samples: @@ -570,7 +571,7 @@ class PostProcessing: sobol_cell_, total_sobol_ = {}, {} - for output in PCEModel.ModelObj.Output.names: + for output in self.ModelObj.Output.names: n_meas_points = len(coeffs_dict[f'b_{b_i+1}'][output]) @@ -758,7 +759,7 @@ class PostProcessing: total_sobol_all[k][i] = v self.total_sobol = {} - for output in PCEModel.ModelObj.Output.names: + for output in self.ModelObj.Output.names: self.total_sobol[output] = np.mean(total_sobol_all[output], axis=0) # ---------------- Plot ----------------------- @@ -771,7 +772,7 @@ class PostProcessing: fig = plt.figure() - for outIdx, output in enumerate(PCEModel.ModelObj.Output.names): + for outIdx, output in enumerate(self.ModelObj.Output.names): # Extract total Sobol indices total_sobol = self.total_sobol[output] @@ -961,7 +962,7 @@ class PostProcessing: self.n_samples = 1000 PCEModel = self.MetaModel - Model = self.MetaModel.ModelObj + Model = self.ModelObj n_samples = self.n_samples # Create 3D-Grid @@ -1063,7 +1064,7 @@ class PostProcessing: """ MetaModel = self.MetaModel - outputs = MetaModel.ModelObj.Output.names + outputs = self.ModelObj.Output.names pce_means_b = {} pce_stds_b = {} @@ -1174,7 +1175,7 @@ class PostProcessing: Dictionary of results. """ - Model = self.MetaModel.ModelObj + Model = self.ModelObj if samples is None: samples = self._get_sample() @@ -1274,7 +1275,7 @@ class PostProcessing: None. """ - Model = self.MetaModel.ModelObj + Model = self.ModelObj newpath = f'Outputs_PostProcessing_{self.name}/' if not os.path.exists(newpath): diff --git a/src/bayesvalidrox/surrogate_models/engine.py b/src/bayesvalidrox/surrogate_models/engine.py new file mode 100644 index 000000000..6320b6e7e --- /dev/null +++ b/src/bayesvalidrox/surrogate_models/engine.py @@ -0,0 +1,108 @@ +# -*- coding: utf-8 -*- +""" +Created on Sat Dec 16 10:16:50 2023 +Engine to train the surrogate with + +@author: Rebecca Kohlhaas +""" +import h5py +import numpy as np +import os + +from .inputs import Input +from .exp_designs import ExpDesigns +from .surrogate_models import MetaModel + +class Engine(): + + + def __init__(self, MetaMod, Model, ExpDes): + self.MetaModel = MetaMod + self.Model = Model + self.ExpDesign = ExpDes + + def start_engine(self) -> None: + """ + Do all the preparations that need to be run before the actual training + + Returns + ------- + None + + """ + self.out_names = self.Model.Output.names + self.MetaModel.out_names = self.out_names + + # Add expDesign to MetaModel if it does not have any + if not hasattr(self.MetaModel, 'ExpDesign'): + self.MetaModel.ExpDesign = self.ExpDesign + + def train_normal(self, parallel = False, verbose = False) -> None: + """ + Trains surrogate on static samples only. + Samples are taken from the experimental design and the specified + model is run on them. + Alternatively the samples can be read in from a provided hdf5 file. + + Returns + ------- + None + + """ + + ExpDesign = self.ExpDesign + MetaModel = self.MetaModel + # Read ExpDesign (training and targets) from the provided hdf5 + if ExpDesign.hdf5_file is not None: + # TODO: need to run 'generate_ED' as well after this or not? + ExpDesign.read_from_file(self.out_names) + else: + # Check if an old hdf5 file exists: if yes, rename it + hdf5file = f'ExpDesign_{self.Model.name}.hdf5' + if os.path.exists(hdf5file): + os.rename(hdf5file, 'old_'+hdf5file) + + # ---- Prepare X samples ---- + # For training the surrogate use ExpDesign.X_tr, ExpDesign.X is for the model to run on + ExpDesign.generate_ED(ExpDesign.n_init_samples, + transform=True, + max_pce_deg=np.max(MetaModel.pce_deg)) + + # ---- Run simulations at X ---- + if not hasattr(ExpDesign, 'Y') or ExpDesign.Y is None: + print('\n Now the forward model needs to be run!\n') + ED_Y, up_ED_X = self.Model.run_model_parallel(ExpDesign.X) + ExpDesign.Y = ED_Y + else: + # Check if a dict has been passed. + if not type(ExpDesign.Y) is dict: + raise Exception('Please provide either a dictionary or a hdf5' + 'file to ExpDesign.hdf5_file argument.') + + # Separate output dict and x-values + if 'x_values' in ExpDesign.Y: + ExpDesign.x_values = ExpDesign.Y['x_values'] + del ExpDesign.Y['x_values'] + + MetaModel.ndim = ExpDesign.ndim + + # Build and train the MetaModel on the static samples + MetaModel.CollocationPoints = ExpDesign.X_tr + MetaModel.build_metamodel() + + # Fit the surrogate + MetaModel.fit(ExpDesign.X_tr, ExpDesign.Y, parallel, verbose) + + + def train_sequential(self, parallel = False, verbose = False) -> None: + """ + Train the surrogate in a sequential manner. + First build and train evereything on the static samples, then iterate + choosing more samples and refitting the surrogate on them. + + Returns + ------- + None + + """ + self.train_normal(parallel, verbose) \ No newline at end of file diff --git a/src/bayesvalidrox/surrogate_models/exp_designs.py b/src/bayesvalidrox/surrogate_models/exp_designs.py index 602717cf7..3e26c8153 100644 --- a/src/bayesvalidrox/surrogate_models/exp_designs.py +++ b/src/bayesvalidrox/surrogate_models/exp_designs.py @@ -202,7 +202,7 @@ class ExpDesigns: step_snapshot=1, max_a_post=[], adapt_verbose=False): self.InputObj = Input - self.method = method + self.method = method # TODO: this still doing sth? self.meta_Model_type = meta_Model_type self.sampling_method = sampling_method self.hdf5_file = hdf5_file @@ -344,14 +344,15 @@ class ExpDesigns: Returns ------- - samples : array of shape (n_samples, n_params) - Selected training samples. + None """ - Inputs = self.InputObj - if not hasattr(self, 'n_init_samples'): - self.n_init_samples = self.ndim + 1 + if n_samples <0: + raise ValueError('A negative number of samples cannot be created. Please provide positive n_samples') n_samples = int(n_samples) + + if not hasattr(self, 'n_init_samples'): + self.n_init_samples = n_samples # Generate the samples based on requested method self.init_param_space(max_pce_deg) @@ -398,101 +399,67 @@ class ExpDesigns: method=sampling_method ) if sampling_method == 'user' or not self.apce: - return samples, tr_samples + self.X = samples + self.X_tr = tr_samples else: - return tr_samples, samples + self.X = tr_samples + self.X_tr = samples + #return tr_samples, samples # TODO: why is this swapped here? else: - return samples - - - # ------------------------------------------------------------------------- - def generate_training_data(self, Model, max_deg): + self.X = samples + self.X_tr = None + + def read_from_file(self, out_names): """ - Prepares the experimental design either by reading from the prescribed - data or running simulations. + Reads in the ExpDesign from a provided h5py file and saves the results. Parameters ---------- - Model : obj - Model object. - - Raises - ------ - Exception - If model sumulations are not provided properly. + out_names : list of strings + The keys that are in the outputs (y) saved in the provided file. Returns ------- - ED_X_tr: array of shape (n_samples, n_params) - Training samples transformed by an isoprobabilistic transformation. - ED_Y: dict - Model simulations (target) for all outputs. + None. + """ - if self.ExpDesignFlag != 'sequential': - # Read ExpDesign (training and targets) from the provided hdf5 - if self.hdf5_file is not None: + if self.hdf5_file == None: + raise AttributeError('ExpDesign cannot be read in, please provide hdf5 file first') - # Read hdf5 file - f = h5py.File(self.hdf5_file, 'r+') + # Read hdf5 file + f = h5py.File(self.hdf5_file, 'r+') - # Read EDX and pass it to ExpDesign object - try: - self.X = np.array(f["EDX/New_init_"]) - except KeyError: - self.X = np.array(f["EDX/init_"]) + # Read EDX and pass it to ExpDesign object + try: + self.X = np.array(f["EDX/New_init_"]) + except KeyError: + self.X = np.array(f["EDX/init_"]) - # Update number of initial samples - self.n_init_samples = self.X.shape[0] + # Update number of initial samples + self.n_init_samples = self.X.shape[0] - # Read EDX and pass it to ExpDesign object - out_names = self.ModelObj.Output.names - self.Y = {} + # Read EDX and pass it to ExpDesign object + self.Y = {} - # Extract x values - try: - self.Y["x_values"] = dict() - for varIdx, var in enumerate(out_names): - x = np.array(f[f"x_values/{var}"]) - self.Y["x_values"][var] = x - except KeyError: - self.Y["x_values"] = np.array(f["x_values"]) - - # Store the output - for varIdx, var in enumerate(out_names): - try: - y = np.array(f[f"EDY/{var}/New_init_"]) - except KeyError: - y = np.array(f[f"EDY/{var}/init_"]) - self.Y[var] = y - f.close() - else: - # Check if an old hdf5 file exists: if yes, rename it - hdf5file = f'ExpDesign_{self.ModelObj.name}.hdf5' - if os.path.exists(hdf5file): - os.rename(hdf5file, 'old_'+hdf5file) - - # ---- Prepare X samples ---- - ED_X, ED_X_tr = self.generate_ED(self.n_init_samples, - transform=True, - max_pce_deg=max_deg) - self.X = ED_X - self.collocationPoints = ED_X_tr + # Extract x values + try: + self.Y["x_values"] = dict() + for varIdx, var in enumerate(out_names): + x = np.array(f[f"x_values/{var}"]) + self.Y["x_values"][var] = x + except KeyError: + self.Y["x_values"] = np.array(f["x_values"]) + + # Store the output + for varIdx, var in enumerate(out_names): + try: + y = np.array(f[f"EDY/{var}/New_init_"]) + except KeyError: + y = np.array(f[f"EDY/{var}/init_"]) + self.Y[var] = y + f.close() - # ---- Run simulations at X ---- - if not hasattr(self, 'Y') or self.Y is None: - print('\n Now the forward model needs to be run!\n') - ED_Y, up_ED_X = Model.run_model_parallel(ED_X) - self.X = up_ED_X - self.Y = ED_Y - else: - # Check if a dict has been passed. - if type(self.Y) is dict: - self.Y = self.Y - else: - raise Exception('Please provide either a dictionary or a hdf5' - 'file to ExpDesign.hdf5_file argument.') - self.X = ED_X_tr - return self.X, self.Y + # ------------------------------------------------------------------------- def init_param_space(self, max_deg=None): diff --git a/src/bayesvalidrox/surrogate_models/input_space.py b/src/bayesvalidrox/surrogate_models/input_space.py new file mode 100644 index 000000000..adaa807ad --- /dev/null +++ b/src/bayesvalidrox/surrogate_models/input_space.py @@ -0,0 +1,539 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import numpy as np +import math +import itertools +import chaospy +import scipy.stats as st +from tqdm import tqdm +import h5py +import os + +from .apoly_construction import apoly_construction + + +class InputSpace: + """ + This class generates the input space for the metamodel polynomials + using the marginals defined in the `Input` object. + + Attributes + ---------- + Input : obj + Input object containing the parameter marginals, i.e. name, + distribution type and distribution parameters or available raw data. + method : str + Type of the experimental design. The default is `'normal'`. Other + option is `'sequential'`. + meta_Model_type : str + Type of the meta_Model_type. + sampling_method : str + Name of the sampling method for the experimental design. The following + sampling method are supported: + + * random + * latin_hypercube + * sobol + * halton + * hammersley + * chebyshev(FT) + * grid(FT) + * user + hdf5_file : str + Name of the hdf5 file that contains the experimental design. + n_new_samples : int + Number of (initial) training points. + n_max_samples : int + Number of maximum training points. + mod_LOO_threshold : float + The modified leave-one-out cross validation threshold where the + sequential design stops. + tradeoff_scheme : str + Trade-off scheme to assign weights to the exploration and exploitation + scores in the sequential design. + n_canddidate : int + Number of candidate training sets to calculate the scores for. + explore_method : str + Type of the exploration method for the sequential design. The following + methods are supported: + + * Voronoi + * random + * latin_hypercube + * LOOCV + * dual annealing + exploit_method : str + Type of the exploitation method for the sequential design. The + following methods are supported: + + * BayesOptDesign + * BayesActDesign + * VarOptDesign + * alphabetic + * Space-filling + util_func : str or list + The utility function to be specified for the `exploit_method`. For the + available utility functions see Note section. + n_cand_groups : int + Number of candidate groups. Each group of candidate training sets will + be evaulated separately in parallel. + n_replication : int + Number of replications. Only for comparison. The default is 1. + post_snapshot : int + Whether to plot the posterior in the sequential design. The default is + `True`. + step_snapshot : int + The number of steps to plot the posterior in the sequential design. The + default is 1. + max_a_post : list or array + Maximum a posteriori of the posterior distribution, if known. The + default is `[]`. + adapt_verbose : bool + Whether to plot the model response vs that of metamodel for the new + trining point in the sequential design. + + Note + ---------- + The following utiliy functions for the **exploitation** methods are + supported: + + #### BayesOptDesign (when data is available) + - DKL (Kullback-Leibler Divergence) + - DPP (D-Posterior-percision) + - APP (A-Posterior-percision) + + #### VarBasedOptDesign -> when data is not available + - Entropy (Entropy/MMSE/active learning) + - EIGF (Expected Improvement for Global fit) + - LOOCV (Leave-one-out Cross Validation) + + #### alphabetic + - D-Opt (D-Optimality) + - A-Opt (A-Optimality) + - K-Opt (K-Optimality) + """ + + def __init__(self, Input, method='normal', meta_Model_type='pce', + sampling_method='random', hdf5_file=None, + n_new_samples=1, n_max_samples=None, mod_LOO_threshold=1e-16, + tradeoff_scheme=None, n_canddidate=1, explore_method='random', + exploit_method='Space-filling', util_func='Space-filling', + n_cand_groups=4, n_replication=1, post_snapshot=False, + step_snapshot=1, max_a_post=[], adapt_verbose=False): + + self.InputObj = Input + self.method = method # TODO: this still doing sth? + self.meta_Model_type = meta_Model_type + self.sampling_method = sampling_method + self.hdf5_file = hdf5_file + self.n_new_samples = n_new_samples + self.n_max_samples = n_max_samples + self.mod_LOO_threshold = mod_LOO_threshold + self.explore_method = explore_method + self.exploit_method = exploit_method + self.util_func = util_func + self.tradeoff_scheme = tradeoff_scheme + self.n_canddidate = n_canddidate + self.n_cand_groups = n_cand_groups + self.n_replication = n_replication + self.post_snapshot = post_snapshot + self.step_snapshot = step_snapshot + self.max_a_post = max_a_post + self.adapt_verbose = adapt_verbose + + # Other + self.apce = None + self.ndim = None + + # Init + self.check_valid_inputs() + + + def check_valid_inputs(self)-> None: + """ + Check if the given InputObj is valid to use for further calculations: + Has some Marginals + Marginals have valid priors + All Marginals given as the same type (samples vs dist) + + Returns + ------- + None + + """ + Inputs = self.InputObj + self.ndim = len(Inputs.Marginals) + + # Check if PCE or aPCE metamodel is selected. + # TODO: test also for 'pce'?? + if self.meta_Model_type.lower() == 'apce': + self.apce = True + else: + self.apce = False + + # check if marginals given + if not self.ndim >=1: + raise AssertionError('Cannot build distributions if no marginals are given') + + # check that each marginal is valid + for marginals in Inputs.Marginals: + if len(marginals.input_data) == 0: + if marginals.dist_type == None: + raise AssertionError('Not all marginals were provided priors') + break + if np.array(marginals.input_data).shape[0] and (marginals.dist_type != None): + raise AssertionError('Both samples and distribution type are given. Please choose only one.') + break + + # Check if input is given as dist or input_data. + self.input_data_given = -1 + for marg in Inputs.Marginals: + #print(self.input_data_given) + size = np.array(marg.input_data).shape[0] + #print(f'Size: {size}') + if size and abs(self.input_data_given) !=1: + self.input_data_given = 2 + break + if (not size) and self.input_data_given > 0: + self.input_data_given = 2 + break + if not size: + self.input_data_given = 0 + if size: + self.input_data_given = 1 + + if self.input_data_given == 2: + raise AssertionError('Distributions cannot be built as the priors have different types') + + + # Get the bounds if input_data are directly defined by user: + if self.input_data_given: + for i in range(self.ndim): + low_bound = np.min(Inputs.Marginals[i].input_data) + up_bound = np.max(Inputs.Marginals[i].input_data) + Inputs.Marginals[i].parameters = [low_bound, up_bound] + + # ------------------------------------------------------------------------- + def generate_ED(self, n_samples, transform=False, + max_pce_deg=None): + """ + Generates experimental designs (training set) with the given method. + + Parameters + ---------- + n_samples : int + Number of requested training points. + sampling_method : str, optional + Sampling method. The default is `'random'`. + transform : bool, optional + Isoprobabilistic transformation. The default is `False`. + max_pce_deg : int, optional + Maximum PCE polynomial degree. The default is `None`. + + Returns + ------- + None + + """ + if n_samples <0: + raise ValueError('A negative number of samples cannot be created. Please provide positive n_samples') + n_samples = int(n_samples) + + if not hasattr(self, 'n_init_samples'): + self.n_init_samples = n_samples + + # Generate the samples based on requested method + self.init_param_space(max_pce_deg) + + + # ------------------------------------------------------------------------- + def init_param_space(self, max_deg=None): + """ + Initializes parameter space. + + Parameters + ---------- + max_deg : int, optional + Maximum degree. The default is `None`. + + Returns + ------- + raw_data : array of shape (n_params, n_samples) + Raw data. + bound_tuples : list of tuples + A list containing lower and upper bounds of parameters. + + """ + Inputs = self.InputObj + ndim = self.ndim + rosenblatt_flag = Inputs.Rosenblatt + mc_size = 50000 + + # Save parameter names + self.par_names = [] + for parIdx in range(ndim): + self.par_names.append(Inputs.Marginals[parIdx].name) + + # Create a multivariate probability distribution + # TODO: change this to make max_deg obligatory? at least in some specific cases? + if max_deg is not None: + JDist, poly_types = self.build_polytypes(rosenblatt=rosenblatt_flag) + self.JDist, self.poly_types = JDist, poly_types + + if self.input_data_given: + self.MCSize = len(Inputs.Marginals[0].input_data) + self.raw_data = np.zeros((ndim, self.MCSize)) + + for parIdx in range(ndim): + # Save parameter names + try: + self.raw_data[parIdx] = np.array( + Inputs.Marginals[parIdx].input_data) + except: + self.raw_data[parIdx] = self.JDist[parIdx].sample(mc_size) + + else: + # Generate random samples based on parameter distributions + self.raw_data = chaospy.generate_samples(mc_size, + domain=self.JDist) + + # Extract moments + for parIdx in range(ndim): + mu = np.mean(self.raw_data[parIdx]) + std = np.std(self.raw_data[parIdx]) + self.InputObj.Marginals[parIdx].moments = [mu, std] + + # Generate the bounds based on given inputs for marginals + bound_tuples = [] + for i in range(ndim): + if Inputs.Marginals[i].dist_type == 'unif': + low_bound, up_bound = Inputs.Marginals[i].parameters + else: + low_bound = np.min(self.raw_data[i]) + up_bound = np.max(self.raw_data[i]) + + bound_tuples.append((low_bound, up_bound)) + + self.bound_tuples = tuple(bound_tuples) + + # ------------------------------------------------------------------------- + def build_polytypes(self, rosenblatt): + """ + Creates the polynomial types to be passed to univ_basis_vals method of + the MetaModel object. + + Parameters + ---------- + rosenblatt : bool + Rosenblatt transformation flag. + + Returns + ------- + orig_space_dist : object + A chaospy JDist object or a gaussian_kde object. + poly_types : list + List of polynomial types for the parameters. + + """ + Inputs = self.InputObj + + all_data = [] + all_dist_types = [] + orig_joints = [] + poly_types = [] + + for parIdx in range(self.ndim): + + if Inputs.Marginals[parIdx].dist_type is None: + data = Inputs.Marginals[parIdx].input_data + all_data.append(data) + dist_type = None + else: + dist_type = Inputs.Marginals[parIdx].dist_type + params = Inputs.Marginals[parIdx].parameters + + if rosenblatt: + polytype = 'hermite' + dist = chaospy.Normal() + + elif dist_type is None: + polytype = 'arbitrary' + dist = None + + elif 'unif' in dist_type.lower(): + polytype = 'legendre' + if not np.array(params).shape[0]>=2: + raise AssertionError(f'Distribution has too few parameters!') + dist = chaospy.Uniform(lower=params[0], upper=params[1]) + + elif 'norm' in dist_type.lower() and \ + 'log' not in dist_type.lower(): + if not np.array(params).shape[0]>=2: + raise AssertionError(f'Distribution has too few parameters!') + polytype = 'hermite' + dist = chaospy.Normal(mu=params[0], sigma=params[1]) + + elif 'gamma' in dist_type.lower(): + polytype = 'laguerre' + if not np.array(params).shape[0]>=3: + raise AssertionError(f'Distribution has too few parameters!') + dist = chaospy.Gamma(shape=params[0], + scale=params[1], + shift=params[2]) + + elif 'beta' in dist_type.lower(): + if not np.array(params).shape[0]>=4: + raise AssertionError(f'Distribution has too few parameters!') + polytype = 'jacobi' + dist = chaospy.Beta(alpha=params[0], beta=params[1], + lower=params[2], upper=params[3]) + + elif 'lognorm' in dist_type.lower(): + polytype = 'hermite' + if not np.array(params).shape[0]>=2: + raise AssertionError(f'Distribution has too few parameters!') + mu = np.log(params[0]**2/np.sqrt(params[0]**2 + params[1]**2)) + sigma = np.sqrt(np.log(1 + params[1]**2 / params[0]**2)) + dist = chaospy.LogNormal(mu, sigma) + # dist = chaospy.LogNormal(mu=params[0], sigma=params[1]) + + elif 'expon' in dist_type.lower(): + polytype = 'exponential' + if not np.array(params).shape[0]>=2: + raise AssertionError(f'Distribution has too few parameters!') + dist = chaospy.Exponential(scale=params[0], shift=params[1]) + + elif 'weibull' in dist_type.lower(): + polytype = 'weibull' + if not np.array(params).shape[0]>=3: + raise AssertionError(f'Distribution has too few parameters!') + dist = chaospy.Weibull(shape=params[0], scale=params[1], + shift=params[2]) + + else: + message = (f"DistType {dist_type} for parameter" + f"{parIdx+1} is not available.") + raise ValueError(message) + + if self.input_data_given or self.apce: + polytype = 'arbitrary' + + # Store dists and poly_types + orig_joints.append(dist) + poly_types.append(polytype) + all_dist_types.append(dist_type) + + # Prepare final output to return + if None in all_dist_types: + # Naive approach: Fit a gaussian kernel to the provided data + Data = np.asarray(all_data) + try: + orig_space_dist = st.gaussian_kde(Data) + except: + raise ValueError('The samples provided to the Marginals should be 1D only') + self.prior_space = orig_space_dist + else: + orig_space_dist = chaospy.J(*orig_joints) + try: + self.prior_space = st.gaussian_kde(orig_space_dist.sample(10000)) + except: + raise ValueError('Parameter values are not valid, please set differently') + + return orig_space_dist, poly_types + + # ------------------------------------------------------------------------- + def transform(self, X, params=None, method=None): + """ + Transform the samples via either a Rosenblatt or an isoprobabilistic + transformation. + + Parameters + ---------- + X : array of shape (n_samples,n_params) + Samples to be transformed. + method : string + If transformation method is 'user' transform X, else just pass X. + + Returns + ------- + tr_X: array of shape (n_samples,n_params) + Transformed samples. + + """ + # Check for built JDist + if not hasattr(self, 'JDist'): + raise AttributeError('Call function init_param_space first to create JDist') + + # Check if X is 2d + if X.ndim != 2: + raise AttributeError('X should have two dimensions') + + # Check if size of X matches Marginals + if X.shape[1]!= self.ndim: + raise AttributeError('The second dimension of X should be the same size as the number of marginals in the InputObj') + + if self.InputObj.Rosenblatt: + self.origJDist, _ = self.build_polytypes(False) + if method == 'user': + tr_X = self.JDist.inv(self.origJDist.fwd(X.T)).T + else: + # Inverse to original spcace -- generate sample ED + tr_X = self.origJDist.inv(self.JDist.fwd(X.T)).T + else: + # Transform samples via an isoprobabilistic transformation + n_samples, n_params = X.shape + Inputs = self.InputObj + origJDist = self.JDist + poly_types = self.poly_types + + disttypes = [] + for par_i in range(n_params): + disttypes.append(Inputs.Marginals[par_i].dist_type) + + # Pass non-transformed X, if arbitrary PCE is selected. + if None in disttypes or self.input_data_given or self.apce: + return X + + cdfx = np.zeros((X.shape)) + tr_X = np.zeros((X.shape)) + + for par_i in range(n_params): + + # Extract the parameters of the original space + disttype = disttypes[par_i] + if disttype is not None: + dist = origJDist[par_i] + else: + dist = None + polytype = poly_types[par_i] + cdf = np.vectorize(lambda x: dist.cdf(x)) + + # Extract the parameters of the transformation space based on + # polyType + if polytype == 'legendre' or disttype == 'uniform': + # Generate Y_Dists based + params_Y = [-1, 1] + dist_Y = st.uniform(loc=params_Y[0], + scale=params_Y[1]-params_Y[0]) + inv_cdf = np.vectorize(lambda x: dist_Y.ppf(x)) + + elif polytype == 'hermite' or disttype == 'norm': + params_Y = [0, 1] + dist_Y = st.norm(loc=params_Y[0], scale=params_Y[1]) + inv_cdf = np.vectorize(lambda x: dist_Y.ppf(x)) + + elif polytype == 'laguerre' or disttype == 'gamma': + params_Y = [1, params[1]] + dist_Y = st.gamma(loc=params_Y[0], scale=params_Y[1]) + inv_cdf = np.vectorize(lambda x: dist_Y.ppf(x)) + + # Compute CDF_x(X) + cdfx[:, par_i] = cdf(X[:, par_i]) + + # Compute invCDF_y(cdfx) + tr_X[:, par_i] = inv_cdf(cdfx[:, par_i]) + + return tr_X + + diff --git a/src/bayesvalidrox/surrogate_models/surrogate_models.py b/src/bayesvalidrox/surrogate_models/surrogate_models.py index fb579cf6a..c3abe2af8 100644 --- a/src/bayesvalidrox/surrogate_models/surrogate_models.py +++ b/src/bayesvalidrox/surrogate_models/surrogate_models.py @@ -99,7 +99,7 @@ class MetaModel(): """ - def __init__(self, input_obj, model_obj, meta_model_type='PCE', + def __init__(self, input_obj, model_obj = None, meta_model_type='PCE', pce_reg_method='OLS', bootstrap_method='fast', n_bootstrap_itrs=1, pce_deg=1, pce_q_norm=1.0, dim_red_method='no', verbose=False): @@ -114,13 +114,61 @@ class MetaModel(): self.pce_q_norm = pce_q_norm self.dim_red_method = dim_red_method self.verbose = verbose - - def check_init(self) -> None: - """ - Checks all the parameter values given to the constructor against - reference lists of what is implemented and can be run + + def build_metamodel(self) -> None: """ + Builds the parts for the metamodel (polynomes,...) that are neede before fitting. + + Returns + ------- None + DESCRIPTION. + + """ + if not hasattr(self, 'CollocationPoints'): + raise AttributeError('Please provide samples to the metamodle before building it.') + + self.n_params = len(self.input_obj.Marginals) + + # Generate polynomials + self.generate_polynomials(np.max(self.pce_deg)) + + # Initialize the nested dictionaries + if self.meta_model_type.lower() == 'gpe': + self.gp_poly = self.auto_vivification() + self.x_scaler = self.auto_vivification() + self.LCerror = self.auto_vivification() + else: + self.deg_dict = self.auto_vivification() + self.q_norm_dict = self.auto_vivification() + self.coeffs_dict = self.auto_vivification() + self.basis_dict = self.auto_vivification() + self.score_dict = self.auto_vivification() + self.clf_poly = self.auto_vivification() + self.LCerror = self.auto_vivification() + if self.dim_red_method.lower() == 'pca': + self.pca = self.auto_vivification() + + # Define an array containing the degrees + self.n_samples, ndim = self.CollocationPoints.shape + self.deg_array = self.__select_degree(ndim, self.n_samples) + + # Generate all basis indices + self.allBasisIndices = self.auto_vivification() + for deg in self.deg_array: + keys = self.allBasisIndices.keys() + if deg not in np.fromiter(keys, dtype=float): + # Generate the polynomial basis indices + for qidx, q in enumerate(self.pce_q_norm): + basis_indices = glexindex(start=0, stop=deg+1, + dimensions=self.n_params, + cross_truncation=q, + reverse=False, graded=True) + self.allBasisIndices[str(deg)][str(q)] = basis_indices + + # Evaluate the univariate polynomials on ExpDesign + if self.meta_model_type.lower() != 'gpe': + self.univ_p_val = self.univ_basis_vals(self.CollocationPoints) # ------------------------------------------------------------------------- @@ -136,24 +184,23 @@ class MetaModel(): """ Model = self.ModelObj - self.n_params = len(self.input_obj.Marginals) self.ExpDesignFlag = 'normal' # --- Prepare pce degree --- if self.meta_model_type.lower() == 'pce': if type(self.pce_deg) is not np.ndarray: self.pce_deg = np.array(self.pce_deg) - if self.ExpDesign.method == 'sequential': - raise Exception( - "Please use MetaModelEngine class for the sequential design!" - ) + #if self.ExpDesign.method == 'sequential': + # raise Exception( + # "Please use MetaModelEngine class for the sequential design!" + # ) - elif self.ExpDesign.method == 'normal': - self.train_norm_design(verbose=True) + #elif self.ExpDesign.method == 'normal': + self.train_norm_design(verbose=True) - else: - raise Exception("The method for experimental design you requested" - " has not been implemented yet.") + #else: + # raise Exception("The method for experimental design you requested" + # " has not been implemented yet.") # Zip the model run directories # TODO: can move this elsewhere? Is this important? @@ -164,7 +211,7 @@ class MetaModel(): return self # ------------------------------------------------------------------------- - def train_norm_design(self, parallel=True, verbose=False): + def train_norm_design(self, n_samples = None, parallel=True, verbose=False): """ This function loops over the outputs and each time step/point and fits the meta model. @@ -182,56 +229,46 @@ class MetaModel(): Meta-model object. """ - Model = self.ModelObj # Get the collocation points to run the forward model - CollocationPoints, OutputDict = self.generate_ExpDesign(Model) + if not hasattr(self.ExpDesign, 'n_init_samples'): + if n_samples == None or n_samples<=0: + raise ValueError('The number of samples provided for static training is not valid, please provide an int>0') + self.CollocationPoints, self.OutputDict = self.generate_ExpDesign(self.ModelObj, n_samples) + else: + self.CollocationPoints, self.OutputDict = self.generate_ExpDesign(self.ModelObj, self.ExpDesign.n_init_samples) + + if 'x_values' in self.OutputDict: + self.ExpDesign.x_values = self.OutputDict['x_values'] + del self.OutputDict['x_values'] - # Generate polynomials - self.generate_polynomials(np.max(self.pce_deg)) self.ndim = self.ExpDesign.ndim + + # Build the metamodel if not done already + self.build_metamodel() + + # Fit the surrogate + self.fit(self.CollocationPoints, self.OutputDict, parallel, verbose) + + + def fit(self, X, y, parallel = True, verbose = False): + """ + Fits the surrogate to the given data (samples X, outputs y). + Note here that the samples X should be the transformed samples provided + by the experimental design if the transformation is used there. + Parameters + ---------- + X : TYPE + DESCRIPTION. + y : TYPE + DESCRIPTION. - # Initialize the nested dictionaries - if self.meta_model_type.lower() == 'gpe': - self.gp_poly = self.auto_vivification() - self.x_scaler = self.auto_vivification() - self.LCerror = self.auto_vivification() - else: - self.deg_dict = self.auto_vivification() - self.q_norm_dict = self.auto_vivification() - self.coeffs_dict = self.auto_vivification() - self.basis_dict = self.auto_vivification() - self.score_dict = self.auto_vivification() - self.clf_poly = self.auto_vivification() - self.LCerror = self.auto_vivification() - if self.dim_red_method.lower() == 'pca': - self.pca = self.auto_vivification() - - # Define an array containing the degrees - n_samples, ndim = CollocationPoints.shape - self.deg_array = self.__select_degree(ndim, n_samples) - - # Generate all basis indices - self.allBasisIndices = self.auto_vivification() - for deg in self.deg_array: - keys = self.allBasisIndices.keys() - if deg not in np.fromiter(keys, dtype=float): - # Generate the polynomial basis indices - for qidx, q in enumerate(self.pce_q_norm): - basis_indices = glexindex(start=0, stop=deg+1, - dimensions=self.n_params, - cross_truncation=q, - reverse=False, graded=True) - self.allBasisIndices[str(deg)][str(q)] = basis_indices - - # Evaluate the univariate polynomials on ExpDesign - if self.meta_model_type.lower() != 'gpe': - univ_p_val = self.univ_basis_vals(CollocationPoints) - - if 'x_values' in OutputDict: - self.ExpDesign.x_values = OutputDict['x_values'] - del OutputDict['x_values'] + Returns + ------- + None. + """ + # --- Loop through data points and fit the surrogate --- if verbose: print(f"\n>>>> Training the {self.meta_model_type} metamodel " @@ -262,16 +299,16 @@ class MetaModel(): # Loop over the bootstrap iterations for b_i in enum_obj: if b_i > 0: - b_indices = np.random.randint(n_samples, size=n_samples) + b_indices = np.random.randint(self.n_samples, size=self.n_samples) else: - b_indices = np.arange(len(CollocationPoints)) + b_indices = np.arange(len(X)) - X_train_b = CollocationPoints[b_indices] + X_train_b = X[b_indices] if verbose and self.n_bootstrap_itrs == 1: - items = tqdm(OutputDict.items(), desc="Fitting regression") + items = tqdm(y.items(), desc="Fitting regression") else: - items = OutputDict.items() + items = y.items() # For loop over the components/outputs for key, Output in items: @@ -318,7 +355,7 @@ class MetaModel(): self.gp_poly[f'b_{b_i+1}'][key][f"y_{idx+1}"] = out[idx] else: - self.univ_p_val = univ_p_val[b_indices] + self.univ_p_val = self.univ_p_val[b_indices] if parallel and (not fast_bootstrap or b_i == 0): out = Parallel(n_jobs=-1, backend='multiprocessing')( delayed(self.adaptive_regression)(X_train_b, @@ -394,7 +431,7 @@ class MetaModel(): psi = self.create_psi(basis_indices, self.univ_p_val) # Calulate the cofficients of surrogate model - updated_out = self.fit( + updated_out = self.regression( psi, y[:, i], basis_indices, reg_method='OLS', sparsity=False ) @@ -418,7 +455,7 @@ class MetaModel(): meta_Model_type=self.meta_model_type) # ------------------------------------------------------------------------- - def generate_ExpDesign(self, Model): + def generate_ExpDesign(self, Model, n_samples): """ Prepares the experimental design either by reading from the prescribed data or running simulations. @@ -445,61 +482,23 @@ class MetaModel(): self.add_ExpDesign() ExpDesign = self.ExpDesign - if self.ExpDesignFlag != 'sequential': - # Read ExpDesign (training and targets) from the provided hdf5 - if ExpDesign.hdf5_file is not None: - - # Read hdf5 file - f = h5py.File(ExpDesign.hdf5_file, 'r+') - - # Read EDX and pass it to ExpDesign object - try: - ExpDesign.X = np.array(f["EDX/New_init_"]) - except KeyError: - ExpDesign.X = np.array(f["EDX/init_"]) - - # Update number of initial samples - ExpDesign.n_init_samples = ExpDesign.X.shape[0] - - # Read EDX and pass it to ExpDesign object - out_names = self.ModelObj.Output.names - ExpDesign.Y = {} - - # Extract x values - try: - ExpDesign.Y["x_values"] = dict() - for varIdx, var in enumerate(out_names): - x = np.array(f[f"x_values/{var}"]) - ExpDesign.Y["x_values"][var] = x - except KeyError: - ExpDesign.Y["x_values"] = np.array(f["x_values"]) - - # Store the output - for varIdx, var in enumerate(out_names): - try: - y = np.array(f[f"EDY/{var}/New_init_"]) - except KeyError: - y = np.array(f[f"EDY/{var}/init_"]) - ExpDesign.Y[var] = y - f.close() - else: - # Check if an old hdf5 file exists: if yes, rename it - hdf5file = f'ExpDesign_{self.ModelObj.name}.hdf5' - if os.path.exists(hdf5file): - os.rename(hdf5file, 'old_'+hdf5file) + # Read ExpDesign (training and targets) from the provided hdf5 + if ExpDesign.hdf5_file is not None: + # TODO: need to run 'generate_ED' as well after this or not? + ExpDesign.read_from_file(self.out_names) + else: + # Check if an old hdf5 file exists: if yes, rename it + hdf5file = f'ExpDesign_{self.ModelObj.name}.hdf5' + if os.path.exists(hdf5file): + os.rename(hdf5file, 'old_'+hdf5file) # ---- Prepare X samples ---- - ED_X, ED_X_tr = ExpDesign.generate_ED(ExpDesign.n_init_samples, - transform=True, - max_pce_deg=np.max(self.pce_deg)) - ExpDesign.X = ED_X - ExpDesign.collocationPoints = ED_X_tr + ExpDesign.generate_ED(n_samples, transform=True, max_pce_deg=np.max(self.pce_deg)) # ---- Run simulations at X ---- if not hasattr(ExpDesign, 'Y') or ExpDesign.Y is None: print('\n Now the forward model needs to be run!\n') - ED_Y, up_ED_X = Model.run_model_parallel(ED_X) - ExpDesign.X = up_ED_X + ED_Y, up_ED_X = Model.run_model_parallel(ExpDesign.X) self.ModelOutputDict = ED_Y ExpDesign.Y = ED_Y else: @@ -510,7 +509,7 @@ class MetaModel(): raise Exception('Please provide either a dictionary or a hdf5' 'file to ExpDesign.hdf5_file argument.') - return ED_X_tr, self.ModelOutputDict + return ExpDesign.X_tr, self.ModelOutputDict # ------------------------------------------------------------------------- def univ_basis_vals(self, samples, n_max=None): @@ -606,7 +605,7 @@ class MetaModel(): return psi # ------------------------------------------------------------------------- - def fit(self, X, y, basis_indices, reg_method=None, sparsity=True): + def regression(self, X, y, basis_indices, reg_method=None, sparsity=True): """ Fit regression using the regression method provided. @@ -783,7 +782,7 @@ class MetaModel(): Psi = self.create_psi(BasisIndices, self.univ_p_val) # Calulate the cofficients of the meta model - outs = self.fit(Psi, ED_Y, BasisIndices) + outs = self.regression(Psi, ED_Y, BasisIndices) # Calculate and save the score of LOOCV score, LCerror = self.corr_loocv_error(outs['clf_poly'], @@ -1176,8 +1175,6 @@ class MetaModel(): std_pred : dict Standard deviatioon of the predictions. """ - outputs = self.ModelObj.Output.names - # Generate or transform (if need be) samples if samples is None: # Generate @@ -1276,7 +1273,7 @@ class MetaModel(): mean_pred_all[k][i] = v # Compute the moments of predictions over the predictions - for output in outputs: + for output in self.out_names: # Only use bootstraps with finite values finite_rows = np.isfinite( mean_pred_all[output]).all(axis=2).all(axis=1) @@ -1295,7 +1292,7 @@ class MetaModel(): return mean_pred, std_pred # ------------------------------------------------------------------------- - def create_model_error(self, X, y, name='Calib'): + def create_model_error(self, X, y, Model, name='Calib'): """ Fits a GPE-based model error. @@ -1315,7 +1312,6 @@ class MetaModel(): Self object. """ - Model = self.ModelObj outputNames = Model.Output.names self.errorRegMethod = 'GPE' self.errorclf_poly = self.auto_vivification() @@ -1473,13 +1469,10 @@ class MetaModel(): n_combo[i] /= math.factorial(ndim) * math.factorial(d) return n_combo - if self.ExpDesignFlag != 'sequential': - deg_new = max_deg - else: - d = nitr if nitr != 0 and self.n_params > 5 else 1 - min_index = np.argmin(abs(M_uptoMax(max_deg)-ndim*n_samples*d)) - deg_new = max_deg - # deg_new = range(1, max_deg+1)[min_index] + deg_new = max_deg + d = nitr if nitr != 0 and self.n_params > 5 else 1 + min_index = np.argmin(abs(M_uptoMax(max_deg)-ndim*n_samples*d)) + # deg_new = range(1, max_deg+1)[min_index] if deg_new > min_Deg and self.pce_reg_method.lower() != 'fastard': deg_array = np.arange(min_Deg, deg_new+1) diff --git a/tests/test_ExpDesign.py b/tests/test_ExpDesign.py index 6a383cae6..72a798804 100644 --- a/tests/test_ExpDesign.py +++ b/tests/test_ExpDesign.py @@ -9,6 +9,7 @@ Class ExpDesigns: check_valid_inputs - x generate_samples generate_ED + read_from_file init_param_space - x build_polytypes - x random_sampler @@ -595,6 +596,21 @@ def test_generate_ED() -> None: exp = ExpDesigns(inp) samples = exp.generate_ED(4) +def test_generate_ED_negsamplenum(): + """ + Generate ED for neg number of samples + """ + x = np.random.uniform(0,1,1000) + X = np.random.uniform(0,1,(1,10)) + inp = Input() + inp.add_marginals() + inp.Marginals[0].input_data = x + exp = ExpDesigns(inp, sampling_method = 'user') + with pytest.raises(ValueError) as excinfo: + exp.generate_ED(-1) + assert str(excinfo.value) == 'A negative number of samples cannot be created. Please provide positive n_samples' + + def test_generate_ED_usernoX() -> None: """ @@ -637,6 +653,31 @@ def test_generate_ED_userX() -> None: exp = ExpDesigns(inp, sampling_method = 'user') exp.X = X samples = exp.generate_ED(4) + + +#%% Test ExpDesign.read_from_file + +def test_read_from_file(): + """ + No file given to read in + """ + x = np.random.uniform(0,1,1000) + X = np.random.uniform(0,1,(1,10)) + inp = Input() + inp.add_marginals() + inp.Marginals[0].input_data = x + exp = ExpDesigns(inp, sampling_method = 'user') + with pytest.raises(AttributeError) as excinfo: + exp.read_from_file(['Out']) + assert str(excinfo.value) == 'ExpDesign cannot be read in, please provide hdf5 file first' + +# TODO: test with provided file + + +#%% Test ExpDesign.generate_ED(self, n_samples, transform=False, max_pce_deg=None) + + + if __name__ == '__main__': None diff --git a/tests/test_InputSpace.py b/tests/test_InputSpace.py new file mode 100644 index 000000000..8c5476be4 --- /dev/null +++ b/tests/test_InputSpace.py @@ -0,0 +1,463 @@ +# -*- coding: utf-8 -*- +""" +Test the InputSpace class in bayesvalidrox. +Tests are available for the following functions +Class InputSpace: + check_valid_inputs - x + generate_samples + init_param_space - x + build_polytypes - x + transform - x + +@author: Rebecca Kohlhaas +""" +import sys +sys.path.append("src/") +import pytest +import numpy as np + +from bayesvalidrox.surrogate_models.inputs import Input +import bayesvalidrox.surrogate_models.input_space as exp +from bayesvalidrox.surrogate_models.input_space import InputSpace + +#%% Test ExpDesign.check_valid_input + +def test_check_valid_input_hasmarg() -> None: + """ + Distribution not built if no marginals set + """ + inp = Input() + with pytest.raises(AssertionError) as excinfo: + init = InputSpace(inp) + assert str(excinfo.value) == 'Cannot build distributions if no marginals are given' + +def test_check_valid_input_haspriors() -> None: + """ + Distribution not built if no distribution set for the marginals + """ + inp = Input() + inp.add_marginals() + with pytest.raises(AssertionError) as excinfo: + exp = InputSpace(inp) + assert str(excinfo.value) == 'Not all marginals were provided priors' + +def test_check_valid_input_priorsmatch() -> None: + """ + Distribution not built if dist types do not align + """ + x = np.random.uniform(0,1,1000) + inp = Input() + inp.add_marginals() + inp.Marginals[0].input_data = x + inp.add_marginals() + inp.Marginals[1].dist_type = 'normal' + inp.Marginals[1].parameters = [0,1] + with pytest.raises(AssertionError) as excinfo: + exp = InputSpace(inp) + assert str(excinfo.value) == 'Distributions cannot be built as the priors have different types' + +def test_check_valid_input_samples() -> None: + """ + Design built correctly - samples + """ + x = np.random.uniform(0,1,1000) + inp = Input() + inp.add_marginals() + inp.Marginals[0].input_data = x + inp.add_marginals() + inp.Marginals[1].input_data = x+2 + try: + exp = InputSpace(inp) + except AssertionError: + pytest.fail("ExpDesign raised AssertionError unexpectedly!") + # TODO: check for better options to assert that no error at all occurred + + +def test_check_valid_input_both() -> None: + """ + Design no built - samples and dist type given + """ + x = np.random.uniform(0,1,1000) + inp = Input() + inp.add_marginals() + inp.Marginals[0].input_data = x + inp.Marginals[0].dist_type = 'normal' + with pytest.raises(AssertionError) as excinfo: + exp = InputSpace(inp) + assert str(excinfo.value) == 'Both samples and distribution type are given. Please choose only one.' + +#def test_check_valid_input_distnotok() -> None: +# """ +# Design built incorrectly - dist types without parameters +# """ +# inp = Input() +# inp.add_marginals() +# inp.Marginals[0].dist_type = 'normal' +# inp.add_marginals() +# inp.Marginals[1].dist_type = 'normal' +# with pytest.raises(AssertionError) as excinfo: +# exp = ExpDesigns(inp) +# assert str(excinfo.value) == 'Some distributions do not have characteristic values' + +def test_check_valid_input_distok() -> None: + """ + Design built correctly - dist types + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'normal' + inp.Marginals[0].parameters = [0,1] + inp.add_marginals() + inp.Marginals[1].dist_type = 'normal' + inp.Marginals[1].parameters = [0,1] + try: + exp = InputSpace(inp) + except AssertionError: + pytest.fail("ExpDesign raised AssertionError unexpectedly!") + # TODO: check for better options to assert that no error at all occurred + +#%% Test ExpDesign.build_polytypes +def test_build_polytypes_normalerr() -> None: + """ + Build dist 'normal' - too few params + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'normal' + inp.Marginals[0].parameters = [] + exp = InputSpace(inp) + with pytest.raises(AssertionError) as excinfo: + exp.build_polytypes(False) + assert str(excinfo.value) == 'Distribution has too few parameters!' + +def test_build_polytypes_normal() -> None: + """ + Build dist 'normal' + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'normal' + inp.Marginals[0].parameters = [0,1] + exp = InputSpace(inp) + exp.build_polytypes(False) + +def test_build_polytypes_uniferr() -> None: + """ + Build dist 'unif' - too few params + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'unif' + inp.Marginals[0].parameters = [] + exp = InputSpace(inp) + with pytest.raises(AssertionError) as excinfo: + exp.build_polytypes(False) + assert str(excinfo.value) == 'Distribution has too few parameters!' + +def test_build_polytypes_unif() -> None: + """ + Build dist 'unif' + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'unif' + inp.Marginals[0].parameters = [0,1] + exp = InputSpace(inp) + exp.build_polytypes(False) + +def test_build_polytypes_gammaerr() -> None: + """ + Build dist 'gamma' - too few params + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'gamma' + inp.Marginals[0].parameters = [] + exp = InputSpace(inp) + with pytest.raises(AssertionError) as excinfo: + exp.build_polytypes(False) + assert str(excinfo.value) == 'Distribution has too few parameters!' + +def test_build_polytypes_gamma() -> None: + """ + Build dist 'gamma' + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'gamma' + inp.Marginals[0].parameters = [0,1,0] + exp = InputSpace(inp) + with pytest.raises(ValueError) as excinfo: + exp.build_polytypes(False) + assert str(excinfo.value) == 'Parameter values are not valid, please set differently' + +def test_build_polytypes_betaerr() -> None: + """ + Build dist 'beta' - too few params + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'beta' + inp.Marginals[0].parameters = [] + exp = InputSpace(inp) + with pytest.raises(AssertionError) as excinfo: + exp.build_polytypes(False) + assert str(excinfo.value) == 'Distribution has too few parameters!' + +def test_build_polytypes_beta() -> None: + """ + Build dist 'beta' + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'beta' + inp.Marginals[0].parameters = [0.5,1,2,3] + exp = InputSpace(inp) + exp.build_polytypes(False) + + +def test_build_polytypes_lognormerr() -> None: + """ + Build dist 'lognorm' - too few params + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'lognorm' + inp.Marginals[0].parameters = [] + exp = InputSpace(inp) + with pytest.raises(AssertionError) as excinfo: + exp.build_polytypes(False) + assert str(excinfo.value) == 'Distribution has too few parameters!' + +def test_build_polytypes_lognorm() -> None: + """ + Build dist 'lognorm' + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'lognorm' + inp.Marginals[0].parameters = [0.5,1,2,3] + exp = InputSpace(inp) + exp.build_polytypes(False) + + +def test_build_polytypes_exponerr() -> None: + """ + Build dist 'expon' - too few params + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'beta' + inp.Marginals[0].parameters = [] + exp = InputSpace(inp) + with pytest.raises(AssertionError) as excinfo: + exp.build_polytypes(False) + assert str(excinfo.value) == 'Distribution has too few parameters!' + +def test_build_polytypes_expon() -> None: + """ + Build dist 'expon' + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'expon' + inp.Marginals[0].parameters = [0.5,1,2,3] + exp = InputSpace(inp) + exp.build_polytypes(False) + + +def test_build_polytypes_weibullerr() -> None: + """ + Build dist 'weibull' - too few params + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'weibull' + inp.Marginals[0].parameters = [] + exp = InputSpace(inp) + with pytest.raises(AssertionError) as excinfo: + exp.build_polytypes(False) + assert str(excinfo.value) == 'Distribution has too few parameters!' + +def test_build_polytypes_weibull() -> None: + """ + Build dist 'weibull' + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'weibull' + inp.Marginals[0].parameters = [0.5,1,2,3] + exp = InputSpace(inp) + exp.build_polytypes(False) + + +def test_build_polytypes_arbitrary() -> None: + """ + Build poly 'arbitrary' + """ + x = np.random.uniform(0,1,1000) + inp = Input() + inp.add_marginals() + inp.Marginals[0].input_data = x + exp = InputSpace(inp) + exp.build_polytypes(False) + +def test_build_polytypes_rosenblatt() -> None: + """ + Build dist with rosenblatt + """ + x = np.random.uniform(0,1,1000) + inp = Input() + inp.add_marginals() + inp.Marginals[0].input_data = x + exp = InputSpace(inp) + exp.build_polytypes(True) + +def test_build_polytypes_samples() -> None: + """ + Build dist from samples + """ + x = np.random.uniform(0,1,1000) + inp = Input() + inp.add_marginals() + inp.Marginals[0].input_data = x + exp = InputSpace(inp) + exp.build_polytypes(False) + + +def test_build_polytypes_samples2d() -> None: + """ + Build dist from samples - samples too high dim + """ + x = np.random.uniform(0,1,(2,1000)) + inp = Input() + inp.add_marginals() + inp.Marginals[0].input_data = x + exp = InputSpace(inp) + with pytest.raises(ValueError) as excinfo: + exp.build_polytypes(False) + assert str(excinfo.value) == 'The samples provided to the Marginals should be 1D only' + +#%% Test ExpDesign.init_param_space + +def test_init_param_space_nomaxdegsample() -> None: + """ + Init param space without max_deg for given samples + """ + x = np.random.uniform(0,1,1000) + inp = Input() + inp.add_marginals() + inp.Marginals[0].input_data = x + exp = InputSpace(inp) + exp.init_param_space() + +def test_init_param_space_nomaxdegdist() -> None: + """ + Init param space without max_deg for given dist + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'expon' + inp.Marginals[0].parameters = [0.5,1,2,3] + exp = InputSpace(inp) + exp.init_param_space() + +def test_init_param_space_maxdeg() -> None: + """ + Init param space with max_deg for given samples + """ + x = np.random.uniform(0,1,1000) + inp = Input() + inp.add_marginals() + inp.Marginals[0].input_data = x + exp = InputSpace(inp) + exp.init_param_space(max_deg=2) + +def test_init_param_space_maxdegdist() -> None: + """ + Init param space with max_deg for given dist + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'expon' + inp.Marginals[0].parameters = [0.5,1,2,3] + exp = InputSpace(inp) + exp.init_param_space(max_deg=2) + + +#%% Test ExpDesign.transform + +def test_transform_noparamspace() -> None: + """ + Call transform without a built JDist + """ + x = np.random.uniform(0,1,1000) + y = np.random.uniform(0,1,(2,1000)) + inp = Input() + inp.add_marginals() + inp.Marginals[0].input_data = x + exp = InputSpace(inp) + with pytest.raises(AttributeError) as excinfo: + exp.transform(y) + assert str(excinfo.value) == 'Call function init_param_space first to create JDist' + +def test_transform_dimerrlow() -> None: + """ + Call transform with too few dimensions + """ + x = np.random.uniform(0,1,1000) + inp = Input() + inp.add_marginals() + inp.Marginals[0].input_data = x + exp = InputSpace(inp) + exp.init_param_space(max_deg=2) + with pytest.raises(AttributeError) as excinfo: + exp.transform(x) + assert str(excinfo.value) == 'X should have two dimensions' + +def test_transform_dimerrhigh() -> None: + """ + Call transform with too many dimensions + """ + x = np.random.uniform(0,1,1000) + y = np.random.uniform(0,1,(1,1,1000)) + inp = Input() + inp.add_marginals() + inp.Marginals[0].input_data = x + exp = InputSpace(inp) + exp.init_param_space(max_deg=2) + with pytest.raises(AttributeError) as excinfo: + exp.transform(y) + assert str(excinfo.value) == 'X should have two dimensions' + +def test_transform_dimerr0() -> None: + """ + Call transform with wrong X.shape[0] + """ + x = np.random.uniform(0,1,1000) + y = np.random.uniform(0,1,(2,1000)) + inp = Input() + inp.add_marginals() + inp.Marginals[0].input_data = x + exp = InputSpace(inp) + exp.init_param_space(max_deg=2) + with pytest.raises(AttributeError) as excinfo: + exp.transform(y) + assert str(excinfo.value) == 'The second dimension of X should be the same size as the number of marginals in the InputObj' + +def test_transform_paramspace() -> None: + """ + Transform successfully + """ + x = np.random.uniform(0,1,1000) + y = np.random.uniform(0,1,(1000,1)) + inp = Input() + inp.add_marginals() + inp.Marginals[0].input_data = x + exp = InputSpace(inp) + exp.init_param_space(max_deg=2) + exp.transform(y) + + +if __name__ == '__main__': + None diff --git a/tests/test_MetaModel.py b/tests/test_MetaModel.py index d8bcfe0e1..7310b3708 100644 --- a/tests/test_MetaModel.py +++ b/tests/test_MetaModel.py @@ -3,6 +3,7 @@ Test the MetaModel class in bayesvalidrox. Tests are available for the following functions Class MetaModel: + build_metamodel create_metamodel *not used again train_norm_design update_pce_coeffs @@ -38,6 +39,34 @@ from bayesvalidrox.pylink.pylink import PyLinkForwardModel as PL #%% Test MetaMod constructor on its own def test_metamod() -> None: + """ + Construct MetaModel without inputs + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'normal' + inp.Marginals[0].parameters = [0,1] + mod = PL() + mm = MetaModel(inp, mod) + +#%% Test MetaModel.build_metamodel + +def test_build_metamodel_nosamples() -> None: + """ + Build MetaModel without samples + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'normal' + inp.Marginals[0].parameters = [0,1] + mod = PL() + mm = MetaModel(inp, mod) + with pytest.raises(AttributeError) as excinfo: + mm.build_metamodel() + assert str(excinfo.value) == 'Please provide samples to the metamodle before building it.' + + +def test_build_metamodel() -> None: """ Build MetaModel without inputs """ @@ -47,6 +76,8 @@ def test_metamod() -> None: inp.Marginals[0].parameters = [0,1] mod = PL() mm = MetaModel(inp, mod) + mm.build_metamodel() + 'Please provide samples to the metamodle before building it.' #%% Test MetaMod.generate_polynomials @@ -97,7 +128,7 @@ def test_generate_polynomials_deg() -> None: def test_add_ExpDesign() -> None: """ - Generate ExpDesign in class + Add ExpDesign in MetaModel """ inp = Input() inp.add_marginals() @@ -107,6 +138,23 @@ def test_add_ExpDesign() -> None: mm = MetaModel(inp, mod) mm.add_ExpDesign() +#%% Test MetaModel.generate_ExpDesign + +def test_generate_ExpDesign() -> None: + """ + Generate ExpDesign in MetaModel + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'normal' + inp.Marginals[0].parameters = [0,1] + mod = PL() + mm = MetaModel(inp, mod) + mm.generate_ExpDesign(mod, 10) + # TODO: model not working for this, create a workaround/test model! + + + if __name__ == '__main__': None diff --git a/tests/test_engine.py b/tests/test_engine.py new file mode 100644 index 000000000..3a6a7ac43 --- /dev/null +++ b/tests/test_engine.py @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- +""" +Tests the class Engine in bayesvalidrox +Tests are available for the following functions +Engine: + + +@author: Rebecca Kohlhaas +""" +import numpy as np + +import sys +sys.path.append("src/") +import pytest + +from bayesvalidrox.surrogate_models.inputs import Input +from bayesvalidrox.surrogate_models.exp_designs import ExpDesigns +from bayesvalidrox.surrogate_models.surrogate_models import MetaModel +from bayesvalidrox.pylink.pylink import PyLinkForwardModel as PL +from bayesvalidrox.surrogate_models.engine import Engine + + +#%% Test Engine constructor + + +def test_engine() -> None: + """ + Build Engine without inputs + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'normal' + inp.Marginals[0].parameters = [0,1] + mod = PL() + mm = MetaModel(inp) + expdes = ExpDesigns(inp) + engine = Engine(mm, mod, expdes) + +#%% Test Engine.start_engine + +def test_start_engine() -> None: + """ + Build Engine without inputs + """ + inp = Input() + inp.add_marginals() + inp.Marginals[0].dist_type = 'normal' + inp.Marginals[0].parameters = [0,1] + mod = PL() + mm = MetaModel(inp) + expdes = ExpDesigns(inp) + engine = Engine(mm, mod, expdes) + engine.start_engine() + + +#%% Test Engine.train_normal +# TODO: build mock model to do this? - test again in full-length examples -- GitLab