From cd76c43ab7a8c7d89c6341ac81d6869c4c54329d Mon Sep 17 00:00:00 2001
From: kohlhaasrebecca <rebecca.kohlhaas@outlook.com>
Date: Tue, 19 Dec 2023 10:09:47 +0100
Subject: [PATCH] Created second ExpDesign only in MetaModel

---
 .coverage                                     | Bin 53248 -> 53248 bytes
 src/bayesvalidrox/surrogate_models/engine.py  | 174 +++++-
 .../surrogate_models/exp_designs.py           |  31 --
 .../surrogate_models/exp_designs_.py          | 508 ++++++++++++++++++
 .../surrogate_models/input_space.py           |  32 --
 .../surrogate_models/surrogate_models.py      |  27 +-
 tests/test_MetaModel.py                       |   8 +-
 7 files changed, 700 insertions(+), 80 deletions(-)
 create mode 100644 src/bayesvalidrox/surrogate_models/exp_designs_.py

diff --git a/.coverage b/.coverage
index fb40a106ecddff155efa3de1fc357ccf13425598..fdfc90cd88247a2682ddc7eb5c6dc7e6e44746ed 100644
GIT binary patch
[binary deltas (delta 1343 / delta 1530) omitted -- not human-readable]
diff --git a/src/bayesvalidrox/surrogate_models/engine.py b/src/bayesvalidrox/surrogate_models/engine.py
index 6320b6e7e..7b3adc041 100644
--- a/src/bayesvalidrox/surrogate_models/engine.py
+++ b/src/bayesvalidrox/surrogate_models/engine.py
@@ -5,13 +5,17 @@ Engine to train the surrogate with

 @author: Rebecca Kohlhaas
 """
+import copy
 import h5py
+import joblib
 import numpy as np
 import os

 from .inputs import Input
 from .exp_designs import ExpDesigns
+from .exp_designs_ import ExpDesigns_
 from .surrogate_models import MetaModel
+from bayesvalidrox.post_processing.post_processing import PostProcessing


 class Engine():
@@ -33,16 +37,14 @@ class Engine():
         self.out_names = self.Model.Output.names
         self.MetaModel.out_names = self.out_names

-        # Add expDesign to MetaModel if it does not have any
-        if not hasattr(self.MetaModel, 'ExpDesign'):
-            self.MetaModel.ExpDesign = self.ExpDesign

-    def train_normal(self, parallel = False, verbose = False) -> None:
+    def train_normal(self, parallel = False, verbose = False, save = False) -> None:
         """
         Trains surrogate on static samples only.
         Samples are taken from the experimental design and the specified
         model is run on them.
         Alternatively the samples can be read in from a provided hdf5 file.
+

         Returns
         -------
@@ -84,14 +86,27 @@ class Engine():
             ExpDesign.x_values = ExpDesign.Y['x_values']
             del ExpDesign.Y['x_values']

-        MetaModel.ndim = ExpDesign.ndim
         # Build and train the MetaModel on the static samples
         MetaModel.CollocationPoints = ExpDesign.X_tr
-        MetaModel.build_metamodel()
+        MetaModel.build_metamodel(ExpDesign.n_init_samples)

         # Fit the surrogate
         MetaModel.fit(ExpDesign.X_tr, ExpDesign.Y, parallel, verbose)
+
+        # TODO: this is just a fix for the evals - remove later
+        MetaModel.ExpDesign = ExpDesign
+
+        # Save what there is to save
+        if save:
+            # Save surrogate
+            with open(f'surrogates/surrogate_{self.Model.name}.pk1', 'wb') as output:
+                joblib.dump(MetaModel, output, 2)
+
+            # Zip the model run directories
+            if self.Model.link_type.lower() == 'pylink' and\
+               self.ExpDesign.sampling_method.lower() != 'user':
+                self.Model.zip_subdirs(self.Model.name, f'{self.Model.name}_')


     def train_sequential(self, parallel = False, verbose = False) -> None:
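The `save` branch above dumps the whole trained MetaModel with joblib. A minimal sketch of loading such a dump back into a session (assumptions: the `surrogates/` directory existed before saving, and 'MyModel' stands in for the actual `Model.name`):

    import joblib

    # Reload a surrogate written by train_normal(..., save=True); the file
    # name pattern mirrors the code above ('MyModel' is a placeholder name).
    with open('surrogates/surrogate_MyModel.pk1', 'rb') as infile:
        metamodel = joblib.load(infile)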
@@ -105,4 +120,149 @@ class Engine():
         None

         """
-        self.train_normal(parallel, verbose)
\ No newline at end of file
+        self.train_normal(parallel, verbose)
+
+
+
+    # -------------------------------------------------------------------------
+    def eval_metamodel(self, samples=None, nsamples=None,
+                       sampling_method='random', return_samples=False):
+        """
+        Evaluates the meta-model at the requested samples or, if none are
+        given, at `nsamples` newly generated samples.
+
+        Parameters
+        ----------
+        samples : array of shape (n_samples, n_params), optional
+            Samples to evaluate the meta-model at. The default is None.
+        nsamples : int, optional
+            Number of samples to generate, if no `samples` is provided. The
+            default is None.
+        sampling_method : str, optional
+            Type of sampling, if no `samples` is provided. The default is
+            'random'.
+        return_samples : bool, optional
+            Return the samples, if no `samples` is provided. The default is
+            False.
+
+        Returns
+        -------
+        mean_pred : dict
+            Mean of the predictions.
+        std_pred : dict
+            Standard deviation of the predictions.
+        """
+        # Shorthand for the attached MetaModel, which holds the trained
+        # bases, coefficients and scalers used below (this method was moved
+        # here from MetaModel, so attribute look-ups must go through it).
+        MetaModel = self.MetaModel
+
+        # Generate or transform (if need be) samples
+        if samples is None:
+            # Generate
+            samples = self.ExpDesign.generate_samples(
+                nsamples,
+                sampling_method
+                )
+
+        # Transform samples to the independent space
+        samples = self.ExpDesign.transform(
+            samples,
+            method='user'
+            )
+
+        # Compute univariate bases for the given samples
+        if MetaModel.meta_model_type.lower() != 'gpe':
+            univ_p_val = MetaModel.univ_basis_vals(
+                samples,
+                n_max=np.max(MetaModel.pce_deg)
+                )
+
+        mean_pred_b = {}
+        std_pred_b = {}
+        # Loop over bootstrap iterations
+        for b_i in range(MetaModel.n_bootstrap_itrs):
+
+            # Extract model dictionary
+            if MetaModel.meta_model_type.lower() == 'gpe':
+                model_dict = MetaModel.gp_poly[f'b_{b_i+1}']
+            else:
+                model_dict = MetaModel.coeffs_dict[f'b_{b_i+1}']
+
+            # Loop over outputs
+            mean_pred = {}
+            std_pred = {}
+            for output, values in model_dict.items():
+
+                mean = np.empty((len(samples), len(values)))
+                std = np.empty((len(samples), len(values)))
+                idx = 0
+                for in_key, InIdxValues in values.items():
+
+                    # Prediction with GPE
+                    if MetaModel.meta_model_type.lower() == 'gpe':
+                        X_T = MetaModel.x_scaler[f'b_{b_i+1}'][output].transform(samples)
+                        gp = MetaModel.gp_poly[f'b_{b_i+1}'][output][in_key]
+                        y_mean, y_std = gp.predict(X_T, return_std=True)
+
+                    else:
+                        # Prediction with PCE
+                        # Assemble the Psi matrix
+                        basis = MetaModel.basis_dict[f'b_{b_i+1}'][output][in_key]
+                        psi = MetaModel.create_psi(basis, univ_p_val)
+
+                        # Prediction
+                        if MetaModel.bootstrap_method != 'fast' or b_i == 0:
+                            # with error bar, i.e. use clf_poly
+                            clf_poly = MetaModel.clf_poly[f'b_{b_i+1}'][output][in_key]
+                            try:
+                                y_mean, y_std = clf_poly.predict(
+                                    psi, return_std=True
+                                    )
+                            except TypeError:
+                                y_mean = clf_poly.predict(psi)
+                                y_std = np.zeros_like(y_mean)
+                        else:
+                            # without error bar
+                            coeffs = MetaModel.coeffs_dict[f'b_{b_i+1}'][output][in_key]
+                            y_mean = np.dot(psi, coeffs)
+                            y_std = np.zeros_like(y_mean)
+
+                    mean[:, idx] = y_mean
+                    std[:, idx] = y_std
+                    idx += 1
+
+                # Save predictions for each output
+                if MetaModel.dim_red_method.lower() == 'pca':
+                    PCA = MetaModel.pca[f'b_{b_i+1}'][output]
+                    mean_pred[output] = PCA.inverse_transform(mean)
+                    std_pred[output] = np.zeros(mean.shape)
+                else:
+                    mean_pred[output] = mean
+                    std_pred[output] = std
+
+            # Save predictions for each bootstrap iteration
+            mean_pred_b[b_i] = mean_pred
+            std_pred_b[b_i] = std_pred
+
+        # Change the order of nesting
+        mean_pred_all = {}
+        for i in sorted(mean_pred_b):
+            for k, v in mean_pred_b[i].items():
+                if k not in mean_pred_all:
+                    mean_pred_all[k] = [None] * len(mean_pred_b)
+                mean_pred_all[k][i] = v
+
+        # Compute the moments of the predictions over the bootstrap iterations
+        for output in self.out_names:
+            # Only use bootstrap runs with finite values
+            finite_rows = np.isfinite(
+                mean_pred_all[output]).all(axis=2).all(axis=1)
+            outs = np.asarray(mean_pred_all[output])[finite_rows]
+            # Compute the mean
+            mean_pred[output] = np.mean(outs, axis=0)
+            # Compute the standard deviation
+            if MetaModel.n_bootstrap_itrs > 1:
+                std_pred[output] = np.std(outs, axis=0)
+            else:
+                std_pred[output] = std_pred_b[b_i][output]
+
+        if return_samples:
+            return mean_pred, std_pred, samples
+        else:
+            return mean_pred, std_pred
\ No newline at end of file
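A usage sketch for the relocated `eval_metamodel` (assumption: `engine` is an Engine whose `train_normal` has already run, so the coefficient/GP dictionaries are populated; the test points are placeholders):

    import numpy as np

    # Evaluate the trained engine at explicit points in the original
    # parameter space, shape (n_test, n_params).
    X_test = np.random.uniform(0, 1, size=(20, 2))   # placeholder points
    mean_pred, std_pred = engine.eval_metamodel(samples=X_test)

    # Both returns are dicts keyed by output name, each holding an array
    # of shape (n_test, n_output_points).
    for name in mean_pred:
        print(name, mean_pred[name].shape, std_pred[name].shape)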
diff --git a/src/bayesvalidrox/surrogate_models/exp_designs.py b/src/bayesvalidrox/surrogate_models/exp_designs.py
index 3e26c8153..1036dbbba 100644
--- a/src/bayesvalidrox/surrogate_models/exp_designs.py
+++ b/src/bayesvalidrox/surrogate_models/exp_designs.py
@@ -294,37 +294,6 @@ class ExpDesigns:

             Inputs.Marginals[i].parameters = [low_bound, up_bound]

-    # -------------------------------------------------------------------------
-    def generate_samples(self, n_samples, sampling_method='random',
-                         transform=False):
-        """
-        Generates samples with given sampling method
-
-        Parameters
-        ----------
-        n_samples : int
-            Number of requested samples.
-        sampling_method : str, optional
-            Sampling method. The default is `'random'`.
-        transform : bool, optional
-            Transformation via an isoprobabilistic transformation method. The
-            default is `False`.
-
-        Returns
-        -------
-        samples: array of shape (n_samples, n_params)
-            Generated samples from defined model input object.
-
-        """
-        try:
-            samples = chaospy.generate_samples(
-                int(n_samples), domain=self.origJDist, rule=sampling_method
-                )
-        except:
-            samples = self.random_sampler(int(n_samples)).T
-
-        return samples.T
-
     # -------------------------------------------------------------------------
     def generate_ED(self, n_samples, transform=False,
                     max_pce_deg=None):
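For reference, the deleted `generate_samples` was a thin wrapper around chaospy. An equivalent stand-alone call looks like this (the two-parameter joint distribution is a hypothetical example):

    import chaospy

    # Hypothetical joint: two independent uniforms on [0, 1].
    joint = chaospy.J(chaospy.Uniform(0, 1), chaospy.Uniform(0, 1))

    # Equivalent of the removed ExpDesigns.generate_samples(100, 'latin_hypercube'):
    samples = chaospy.generate_samples(100, domain=joint, rule='latin_hypercube').T
    print(samples.shape)  # (100, 2)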
diff --git a/src/bayesvalidrox/surrogate_models/exp_designs_.py b/src/bayesvalidrox/surrogate_models/exp_designs_.py
new file mode 100644
index 000000000..36146225d
--- /dev/null
+++ b/src/bayesvalidrox/surrogate_models/exp_designs_.py
@@ -0,0 +1,508 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import numpy as np
+import math
+import itertools
+import chaospy
+import scipy.stats as st
+from tqdm import tqdm
+import h5py
+import os
+
+from .apoly_construction import apoly_construction
+
+
+class ExpDesigns_:
+    """
+    This class generates samples from the prescribed marginals for the model
+    parameters using the `Input` object.
+
+    Attributes
+    ----------
+    Input : obj
+        Input object containing the parameter marginals, i.e. name,
+        distribution type and distribution parameters, or available raw data.
+    method : str
+        Type of the experimental design. The default is `'normal'`; the
+        other option is `'sequential'`.
+    meta_Model_type : str
+        Type of the metamodel.
+    sampling_method : str
+        Name of the sampling method for the experimental design. The
+        following sampling methods are supported:
+
+        * random
+        * latin_hypercube
+        * sobol
+        * halton
+        * hammersley
+        * chebyshev(FT)
+        * grid(FT)
+        * user
+    hdf5_file : str
+        Name of the hdf5 file that contains the experimental design.
+    n_new_samples : int
+        Number of (initial) training points.
+    n_max_samples : int
+        Maximum number of training points.
+    mod_LOO_threshold : float
+        The modified leave-one-out cross-validation threshold at which the
+        sequential design stops.
+    tradeoff_scheme : str
+        Trade-off scheme to assign weights to the exploration and
+        exploitation scores in the sequential design.
+    n_canddidate : int
+        Number of candidate training sets to calculate the scores for.
+    explore_method : str
+        Type of the exploration method for the sequential design. The
+        following methods are supported:
+
+        * Voronoi
+        * random
+        * latin_hypercube
+        * LOOCV
+        * dual annealing
+    exploit_method : str
+        Type of the exploitation method for the sequential design. The
+        following methods are supported:
+
+        * BayesOptDesign
+        * BayesActDesign
+        * VarOptDesign
+        * alphabetic
+        * Space-filling
+    util_func : str or list
+        The utility function to be specified for the `exploit_method`. For
+        the available utility functions see the Note section.
+    n_cand_groups : int
+        Number of candidate groups. Each group of candidate training sets is
+        evaluated separately in parallel.
+    n_replication : int
+        Number of replications. Only for comparison. The default is 1.
+    post_snapshot : bool
+        Whether to plot the posterior in the sequential design. The default
+        is `False`.
+    step_snapshot : int
+        The number of steps between posterior plots in the sequential
+        design. The default is 1.
+    max_a_post : list or array
+        Maximum a posteriori of the posterior distribution, if known. The
+        default is `[]`.
+    adapt_verbose : bool
+        Whether to plot the model response against that of the metamodel for
+        the new training point in the sequential design.
+
+    Note
+    ----------
+    The following utility functions for the **exploitation** methods are
+    supported:
+
+    #### BayesOptDesign (when data is available)
+    - DKL (Kullback-Leibler Divergence)
+    - DPP (D-Posterior-Precision)
+    - APP (A-Posterior-Precision)
+
+    #### VarBasedOptDesign -> when data is not available
+    - Entropy (Entropy/MMSE/active learning)
+    - EIGF (Expected Improvement for Global Fit)
+    - LOOCV (Leave-One-Out Cross-Validation)
+
+    #### alphabetic
+    - D-Opt (D-Optimality)
+    - A-Opt (A-Optimality)
+    - K-Opt (K-Optimality)
+    """
+
+    def __init__(self, Input, method='normal', meta_Model_type='pce',
+                 sampling_method='random', hdf5_file=None,
+                 n_new_samples=1, n_max_samples=None, mod_LOO_threshold=1e-16,
+                 tradeoff_scheme=None, n_canddidate=1, explore_method='random',
+                 exploit_method='Space-filling', util_func='Space-filling',
+                 n_cand_groups=4, n_replication=1, post_snapshot=False,
+                 step_snapshot=1, max_a_post=[], adapt_verbose=False):
+
+        self.InputObj = Input
+        self.method = method  # TODO: is this still needed?
+        self.meta_Model_type = meta_Model_type
+        self.sampling_method = sampling_method
+        self.hdf5_file = hdf5_file
+        self.n_new_samples = n_new_samples
+        self.n_max_samples = n_max_samples
+        self.mod_LOO_threshold = mod_LOO_threshold
+        self.explore_method = explore_method
+        self.exploit_method = exploit_method
+        self.util_func = util_func
+        self.tradeoff_scheme = tradeoff_scheme
+        self.n_canddidate = n_canddidate
+        self.n_cand_groups = n_cand_groups
+        self.n_replication = n_replication
+        self.post_snapshot = post_snapshot
+        self.step_snapshot = step_snapshot
+        self.max_a_post = max_a_post
+        self.adapt_verbose = adapt_verbose
+
+        # Other
+        self.apce = None
+        self.ndim = None
+
+        # Init
+        self.check_valid_inputs()
+
+
+    def check_valid_inputs(self) -> None:
+        """
+        Check if the given InputObj is valid for further calculations:
+        it has at least one Marginal, the Marginals have valid priors, and
+        all Marginals are defined the same way (samples vs distribution).
+
+        Returns
+        -------
+        None
+
+        """
+        Inputs = self.InputObj
+        self.ndim = len(Inputs.Marginals)
+
+        # Check if a PCE or aPCE metamodel is selected.
+        # TODO: test also for 'pce'?
+        if self.meta_Model_type.lower() == 'apce':
+            self.apce = True
+        else:
+            self.apce = False
+
+        # Check if marginals are given
+        if not self.ndim >= 1:
+            raise AssertionError('Cannot build distributions if no marginals are given')
+
+        # Check that each marginal is valid
+        for marginals in Inputs.Marginals:
+            if len(marginals.input_data) == 0:
+                if marginals.dist_type is None:
+                    raise AssertionError('Not all marginals were provided priors')
+            if np.array(marginals.input_data).shape[0] and (marginals.dist_type is not None):
+                raise AssertionError('Both samples and distribution type are given. Please choose only one.')
+
+        # Check if the input is given as a distribution or as input_data.
+        self.input_data_given = -1
+        for marg in Inputs.Marginals:
+            size = np.array(marg.input_data).shape[0]
+            if size and abs(self.input_data_given) != 1:
+                self.input_data_given = 2
+                break
+            if (not size) and self.input_data_given > 0:
+                self.input_data_given = 2
+                break
+            if not size:
+                self.input_data_given = 0
+            if size:
+                self.input_data_given = 1
+
+        if self.input_data_given == 2:
+            raise AssertionError('Distributions cannot be built as the priors have different types')
+
+        # Get the bounds if input_data are directly defined by the user:
+        if self.input_data_given:
+            for i in range(self.ndim):
+                low_bound = np.min(Inputs.Marginals[i].input_data)
+                up_bound = np.max(Inputs.Marginals[i].input_data)
+                Inputs.Marginals[i].parameters = [low_bound, up_bound]
+
+
+    # -------------------------------------------------------------------------
+    def init_param_space(self, max_deg=None):
+        """
+        Initializes the parameter space.
+
+        Parameters
+        ----------
+        max_deg : int, optional
+            Maximum polynomial degree. The default is `None`.
+
+        Returns
+        -------
+        raw_data : array of shape (n_params, n_samples)
+            Raw data.
+        bound_tuples : list of tuples
+            A list containing the lower and upper bounds of the parameters.
+
+        """
+        Inputs = self.InputObj
+        ndim = self.ndim
+        rosenblatt_flag = Inputs.Rosenblatt
+        mc_size = 50000
+
+        # Save the parameter names
+        self.par_names = []
+        for parIdx in range(ndim):
+            self.par_names.append(Inputs.Marginals[parIdx].name)
+
+        # Create a multivariate probability distribution
+        # TODO: change this to make max_deg obligatory, at least in some
+        # specific cases?
+        if max_deg is not None:
+            JDist, poly_types = self.build_polytypes(rosenblatt=rosenblatt_flag)
+            self.JDist, self.poly_types = JDist, poly_types
+
+        if self.input_data_given:
+            self.MCSize = len(Inputs.Marginals[0].input_data)
+            self.raw_data = np.zeros((ndim, self.MCSize))
+
+            for parIdx in range(ndim):
+                # Fill in the raw data of this parameter; fall back to
+                # sampling from JDist if the data cannot be used directly
+                try:
+                    self.raw_data[parIdx] = np.array(
+                        Inputs.Marginals[parIdx].input_data)
+                except:
+                    self.raw_data[parIdx] = self.JDist[parIdx].sample(mc_size)
+
+        else:
+            # Generate random samples based on parameter distributions
+            self.raw_data = chaospy.generate_samples(mc_size,
+                                                     domain=self.JDist)
+
+        # Extract moments
+        for parIdx in range(ndim):
+            mu = np.mean(self.raw_data[parIdx])
+            std = np.std(self.raw_data[parIdx])
+            self.InputObj.Marginals[parIdx].moments = [mu, std]
+
+        # Generate the bounds based on the given inputs for the marginals
+        bound_tuples = []
+        for i in range(ndim):
+            if Inputs.Marginals[i].dist_type == 'unif':
+                low_bound, up_bound = Inputs.Marginals[i].parameters
+            else:
+                low_bound = np.min(self.raw_data[i])
+                up_bound = np.max(self.raw_data[i])
+
+            bound_tuples.append((low_bound, up_bound))
+
+        self.bound_tuples = tuple(bound_tuples)
+
+    # -------------------------------------------------------------------------
+    def build_polytypes(self, rosenblatt):
+        """
+        Creates the polynomial types to be passed to the univ_basis_vals
+        method of the MetaModel object.
+
+        Parameters
+        ----------
+        rosenblatt : bool
+            Rosenblatt transformation flag.
+
+        Returns
+        -------
+        orig_space_dist : object
+            A chaospy JDist object or a gaussian_kde object.
+        poly_types : list
+            List of polynomial types for the parameters.
+
+        """
+        Inputs = self.InputObj
+
+        all_data = []
+        all_dist_types = []
+        orig_joints = []
+        poly_types = []
+
+        for parIdx in range(self.ndim):
+
+            if Inputs.Marginals[parIdx].dist_type is None:
+                data = Inputs.Marginals[parIdx].input_data
+                all_data.append(data)
+                dist_type = None
+            else:
+                dist_type = Inputs.Marginals[parIdx].dist_type
+                params = Inputs.Marginals[parIdx].parameters
+
+            if rosenblatt:
+                polytype = 'hermite'
+                dist = chaospy.Normal()
+
+            elif dist_type is None:
+                polytype = 'arbitrary'
+                dist = None
+
+            elif 'unif' in dist_type.lower():
+                polytype = 'legendre'
+                if not np.array(params).shape[0] >= 2:
+                    raise AssertionError('Distribution has too few parameters!')
+                dist = chaospy.Uniform(lower=params[0], upper=params[1])
+
+            elif 'norm' in dist_type.lower() and \
+                 'log' not in dist_type.lower():
+                if not np.array(params).shape[0] >= 2:
+                    raise AssertionError('Distribution has too few parameters!')
+                polytype = 'hermite'
+                dist = chaospy.Normal(mu=params[0], sigma=params[1])
+
+            elif 'gamma' in dist_type.lower():
+                polytype = 'laguerre'
+                if not np.array(params).shape[0] >= 3:
+                    raise AssertionError('Distribution has too few parameters!')
+                dist = chaospy.Gamma(shape=params[0],
+                                     scale=params[1],
+                                     shift=params[2])
+
+            elif 'beta' in dist_type.lower():
+                if not np.array(params).shape[0] >= 4:
+                    raise AssertionError('Distribution has too few parameters!')
+                polytype = 'jacobi'
+                dist = chaospy.Beta(alpha=params[0], beta=params[1],
+                                    lower=params[2], upper=params[3])
+
+            elif 'lognorm' in dist_type.lower():
+                polytype = 'hermite'
+                if not np.array(params).shape[0] >= 2:
+                    raise AssertionError('Distribution has too few parameters!')
+                mu = np.log(params[0]**2/np.sqrt(params[0]**2 + params[1]**2))
+                sigma = np.sqrt(np.log(1 + params[1]**2 / params[0]**2))
+                dist = chaospy.LogNormal(mu, sigma)
+
+            elif 'expon' in dist_type.lower():
+                polytype = 'exponential'
+                if not np.array(params).shape[0] >= 2:
+                    raise AssertionError('Distribution has too few parameters!')
+                dist = chaospy.Exponential(scale=params[0], shift=params[1])
+
+            elif 'weibull' in dist_type.lower():
+                polytype = 'weibull'
+                if not np.array(params).shape[0] >= 3:
+                    raise AssertionError('Distribution has too few parameters!')
+                dist = chaospy.Weibull(shape=params[0], scale=params[1],
+                                       shift=params[2])
+
+            else:
+                message = (f"DistType {dist_type} for parameter "
+                           f"{parIdx+1} is not available.")
+                raise ValueError(message)
+
+            if self.input_data_given or self.apce:
+                polytype = 'arbitrary'
+
+            # Store dists and poly_types
+            orig_joints.append(dist)
+            poly_types.append(polytype)
+            all_dist_types.append(dist_type)
+
+        # Prepare the final output to return
+        if None in all_dist_types:
+            # Naive approach: fit a Gaussian kernel to the provided data
+            Data = np.asarray(all_data)
+            try:
+                orig_space_dist = st.gaussian_kde(Data)
+            except:
+                raise ValueError('The samples provided to the Marginals should be 1D only')
+            self.prior_space = orig_space_dist
+        else:
+            orig_space_dist = chaospy.J(*orig_joints)
+            try:
+                self.prior_space = st.gaussian_kde(orig_space_dist.sample(10000))
+            except:
+                raise ValueError('Parameter values are not valid, please set them differently')
+
+        return orig_space_dist, poly_types
+
+    # -------------------------------------------------------------------------
+    def transform(self, X, params=None, method=None):
+        """
+        Transforms the samples via either a Rosenblatt or an isoprobabilistic
+        transformation.
+
+        Parameters
+        ----------
+        X : array of shape (n_samples, n_params)
+            Samples to be transformed.
+        params : list, optional
+            Parameters of the target space, only used for the gamma
+            distribution. The default is `None`.
+        method : string, optional
+            If the transformation method is 'user', transform X; otherwise
+            just pass X through. The default is `None`.
+
+        Returns
+        -------
+        tr_X : array of shape (n_samples, n_params)
+            Transformed samples.
+
+        """
+        # Check for a built JDist
+        if not hasattr(self, 'JDist'):
+            raise AttributeError('Call init_param_space first to create JDist')
+
+        # Check if X is 2d
+        if X.ndim != 2:
+            raise AttributeError('X should have two dimensions')
+
+        # Check if the size of X matches the Marginals
+        if X.shape[1] != self.ndim:
+            raise AttributeError('The second dimension of X should equal the number of marginals in the InputObj')
+
+        if self.InputObj.Rosenblatt:
+            self.origJDist, _ = self.build_polytypes(False)
+            if method == 'user':
+                tr_X = self.JDist.inv(self.origJDist.fwd(X.T)).T
+            else:
+                # Inverse to the original space -- generate sample ED
+                tr_X = self.origJDist.inv(self.JDist.fwd(X.T)).T
+        else:
+            # Transform the samples via an isoprobabilistic transformation
+            n_samples, n_params = X.shape
+            Inputs = self.InputObj
+            origJDist = self.JDist
+            poly_types = self.poly_types
+
+            disttypes = []
+            for par_i in range(n_params):
+                disttypes.append(Inputs.Marginals[par_i].dist_type)
+
+            # Pass the non-transformed X, if arbitrary PCE is selected.
+            if None in disttypes or self.input_data_given or self.apce:
+                return X
+
+            cdfx = np.zeros((X.shape))
+            tr_X = np.zeros((X.shape))
+
+            for par_i in range(n_params):
+
+                # Extract the parameters of the original space
+                disttype = disttypes[par_i]
+                if disttype is not None:
+                    dist = origJDist[par_i]
+                else:
+                    dist = None
+                polytype = poly_types[par_i]
+                cdf = np.vectorize(lambda x: dist.cdf(x))
+
+                # Extract the parameters of the transformation space based on
+                # the polytype
+                if polytype == 'legendre' or disttype == 'uniform':
+                    # Generate the Y_Dists
+                    params_Y = [-1, 1]
+                    dist_Y = st.uniform(loc=params_Y[0],
+                                        scale=params_Y[1]-params_Y[0])
+                    inv_cdf = np.vectorize(lambda x: dist_Y.ppf(x))
+
+                elif polytype == 'hermite' or disttype == 'norm':
+                    params_Y = [0, 1]
+                    dist_Y = st.norm(loc=params_Y[0], scale=params_Y[1])
+                    inv_cdf = np.vectorize(lambda x: dist_Y.ppf(x))
+
+                elif polytype == 'laguerre' or disttype == 'gamma':
+                    params_Y = [1, params[1]]
+                    dist_Y = st.gamma(loc=params_Y[0], scale=params_Y[1])
+                    inv_cdf = np.vectorize(lambda x: dist_Y.ppf(x))
+
+                # Compute CDF_x(X)
+                cdfx[:, par_i] = cdf(X[:, par_i])
+
+                # Compute invCDF_y(cdfx)
+                tr_X[:, par_i] = inv_cdf(cdfx[:, par_i])
+
+        return tr_X
+
+
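A sketch exercising the new class end to end, assuming `Input` marginals default to empty `input_data` and `Rosenblatt=False` (import paths follow the package layout used elsewhere in this patch):

    import numpy as np
    from bayesvalidrox.surrogate_models.inputs import Input
    from bayesvalidrox.surrogate_models.exp_designs_ import ExpDesigns_

    # One uniform marginal on [0, 1], mirroring the test set-up further below.
    inp = Input()
    inp.add_marginals()
    inp.Marginals[0].dist_type = 'unif'
    inp.Marginals[0].parameters = [0, 1]

    exp_design = ExpDesigns_(inp)            # __init__ runs check_valid_inputs()
    exp_design.init_param_space(max_deg=2)   # builds JDist, poly_types, bounds
    print(exp_design.poly_types)             # ['legendre'] for a uniform marginal
    print(exp_design.bound_tuples)           # ((0, 1),)

    # transform() maps samples from the original space onto [-1, 1],
    # the natural domain of the Legendre basis.
    X = np.random.uniform(0, 1, size=(10, 1))
    tr_X = exp_design.transform(X)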
diff --git a/src/bayesvalidrox/surrogate_models/input_space.py b/src/bayesvalidrox/surrogate_models/input_space.py
index adaa807ad..e0a94a98a 100644
--- a/src/bayesvalidrox/surrogate_models/input_space.py
+++ b/src/bayesvalidrox/surrogate_models/input_space.py
@@ -214,38 +214,6 @@ class InputSpace:
             up_bound = np.max(Inputs.Marginals[i].input_data)
             Inputs.Marginals[i].parameters = [low_bound, up_bound]

-    # -------------------------------------------------------------------------
-    def generate_ED(self, n_samples, transform=False,
-                    max_pce_deg=None):
-        """
-        Generates experimental designs (training set) with the given method.
-
-        Parameters
-        ----------
-        n_samples : int
-            Number of requested training points.
-        sampling_method : str, optional
-            Sampling method. The default is `'random'`.
-        transform : bool, optional
-            Isoprobabilistic transformation. The default is `False`.
-        max_pce_deg : int, optional
-            Maximum PCE polynomial degree. The default is `None`.
-
-        Returns
-        -------
-        None
-
-        """
-        if n_samples <0:
-            raise ValueError('A negative number of samples cannot be created. Please provide positive n_samples')
-        n_samples = int(n_samples)
-
-        if not hasattr(self, 'n_init_samples'):
-            self.n_init_samples = n_samples
-
-        # Generate the samples based on requested method
-        self.init_param_space(max_pce_deg)
-
     # -------------------------------------------------------------------------
     def init_param_space(self, max_deg=None):

diff --git a/src/bayesvalidrox/surrogate_models/surrogate_models.py b/src/bayesvalidrox/surrogate_models/surrogate_models.py
index c3abe2af8..b88f128b2 100644
--- a/src/bayesvalidrox/surrogate_models/surrogate_models.py
+++ b/src/bayesvalidrox/surrogate_models/surrogate_models.py
@@ -17,7 +17,7 @@ import os
 from joblib import Parallel, delayed
 import copy

-from .exp_designs import ExpDesigns
+from .exp_designs_ import ExpDesigns_
 from .glexindex import glexindex
 from .eval_rec_rule import eval_univ_basis
 from .reg_fast_ard import RegressionFastARD
@@ -115,7 +115,7 @@ class MetaModel():
         self.dim_red_method = dim_red_method
         self.verbose = verbose

-    def build_metamodel(self) -> None:
+    def build_metamodel(self, n_init_samples) -> None:
         """
         Builds the parts for the metamodel (polynomes,...) that are neede
         before fitting.

         Returns
         -------
         None
             DESCRIPTION.

         """
+
+        # Add an ExpDesign to the MetaModel if it does not have one yet
+        if not hasattr(self, 'ExpDesign'):
+            self.ExpDesign = ExpDesigns_(self.input_obj)
+            self.ExpDesign.n_init_samples = n_init_samples
+            self.ExpDesign.init_param_space(np.max(self.pce_deg))
+
+        self.ndim = self.ExpDesign.ndim
+
         if not hasattr(self, 'CollocationPoints'):
             raise AttributeError('Please provide samples to the metamodle before building it.')
@@ -451,7 +460,7 @@ class MetaModel():
             None.

         """
-        self.ExpDesign = ExpDesigns(self.input_obj,
+        self.ExpDesign = ExpDesigns_(self.input_obj,
                                     meta_Model_type=self.meta_model_type)

     # -------------------------------------------------------------------------
@@ -488,7 +497,11 @@ class MetaModel():
             ExpDesign.read_from_file(self.out_names)
         else:
             # Check if an old hdf5 file exists: if yes, rename it
-            hdf5file = f'ExpDesign_{self.ModelObj.name}.hdf5'
+            if hasattr(self.ModelObj, 'name'):
+                self.name = self.ModelObj.name
+            else:
+                self.name = 'MetaModel'
+            hdf5file = f'ExpDesign_{self.name}.hdf5'
             if os.path.exists(hdf5file):
                 os.rename(hdf5file, 'old_'+hdf5file)
@@ -1454,7 +1467,8 @@ class MetaModel():
         # Define the deg_array
         max_deg = np.max(self.pce_deg)
         min_Deg = np.min(self.pce_deg)
-        nitr = n_samples - self.ExpDesign.n_init_samples
+        # TODO: remove the options for sequential?
+        #nitr = n_samples - self.ExpDesign.n_init_samples

         # Check q-norm
         if not np.isscalar(self.pce_q_norm):
@@ -1470,7 +1484,8 @@ class MetaModel():
             return n_combo

         deg_new = max_deg
-        d = nitr if nitr != 0 and self.n_params > 5 else 1
+        #d = nitr if nitr != 0 and self.n_params > 5 else 1
+        d = 1
         min_index = np.argmin(abs(M_uptoMax(max_deg)-ndim*n_samples*d))
         # deg_new = range(1, max_deg+1)[min_index]
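The net effect of the change above is that the MetaModel now builds its own ExpDesigns_ inside build_metamodel instead of receiving one from the Engine. A sketch of the resulting flow (`inp`, `mod`, and `X_train` are placeholders for an Input object, a model object, and a training-sample array of shape (n_samples, n_params)):

    # Samples must be attached before building; otherwise build_metamodel raises.
    mm = MetaModel(inp, mod)
    mm.CollocationPoints = X_train
    mm.build_metamodel(n_init_samples=X_train.shape[0])

    # build_metamodel attached a fresh ExpDesigns_ derived from mm.input_obj:
    assert mm.ExpDesign.ndim == X_train.shape[1]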
diff --git a/tests/test_MetaModel.py b/tests/test_MetaModel.py
index 7310b3708..74d0aede3 100644
--- a/tests/test_MetaModel.py
+++ b/tests/test_MetaModel.py
@@ -124,11 +124,11 @@ def test_generate_polynomials_deg() -> None:
     mm.generate_polynomials(4)


-#%% Test MetaMod.add_ExpDesign
+#%% Test MetaMod.add_InputSpace

-def test_add_ExpDesign() -> None:
+def test_add_InputSpace() -> None:
     """
-    Add ExpDesign in MetaModel
+    Add InputSpace in MetaModel
     """
     inp = Input()
     inp.add_marginals()
@@ -136,7 +136,7 @@ def test_add_ExpDesign() -> None:
     inp.Marginals[0].parameters = [0,1]
     mod = PL()
     mm = MetaModel(inp, mod)
-    mm.add_ExpDesign()
+    mm.add_InputSpace()

 #%% Test MetaModel.generate_ExpDesign
-- 
GitLab