From 3d30ea005bf848098b626f176b40884e0b8a0f20 Mon Sep 17 00:00:00 2001 From: kohlhaasrebecca <rebecca.kohlhaas@outlook.com> Date: Fri, 2 Aug 2024 11:24:41 +0200 Subject: [PATCH] Moved preprocessing for the training to a decorator _decorator_fit Preprocessing includes building the InputSpace, checking the input dimensions and assigning the inputs to parameters. --- .../.$training restructure.drawio.bkp | 64 +++++++ docs/diagrams/training restructure.drawio | 70 +++++++ .../example_analytical_function.py | 16 +- .../surrogate_models/polynomial_chaos.py | 107 ++++------- .../surrogate_models/surrogate_models.py | 180 +++++++++++------- 5 files changed, 285 insertions(+), 152 deletions(-) create mode 100644 docs/diagrams/.$training restructure.drawio.bkp create mode 100644 docs/diagrams/training restructure.drawio diff --git a/docs/diagrams/.$training restructure.drawio.bkp b/docs/diagrams/.$training restructure.drawio.bkp new file mode 100644 index 000000000..bd3e47de1 --- /dev/null +++ b/docs/diagrams/.$training restructure.drawio.bkp @@ -0,0 +1,64 @@ +<mxfile host="Electron" modified="2024-07-31T10:12:06.359Z" agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/22.1.11 Chrome/114.0.5735.289 Electron/25.9.8 Safari/537.36" etag="_svUbBalPQuec83DpYRD" version="22.1.11" type="device"> + <diagram name="Page-1" id="63dxfbV-El8Ic4h_2P3G"> + <mxGraphModel dx="1434" dy="956" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="850" pageHeight="1100" math="0" shadow="0"> + <root> + <mxCell id="0" /> + <mxCell id="1" parent="0" /> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-6" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;" edge="1" parent="1" source="Kjr_m6IkcFhD3S5Vm94S-1" target="Kjr_m6IkcFhD3S5Vm94S-5"> + <mxGeometry relative="1" as="geometry" /> + </mxCell> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-1" value="adaptive_regression" style="rounded=0;whiteSpace=wrap;html=1;" vertex="1" parent="1"> + <mxGeometry x="480" y="480" width="130" height="40" as="geometry" /> + </mxCell> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-2" value="" style="strokeWidth=2;html=1;shape=mxgraph.flowchart.annotation_1;align=left;pointerEvents=1;" vertex="1" parent="1"> + <mxGeometry x="460" y="450" width="50" height="100" as="geometry" /> + </mxCell> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-3" value="parallel" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;" vertex="1" parent="1"> + <mxGeometry x="520" y="440" width="60" height="30" as="geometry" /> + </mxCell> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-7" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;" edge="1" parent="1" source="Kjr_m6IkcFhD3S5Vm94S-4" target="Kjr_m6IkcFhD3S5Vm94S-5"> + <mxGeometry relative="1" as="geometry" /> + </mxCell> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-4" value="update" style="rounded=0;whiteSpace=wrap;html=1;" vertex="1" parent="1"> + <mxGeometry x="460" y="590" width="120" height="40" as="geometry" /> + </mxCell> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-5" value="regression" style="rounded=0;whiteSpace=wrap;html=1;" vertex="1" parent="1"> + <mxGeometry x="670" y="540" width="120" height="40" as="geometry" /> + </mxCell> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-9" value="b_i number, fast_bootstrap" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;" vertex="1" parent="1"> + <mxGeometry x="490" y="410" width="180" height="30" as="geometry" /> + </mxCell> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-10" value="storage" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;" vertex="1" parent="1"> + <mxGeometry x="425" y="660" width="80" height="30" as="geometry" /> + </mxCell> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-11" value="" style="strokeWidth=2;html=1;shape=mxgraph.flowchart.annotation_1;align=left;pointerEvents=1;" vertex="1" parent="1"> + <mxGeometry x="440" y="420" width="50" height="230" as="geometry" /> + </mxCell> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-12" value="trafo_output[key]" style="rounded=0;whiteSpace=wrap;html=1;" vertex="1" parent="1"> + <mxGeometry x="440" y="350" width="130" height="40" as="geometry" /> + </mxCell> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-13" value="" style="strokeWidth=2;html=1;shape=mxgraph.flowchart.annotation_1;align=left;pointerEvents=1;" vertex="1" parent="1"> + <mxGeometry x="425" y="330" width="50" height="370" as="geometry" /> + </mxCell> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-14" value="Outputs, keys" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;" vertex="1" parent="1"> + <mxGeometry x="470" y="320" width="110" height="30" as="geometry" /> + </mxCell> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-15" value="setup PCA" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;" vertex="1" parent="1"> + <mxGeometry x="420" y="280" width="80" height="30" as="geometry" /> + </mxCell> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-16" value="bootstrap step" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;" vertex="1" parent="1"> + <mxGeometry x="430" y="250" width="80" height="30" as="geometry" /> + </mxCell> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-18" value="" style="strokeWidth=2;html=1;shape=mxgraph.flowchart.annotation_1;align=left;pointerEvents=1;" vertex="1" parent="1"> + <mxGeometry x="410" y="240" width="50" height="480" as="geometry" /> + </mxCell> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-19" value="bootstrap" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;" vertex="1" parent="1"> + <mxGeometry x="450" y="220" width="110" height="30" as="geometry" /> + </mxCell> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-20" value="setup bootstrap" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;" vertex="1" parent="1"> + <mxGeometry x="400" y="190" width="110" height="30" as="geometry" /> + </mxCell> + </root> + </mxGraphModel> + </diagram> +</mxfile> diff --git a/docs/diagrams/training restructure.drawio b/docs/diagrams/training restructure.drawio new file mode 100644 index 000000000..c8e50029b --- /dev/null +++ b/docs/diagrams/training restructure.drawio @@ -0,0 +1,70 @@ +<mxfile host="Electron" modified="2024-07-31T11:03:38.920Z" agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/22.1.11 Chrome/114.0.5735.289 Electron/25.9.8 Safari/537.36" etag="tU7ui0JPg_6b6hPSW8yx" version="22.1.11" type="device"> + <diagram name="Page-1" id="63dxfbV-El8Ic4h_2P3G"> + <mxGraphModel dx="1434" dy="956" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="850" pageHeight="1100" math="0" shadow="0"> + <root> + <mxCell id="0" /> + <mxCell id="1" parent="0" /> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-6" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;" edge="1" parent="1" source="Kjr_m6IkcFhD3S5Vm94S-1" target="Kjr_m6IkcFhD3S5Vm94S-5"> + <mxGeometry relative="1" as="geometry" /> + </mxCell> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-1" value="adaptive_regression" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#f8cecc;strokeColor=#b85450;" vertex="1" parent="1"> + <mxGeometry x="480" y="480" width="130" height="40" as="geometry" /> + </mxCell> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-2" value="" style="strokeWidth=2;html=1;shape=mxgraph.flowchart.annotation_1;align=left;pointerEvents=1;" vertex="1" parent="1"> + <mxGeometry x="460" y="450" width="50" height="100" as="geometry" /> + </mxCell> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-3" value="parallel" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;" vertex="1" parent="1"> + <mxGeometry x="520" y="440" width="60" height="30" as="geometry" /> + </mxCell> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-7" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;" edge="1" parent="1" source="Kjr_m6IkcFhD3S5Vm94S-4" target="Kjr_m6IkcFhD3S5Vm94S-5"> + <mxGeometry relative="1" as="geometry" /> + </mxCell> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-4" value="update" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#f8cecc;strokeColor=#b85450;" vertex="1" parent="1"> + <mxGeometry x="460" y="590" width="120" height="40" as="geometry" /> + </mxCell> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-5" value="regression" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#f8cecc;strokeColor=#b85450;" vertex="1" parent="1"> + <mxGeometry x="670" y="540" width="120" height="40" as="geometry" /> + </mxCell> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-9" value="b_i number, fast_bootstrap" style="text;html=1;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontColor=#66B2FF;" vertex="1" parent="1"> + <mxGeometry x="490" y="410" width="180" height="30" as="geometry" /> + </mxCell> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-10" value="storage" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontColor=#FF3333;" vertex="1" parent="1"> + <mxGeometry x="425" y="660" width="80" height="30" as="geometry" /> + </mxCell> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-11" value="" style="strokeWidth=2;html=1;shape=mxgraph.flowchart.annotation_1;align=left;pointerEvents=1;fillColor=#dae8fc;strokeColor=#6c8ebf;" vertex="1" parent="1"> + <mxGeometry x="440" y="420" width="50" height="230" as="geometry" /> + </mxCell> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-12" value="trafo_output[key]" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#d5e8d4;strokeColor=#82b366;" vertex="1" parent="1"> + <mxGeometry x="440" y="350" width="130" height="40" as="geometry" /> + </mxCell> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-13" value="" style="strokeWidth=2;html=1;shape=mxgraph.flowchart.annotation_1;align=left;pointerEvents=1;" vertex="1" parent="1"> + <mxGeometry x="425" y="330" width="50" height="370" as="geometry" /> + </mxCell> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-14" value="Outputs, keys" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;" vertex="1" parent="1"> + <mxGeometry x="470" y="320" width="110" height="30" as="geometry" /> + </mxCell> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-15" value="setup PCA" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontColor=#009900;" vertex="1" parent="1"> + <mxGeometry x="420" y="280" width="80" height="30" as="geometry" /> + </mxCell> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-16" value="bootstrap step" style="text;html=1;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontColor=#66B2FF;" vertex="1" parent="1"> + <mxGeometry x="430" y="250" width="80" height="30" as="geometry" /> + </mxCell> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-18" value="" style="strokeWidth=2;html=1;shape=mxgraph.flowchart.annotation_1;align=left;pointerEvents=1;fillColor=#dae8fc;strokeColor=#6c8ebf;" vertex="1" parent="1"> + <mxGeometry x="410" y="240" width="50" height="480" as="geometry" /> + </mxCell> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-19" value="bootstrap" style="text;html=1;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontColor=#66B2FF;" vertex="1" parent="1"> + <mxGeometry x="450" y="230" width="110" height="30" as="geometry" /> + </mxCell> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-20" value="setup bootstrap" style="text;html=1;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontColor=#66B2FF;" vertex="1" parent="1"> + <mxGeometry x="400" y="200" width="110" height="30" as="geometry" /> + </mxCell> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-21" value="" style="shape=curlyBracket;whiteSpace=wrap;html=1;rounded=1;labelPosition=left;verticalLabelPosition=middle;align=right;verticalAlign=middle;" vertex="1" parent="1"> + <mxGeometry x="350" y="170" width="20" height="600" as="geometry" /> + </mxCell> + <mxCell id="Kjr_m6IkcFhD3S5Vm94S-22" value="decorate_fit" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;" vertex="1" parent="1"> + <mxGeometry x="365" y="140" width="110" height="30" as="geometry" /> + </mxCell> + </root> + </mxGraphModel> + </diagram> +</mxfile> diff --git a/examples/analytical-function/example_analytical_function.py b/examples/analytical-function/example_analytical_function.py index ceed4a5d6..4993f3383 100644 --- a/examples/analytical-function/example_analytical_function.py +++ b/examples/analytical-function/example_analytical_function.py @@ -173,22 +173,22 @@ if __name__ == "__main__": # 1) MI (Mutual information) 2) ALC (Active learning McKay) # 2)DKL (Kullback-Leibler Divergence) 3)DPP (D-Posterior-percision) # 4)APP (A-Posterior-percision) # ['DKL', 'BME', 'infEntropy'] - # MetaModelOpts.ExpDesign.util_func = 'DKL' + # ExpDesign.util_func = 'DKL' # BayesActDesign -> when data is available # 1) BME (Bayesian model evidence) 2) infEntropy (Information entropy) # 2)DKL (Kullback-Leibler Divergence) - #MetaModelOpts.ExpDesign.util_func = 'DKL' + # ExpDesign.util_func = 'DKL' # VarBasedOptDesign -> when data is not available # 1)ALM 2)EIGF, 3)LOOCV # or a combination as a list - # MetaModelOpts.ExpDesign.util_func = 'EIGF' + # ExpDesign.util_func = 'EIGF' # alphabetic # 1)D-Opt (D-Optimality) 2)A-Opt (A-Optimality) # 3)K-Opt (K-Optimality) or a combination as a list - # MetaModelOpts.ExpDesign.util_func = 'D-Opt' + # ExpDesign.util_func = 'D-Opt' # Defining the measurement error, if it's known a priori obsData = pd.DataFrame(Model.observations, columns=Model.Output.names) @@ -210,14 +210,8 @@ if __name__ == "__main__": ExpDesign.valid_model_runs = {'Z': prior_outputs[:500]} - # Run using the new engine - #engine = Engine(MetaModelOpts, Model, ExpDesign) - #engine.start_engine() - #engine.train_normal() - - MetaModelOpts.ExpDesign = ExpDesign + # Run using the engine engine = Engine(MetaModelOpts, Model, ExpDesign) - engine.start_engine() #engine.train_sequential() engine.train_normal() diff --git a/src/bayesvalidrox/surrogate_models/polynomial_chaos.py b/src/bayesvalidrox/surrogate_models/polynomial_chaos.py index 0f921e052..4525837fb 100644 --- a/src/bayesvalidrox/surrogate_models/polynomial_chaos.py +++ b/src/bayesvalidrox/surrogate_models/polynomial_chaos.py @@ -8,28 +8,29 @@ import copy import os import warnings +#from functools import wraps import matplotlib.pyplot as plt import numpy as np -import scipy as sp -import sklearn.gaussian_process.kernels as kernels +#import scipy as sp +#import sklearn.gaussian_process.kernels as kernels import sklearn.linear_model as lm from joblib import Parallel, delayed -from scipy.optimize import minimize, NonlinearConstraint -from sklearn.decomposition import PCA as sklearnPCA -from sklearn.gaussian_process import GaussianProcessRegressor -from sklearn.preprocessing import MinMaxScaler +#from scipy.optimize import minimize, NonlinearConstraint +#from sklearn.decomposition import PCA as sklearnPCA +#from sklearn.gaussian_process import GaussianProcessRegressor +#from sklearn.preprocessing import MinMaxScaler from tqdm import tqdm from .apoly_construction import apoly_construction from .bayes_linear import VBLinearRegression, EBLinearRegression from .eval_rec_rule import eval_univ_basis from .glexindex import glexindex -from .input_space import InputSpace +#from .input_space import InputSpace from .orthogonal_matching_pursuit import OrthogonalMatchingPursuit from .reg_fast_ard import RegressionFastARD from .reg_fast_laplace import RegressionFastLaplace -from .surrogate_models import MetaModel +from .surrogate_models import MetaModel, _decorator_fit from .supplementary import corr_loocv_error, create_psi @@ -60,7 +61,6 @@ class PCE(MetaModel): _pce_reg_method : str PCE regression method to compute the coefficients. The following regression methods are available: - 1. OLS: Ordinary Least Square method 2. BRR: Bayesian Ridge Regression 3. LARS: Least angle regression @@ -105,12 +105,11 @@ class PCE(MetaModel): # Use parent init super().__init__(input_obj, bootstrap_method, - n_bootstrap_itrs, dim_red_method, is_gaussian, - verbose) + n_bootstrap_itrs, dim_red_method, is_gaussian, verbose) # Additional inputs # Parameters that are not needed from the outside are denoted with '_' - self._meta_model_type = meta_model_type + self.meta_model_type = meta_model_type self._pce_reg_method = pce_reg_method self._pce_deg = pce_deg self._pce_q_norm = pce_q_norm @@ -133,6 +132,7 @@ class PCE(MetaModel): self._q_norm_dict = None self._deg_dict = None + def check_is_gaussian(self, _pce_reg_method, n_bootstrap_itrs) -> bool: """ Check if the metamodel returns a mean and stdev. @@ -152,11 +152,13 @@ class PCE(MetaModel): return False else: return True + - - def build_metamodel(self, n_init_samples=None) -> None: + def build_metamodel(self) -> None: """ - Builds the parts for the metamodel (polynomes,...) that are neede before fitting. + Builds the parts for the metamodel (polynomes,...) that are needed before fitting. + This is executed outside of any loops related to e.g. bootstrap or + transformations such as pca. Returns ------- @@ -164,20 +166,6 @@ class PCE(MetaModel): DESCRIPTION. """ - - # Generate general warnings - if self._pce_reg_method.lower() == 'ols': - print('There are no estimations of surrogate uncertainty available' - ' for the chosen regression options. This might lead to issues' - ' in later steps.') - - if self.CollocationPoints is None: - raise AttributeError('Please provide samples to the metamodel before building it.') - self.CollocationPoints = np.array(self.CollocationPoints) - - # TODO: does this work? - super().build_metamodel() - # Generate polynomials self._generate_polynomials(np.max(self._pce_deg)) @@ -189,18 +177,8 @@ class PCE(MetaModel): self.LOOCV_score_dict = self.auto_vivification() self._clf_poly = self.auto_vivification() self._LCerror = self.auto_vivification() - if self.dim_red_method.lower() == 'pca': - self.pca = self.auto_vivification() - - # Define an array containing the degrees - self.CollocationPoints = np.array(self.CollocationPoints) - self.n_samples, ndim = self.CollocationPoints.shape - if self.ndim != ndim: - raise AttributeError( - 'The given samples do not match the given number of priors. The samples should be a 2D array of size' - '(#samples, #priors)') - self._deg_array = self.__select_degree(ndim, self.n_samples) + self._deg_array = self.__select_degree(self.ndim, self.n_samples) # Generate all basis indices self._allBasisIndices = self.auto_vivification() @@ -215,6 +193,11 @@ class PCE(MetaModel): reverse=False, graded=True) self._allBasisIndices[str(deg)][str(q)] = basis_indices + # Evaluate the univariate polynomials on InputSpace + self._univ_p_val = self.univ_basis_vals(self.CollocationPoints) + + @_decorator_fit + #@_decorator_bootstrap def fit(self, X: np.array, y: dict, parallel=False, verbose=False): """ Fits the surrogate to the given data (samples X, outputs y). @@ -239,25 +222,9 @@ class PCE(MetaModel): None. """ - # Use settings - self.verbose = verbose - self.parallel = parallel - - # TODO: other option: rebuild every time - self.CollocationPoints = X - if self._deg_array is None: - self.build_metamodel(n_init_samples=X.shape[1]) - - # Evaluate the univariate polynomials on InputSpace - self._univ_p_val = self.univ_basis_vals(self.CollocationPoints) - # Store the original ones for later use + # Store the original polynomial evaluation for later use orig__univ_p_val = copy.deepcopy(self._univ_p_val) - # --- Loop through data points and fit the surrogate --- - if verbose: - print(f"\n>>>> Training the {self._meta_model_type} metamodel " - "started. <<<<<<\n") - # --- Bootstrap sampling --- # Correct number of bootstrap if PCA transformation is required. if self.dim_red_method.lower() == 'pca' and self.n_bootstrap_itrs == 1: @@ -348,16 +315,13 @@ class PCE(MetaModel): self._coeffs_dict[f'b_{b_i + 1}'][key][f"y_{i + 1}"] = out[i]['coeffs'] self._basis_dict[f'b_{b_i + 1}'][key][f"y_{i + 1}"] = out[i]['multi_indices'] self.LOOCV_score_dict[f'b_{b_i + 1}'][key][f"y_{i + 1}"] = out[i]['LOOCVScore'] - self._clf_poly[f'b_{b_i + 1}'][key][f"y_{i + 1}"] = out[i]['_clf_poly'] + self._clf_poly[f'b_{b_i + 1}'][key][f"y_{i + 1}"] = out[i]['clf_poly'] # TODO: this is commented out here, but should be used in the SeqDesign?? # self._LCerror[f'b_{b_i+1}'][key][f"y_{i+1}"] = out[i]['_LCerror'] # Restore the univariate polynomials self._univ_p_val = orig__univ_p_val - if verbose: - print(f"\n>>>> Training the {self._meta_model_type} metamodel" - " sucessfully completed. <<<<<<\n") # ------------------------------------------------------------------------- def update_pce_coeffs(self, X, y, out_dict=None): @@ -387,9 +351,8 @@ class PCE(MetaModel): # Make a copy final_out_dict = copy.deepcopy(out_dict) - # Create the _univ_p_val - _univ_p_val = self._univ_p_val -# _univ_p_val = self.univ_basis_vals(X, n_max=np.max(self._pce_deg)) + # Create the _univ_p_val based on the given X (X expected to differ from self.CollocationPoints) + _univ_p_val = self.univ_basis_vals(X, n_max=np.max(self._pce_deg)) # Loop over the points for i in range(y.shape[1]): @@ -562,7 +525,7 @@ class PCE(MetaModel): # Create a dict to pass the outputs return_out_dict = dict() - return_out_dict['_clf_poly'] = _clf_poly + return_out_dict['clf_poly'] = _clf_poly return_out_dict['spareMulti-Index'] = sparse_basis_indices return_out_dict['sparePsi'] = sparse_X return_out_dict['coeffs'] = coeffs @@ -633,27 +596,25 @@ class PCE(MetaModel): # Assemble the Psi matrix Psi = create_psi(BasisIndices, self._univ_p_val) - #print(Psi.shape) - # Calulate the cofficients of the metamodel outs = self.regression(Psi, ED_Y, BasisIndices) # Calculate and save the score of LOOCV - score, _LCerror = corr_loocv_error(outs['_clf_poly'], + score, _LCerror = corr_loocv_error(outs['clf_poly'], outs['sparePsi'], outs['coeffs'], ED_Y) # Check the convergence of noise for FastARD if self._pce_reg_method == 'FastARD' and \ - outs['_clf_poly'].alpha_ < np.finfo(np.float32).eps: + outs['clf_poly'].alpha_ < np.finfo(np.float32).eps: score = -np.inf qNormScores[qidx] = score qAllCoeffs[str(qidx + 1)] = outs['coeffs'] qAllIndices_Sparse[str(qidx + 1)] = outs['spareMulti-Index'] - qAll_clf_poly[str(qidx + 1)] = outs['_clf_poly'] + qAll_clf_poly[str(qidx + 1)] = outs['clf_poly'] qAllnTerms[str(qidx + 1)] = BasisIndices.shape[0] qAll_LCerror[str(qidx + 1)] = _LCerror @@ -755,7 +716,7 @@ class PCE(MetaModel): # Create a dict to pass the outputs returnVars = dict() - returnVars['_clf_poly'] = _clf_poly + returnVars['clf_poly'] = _clf_poly returnVars['degree'] = degree returnVars['qnorm'] = qnorm returnVars['coeffs'] = coeffs @@ -796,7 +757,6 @@ class PCE(MetaModel): samples, n_max=np.max(self._pce_deg) ) - #print(_univ_p_val) mean_pred_b = {} std_pred_b = {} @@ -820,9 +780,6 @@ class PCE(MetaModel): # Assemble Psi matrix basis = self._basis_dict[f'b_{b_i + 1}'][output][in_key] psi = create_psi(basis, _univ_p_val) - #print(basis) - - #print(psi.shape) # Prediction if self.bootstrap_method != 'fast' or b_i == 0: diff --git a/src/bayesvalidrox/surrogate_models/surrogate_models.py b/src/bayesvalidrox/surrogate_models/surrogate_models.py index c80a6a4a4..a9a683876 100644 --- a/src/bayesvalidrox/surrogate_models/surrogate_models.py +++ b/src/bayesvalidrox/surrogate_models/surrogate_models.py @@ -8,29 +8,112 @@ import copy import os import warnings +from functools import wraps import matplotlib.pyplot as plt import numpy as np -import sklearn.linear_model as lm -from joblib import Parallel, delayed -from scipy.optimize import minimize, NonlinearConstraint +#import sklearn.linear_model as lm +#from joblib import Parallel, delayed +#from scipy.optimize import minimize, NonlinearConstraint from sklearn.decomposition import PCA as sklearnPCA -from sklearn.preprocessing import MinMaxScaler -from tqdm import tqdm +#from sklearn.preprocessing import MinMaxScaler +#from tqdm import tqdm -from .apoly_construction import apoly_construction -from .bayes_linear import VBLinearRegression, EBLinearRegression -from .eval_rec_rule import eval_univ_basis -from .glexindex import glexindex +#from .apoly_construction import apoly_construction +#from .bayes_linear import VBLinearRegression, EBLinearRegression +#from .eval_rec_rule import eval_univ_basis +#from .glexindex import glexindex from .input_space import InputSpace -from .orthogonal_matching_pursuit import OrthogonalMatchingPursuit -from .reg_fast_ard import RegressionFastARD -from .reg_fast_laplace import RegressionFastLaplace +#from .orthogonal_matching_pursuit import OrthogonalMatchingPursuit +#from .reg_fast_ard import RegressionFastARD +#from .reg_fast_laplace import RegressionFastLaplace warnings.filterwarnings("ignore") # Load the mplstyle # noinspection SpellCheckingInspection plt.style.use(os.path.join(os.path.split(__file__)[0], '../', 'bayesvalidrox.mplstyle')) + +def _decorator_fit(fit_function): + """ + This decorator performs the checks and preprocessing for fitting the + metamodel. + + Parameters + ---------- + fit_function : function + Function with the same signature as self.fit(). + + Raises + ------ + AttributeError + DESCRIPTION. + ValueError + DESCRIPTION. + + Returns + ------- + decorator : function + The decorated function. + + """ + @wraps(fit_function) + def decorator(self , *args, **kwargs) : + # Use settings + # TODO: this a bit hacky, need to have arguments in exactly this order + X = args[0] + y = args[1] + self.verbose = args[2] + self.parallel = args[3] + self.ndim = len(self.input_obj.Marginals) + + # Check X + X = np.array(X) + n_samples, ndim = X.shape + if self.ndim != ndim: + raise AttributeError( + 'The given samples do not match the given number of priors. The samples should be a 2D array of size' + '(#samples, #priors)') + self.CollocationPoints = X + self.n_samples = n_samples + + # Check y + for key in y.keys(): + y_val = np.array(y[key]) + if y_val.ndim != 2: + raise ValueError('The given outputs y should be 2D') + y[key] = np.array(y[key]) + self.out_names = list(y.keys()) + + # Build general parameters + if self.dim_red_method.lower() == 'pca': + self.pca = self.auto_vivification() + + # Build the input space + if self.InputSpace is None: + self.InputSpace = InputSpace(self.input_obj) + n_init_samples = self.CollocationPoints.shape[0] + self.InputSpace.n_init_samples = n_init_samples + # TODO: _pce_deg not necessarily available, generalize this! + self.InputSpace.init_param_space(np.max(self._pce_deg)) + + # Transform input samples # TODO: Make 'method' variable + self.CollocationPoints = self.InputSpace.transform(self.CollocationPoints, method='user') + + # Build any other surrogate parts + self.build_metamodel() + + # --- Loop through data points and fit the surrogate --- + if self.verbose: + print(f"\n>>>> Training the {self._meta_model_type} metamodel " + "started. <<<<<<\n") + + # Call the function for fitting + fit_function( self , *args, **kwargs) + + if self.verbose: + print(f"\n>>>> Training the {self._meta_model_type} metamodel" + " sucessfully completed. <<<<<<\n") + return decorator class MetaModel: @@ -96,7 +179,13 @@ class MetaModel: self.ndim = None # Build general parameters - self.InputSpace = InputSpace(self.input_obj) + + # General warnings + if not self.is_gaussian: + print('There are no estimations of surrogate uncertainty available' + ' for the chosen regression options. This might lead to issues' + ' in later steps.') + def check_is_gaussian(self) -> bool: """ @@ -111,40 +200,9 @@ class MetaModel: bool True if stdev is also returned. """ - None - - def valid_training_inputs(self, X: np.array, y: dict): - """ - Check if the training inputs are of the expected format. - Will throw an error otherwise - - Parameters - ---------- - X : 2D list or np.array of shape (#samples, #dim) - The parameter value combinations that the model was evaluated at. - y : dict of 2D lists or arrays of shape (#samples, #timesteps) - The respective model evaluations. - - """ - # TODO: Add to this - # Check size of inputs - X = np.array(X) - for key in y.keys(): - y_val = np.array(y[key]) - if y_val.ndim != 2: - raise ValueError('The given outputs y should be 2D') - y[key] = np.array(y[key]) - - - # Output names are the same as the keys in y - self.out_names = list(y.keys()) - - # Build the MetaModel on the static samples - self.CollocationPoints = X + pass - - def build_metamodel(self) -> None: """ Builds the parts for the metamodel (polynomes,...) that are needed @@ -158,24 +216,10 @@ class MetaModel: None """ - # Extend input space - n_init_samples=self.CollocationPoints.shape[1] - if self.InputSpace.n_init_samples is None: - if n_init_samples is None: - n_init_samples = self._CollocationPoints.shape[0] - self.InputSpace.n_init_samples = n_init_samples - self.InputSpace.init_param_space(np.max(self._pce_deg)) - self.ndim = self.InputSpace.ndim - - - # Transform input samples - # TODO: Make 'method' variable - self.CollocationPoints = self.InputSpace.transform(self.CollocationPoints, method='user') - - self.ndim = len(self.input_obj.Marginals) - - - + pass + + + @_decorator_fit def fit(self, X: np.array, y: dict, parallel=False, verbose=False): """ Fits the surrogate to the given data (samples X, outputs y). @@ -200,10 +244,11 @@ class MetaModel: None. """ - None + pass + # ------------------------------------------------------------------------- - def pca_transformation(self, target): + def pca_transformation(self, target, n_pca_components): """ Transforms the targets (outputs) via Principal Component Analysis. The number of features is set by `self.n_pca_components`. @@ -228,7 +273,7 @@ class MetaModel: n_samples, n_features = target.shape # Use the given n_pca_components - n_pca_components = self.n_pca_components + #n_pca_components = self.n_pca_components # Switch to var_pca if n_pca_components is too large if (n_pca_components is not None) and (n_pca_components > n_features): @@ -278,6 +323,7 @@ class MetaModel: return pca, scaled_target, n_pca_components + # ------------------------------------------------------------------------- def eval_metamodel(self, samples): """ @@ -299,6 +345,7 @@ class MetaModel: """ return None, None + def compute_moments(self): """ Computes the first two moments of the metamodel. @@ -347,6 +394,7 @@ class MetaModel: new_MetaModelOpts.ndim = len(self.input_obj.Marginals) return new_MetaModelOpts + # ------------------------------------------------------------------------- def add_InputSpace(self): """ -- GitLab