diff --git a/src/bayesvalidrox/surrogate_models/polynomial_chaos.py b/src/bayesvalidrox/surrogate_models/polynomial_chaos.py
index 6cf4c1c09b96b612e9f054be2adfa07734734405..860eb80f5cc6ea1c2e3670df07c222f819b0b464 100644
--- a/src/bayesvalidrox/surrogate_models/polynomial_chaos.py
+++ b/src/bayesvalidrox/surrogate_models/polynomial_chaos.py
@@ -31,7 +31,7 @@ from .orthogonal_matching_pursuit import OrthogonalMatchingPursuit
 from .reg_fast_ard import RegressionFastARD
 from .reg_fast_laplace import RegressionFastLaplace
-from .surrogate_models import MetaModel, _preprocessing_fit, _bootstrap
+from .surrogate_models import MetaModel, _preprocessing_fit, _bootstrap_fit
 from .supplementary import corr_loocv_error, create_psi
@@ -115,6 +115,7 @@ class PCE(MetaModel):
         self._pce_reg_method = pce_reg_method
         self._pce_deg = pce_deg
         self._pce_q_norm = pce_q_norm
+        self.regression_dict = {}
         self._pce_reg_options = {}
 
         # These three parameters belong to the 'error_model'.
@@ -134,7 +135,9 @@ class PCE(MetaModel):
         self._coeffs_dict = None
         self._q_norm_dict = None
         self._deg_dict = None
-
+
+        # Initialize the regression options as a dictionary
+        self.set_regression_options()
 
 
     def check_is_gaussian(self, _pce_reg_method, n_bootstrap_itrs) -> bool:
@@ -155,54 +158,57 @@ class PCE(MetaModel):
         if _pce_reg_method.lower() == 'lars':
             return False
         return True
-
+
+
     def set_regression_options(self) -> None:
         """
-        Chooses the generic settings for the regression type.
-        TODO: currently not useable!
+        Collects the generic settings for the regression in a dictionary.
+        This includes the regression objects and their arguments.
 
         Returns
         -------
         None
 
         """
-        match self._pce_reg_method.lower():
-            case 'ols':
-                self._pce_reg_options = {'fit_intercept':False}
-            case 'brr':
-                self._pce_reg_options = {'n_iter':1000, 'tol':1e-7,
+        # Collect all regression objects
+        self.regression_dict['ols'] = lm.LinearRegression
+        self.regression_dict['brr'] = lm.BayesianRidge
+        self.regression_dict['ard'] = lm.ARDRegression
+        self.regression_dict['fastard'] = RegressionFastARD
+        self.regression_dict['bcs'] = RegressionFastLaplace
+        self.regression_dict['lars'] = lm.LassoLarsCV
+        self.regression_dict['sgdr'] = lm.SGDRegressor
+        self.regression_dict['omp'] = OrthogonalMatchingPursuit
+        self.regression_dict['vbl'] = VBLinearRegression
+        self.regression_dict['ebl'] = EBLinearRegression
+
+        # Collect the corresponding options
+        self._pce_reg_options['ols'] = {'fit_intercept':False}
+        self._pce_reg_options['brr'] = {'n_iter':1000, 'tol':1e-7,
                                         'fit_intercept':False,
                                         #'compute_score':compute_score,
                                         'alpha_1':1e-04, 'alpha_2':1e-04,
                                         #'lambda_1':lambda_, 'lambda_2':lambda_
                                         }
-            case 'ard':
-                self._pce_reg_options = {'fit_intercept':False,
+        self._pce_reg_options['ard'] = {'fit_intercept':False,
                                         #'compute_score':compute_score,
                                         'n_iter':1000, 'tol':0.0001,
                                         'alpha_1':1e-3, 'alpha_2':1e-3,
                                         #'lambda_1':lambda_, 'lambda_2':lambda_
                                         }
-            case 'fastard':
-                self._pce_reg_options = {'fit_intercept':False,
+        self._pce_reg_options['fastard'] = {'fit_intercept':False,
                                          'normalize':True,
                                          #'compute_score':compute_score,
                                          'n_iter':300, 'tol':1e-10}
-            case 'bcs':
-                self._pce_reg_options = {'fit_intercept':False,
+        self._pce_reg_options['bcs'] = {'fit_intercept':False,
                                         #'bias_term':bias_term,
                                         'n_iter':1000, 'tol':1e-7}
-            case 'lars':
-                self._pce_reg_options = {'fit_intercept':False}
-            case 'sgdr':
-                self._pce_reg_options = {'fit_intercept':False,
+        self._pce_reg_options['lars'] = {'fit_intercept':False}
+        self._pce_reg_options['sgdr'] = {'fit_intercept':False,
                                          'max_iter':5000,
                                          'tol':1e-7}
-            case 'omp':
-                self._pce_reg_options = {'fit_intercept':False}
-            case 'vbl':
-                self._pce_reg_options = {'fit_intercept':False}
-            case 'ebl':
-                self._pce_reg_options = {'optimizer':'em'}
+        self._pce_reg_options['omp'] = {'fit_intercept':False}
+        self._pce_reg_options['vbl'] = {'fit_intercept':False}
+        self._pce_reg_options['ebl'] = {'optimizer':'em'}
 
 
     def build_metamodel(self) -> None:
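
The removed match-case dispatch (which required Python 3.10 or newer, as the deleted WARNING comment noted) is replaced by two parallel dictionaries: `regression_dict` maps each method name to an estimator class, and `_pce_reg_options` maps the same name to that estimator's constructor arguments. A minimal, self-contained sketch of this lookup-and-instantiate pattern, using stand-in names rather than the class attributes above:

    import sklearn.linear_model as lm

    # Estimator classes and their constructor options, keyed by method name
    regression_dict = {'ols': lm.LinearRegression, 'lars': lm.LassoLarsCV}
    reg_options = {'ols': {'fit_intercept': False},
                   'lars': {'fit_intercept': False}}

    # Dispatch: look up the class, unpack its options, instantiate
    method = 'ols'
    clf = regression_dict[method](**reg_options[method])
    # clf is now LinearRegression(fit_intercept=False), ready for clf.fit(X, y)
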
@@ -228,7 +234,7 @@ class PCE(MetaModel):
         self._clf_poly = self.auto_vivification()
         self._LCerror = self.auto_vivification()
 
-        self._deg_array = self.__select_degree(self.ndim, self.n_samples)
+        self._deg_array = self.__select_degree()#self.ndim, self.n_samples)
 
         # Generate all basis indices
         self._all_basis_indices = self.auto_vivification()
@@ -236,7 +242,7 @@ class PCE(MetaModel):
         keys = self._all_basis_indices.keys()
         if deg not in np.fromiter(keys, dtype=float):
             # Generate the polynomial basis indices
-            for qidx, q in enumerate(self._pce_q_norm):
+            for _, q in enumerate(self._pce_q_norm):
                 basis_indices = glexindex(start=0, stop=deg + 1,
                                           dimensions=self.ndim,
                                           cross_truncation=q,
@@ -245,7 +251,7 @@
 
     @_preprocessing_fit
-    @_bootstrap
+    @_bootstrap_fit
     def fit(self, X: np.array, y: dict, parallel=False, verbose=False, b_i = 0):
         """
         Fits the surrogate to the given data (samples X, outputs y).
@@ -355,7 +361,7 @@ class PCE(MetaModel):
 
                 # Calulate the cofficients of surrogate model
                 updated_out = self.regression(
-                    psi, y[:, i], basis_indices, reg_method='OLS',
+                    psi, y[:, i], basis_indices,
                     sparsity=False
                 )
 
@@ -364,6 +370,7 @@ class PCE(MetaModel):
 
         return final_out_dict
 
+    # -------------------------------------------------------------------------
     def univ_basis_vals(self, samples, n_max=None):
         """
@@ -398,6 +405,7 @@ class PCE(MetaModel):
 
         return univ_basis
 
+    # -------------------------------------------------------------------------
     def regression(self, X, y, basis_indices, sparsity=True):
         """
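
After the rename, `fit` is wrapped first by `_bootstrap_fit` and then by `_preprocessing_fit` (both defined in surrogate_models.py; the rename itself appears in the second diff below). A hedged skeleton of how two such decorators compose around a fit function; the real implementations carry the actual preprocessing and bootstrap logic:

    import functools

    def _preprocessing_fit(fit_function):
        @functools.wraps(fit_function)
        def decorator(self, X, y, **kwargs):
            # e.g. validate X/y, build the input space, set self.n_samples
            return fit_function(self, X, y, **kwargs)
        return decorator

    def _bootstrap_fit(fit_function):
        @functools.wraps(fit_function)
        def decorator(self, X, y, **kwargs):
            # e.g. resample (X, y) n_bootstrap_itrs times and call the
            # wrapped fit once per iteration with the matching b_i
            return fit_function(self, X, y, **kwargs)
        return decorator
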
@@ -423,72 +431,45 @@ class PCE(MetaModel):
             Fitted estimator, spareMulti-Index, sparseX and coefficients.
 
         """
-        # Set parameters
-        bias_term = self.dim_red_method.lower() != 'pca'
-        compute_score = bool(self.verbose)
-
-        # inverse of the observed variance of the data
-        if np.var(y) != 0:
-            lambda_ = 1 / np.var(y)
-        else:
-            lambda_ = 1e-6
-
         # Bayes sparse adaptive aPCE
-        # TODO: give the parameters for this as dictionary in the future
         _clf_poly = None
-        # WARNING: Match-case writing is only available for py 3.10 and above!
         reg_method = self._pce_reg_method.lower()
-        if reg_method == 'ols':
-            _clf_poly = lm.LinearRegression(fit_intercept=False)
-        elif reg_method == 'brr':
-            _clf_poly = lm.BayesianRidge(n_iter=1000, tol=1e-7,
-                                         fit_intercept=False,
-                                         # normalize=True,
-                                         compute_score=compute_score,
-                                         alpha_1=1e-04, alpha_2=1e-04,
-                                         lambda_1=lambda_, lambda_2=lambda_)
-            _clf_poly.converged = True
+        kwargs = self._pce_reg_options[reg_method]
+
+        # Add the last parameters
+        if reg_method in ['brr', 'ard', 'fastard']:
+            compute_score = bool(self.verbose)
+            kwargs['compute_score'] = compute_score
 
-        elif reg_method == 'ard':
+        if reg_method in ['brr', 'ard']:
+            lambda_ = 1e-6
+            if np.var(y) != 0:
+                lambda_ = 1 / np.var(y)
+            kwargs['lambda_1'] = lambda_
+            kwargs['lambda_2'] = lambda_
+
+        if reg_method == 'bcs':
+            bias_term = self.dim_red_method.lower() != 'pca'
+            kwargs['bias_term'] = bias_term
+
+        # Handle any exceptions
+        if reg_method == 'ard':
             if X.shape[0] < 2:
                 raise ValueError('Regression with ARD can only be performed for more than 2 samples')
-            _clf_poly = lm.ARDRegression(fit_intercept=False,
-                                         # normalize=True,
-                                         compute_score=compute_score,
-                                         n_iter=1000, tol=0.0001,
-                                         alpha_1=1e-3, alpha_2=1e-3,
-                                         lambda_1=lambda_, lambda_2=lambda_)
-
-        elif reg_method == 'fastard':
-            _clf_poly = RegressionFastARD(fit_intercept=False,
-                                          normalize=True,
-                                          compute_score=compute_score,
-                                          n_iter=300, tol=1e-10)
-
-        elif reg_method == 'bcs':
+        if reg_method == 'bcs':
+            # TODO: move this error message into RegressionFastLaplace
             if X.shape[0] < 10:
                 raise ValueError('Regression with BCS can only be performed for more than 10 samples')
-            _clf_poly = RegressionFastLaplace(fit_intercept=False,
-                                              bias_term=bias_term,
-                                              n_iter=1000, tol=1e-7)
-
-        elif reg_method == 'lars':
+        if reg_method == 'lars':
            if X.shape[0] < 10:
                 raise ValueError('Regression with LARS can only be performed for more than 5 samples')
-            _clf_poly = lm.LassoLarsCV(fit_intercept=False)
-
-        elif reg_method == 'sgdr':
-            _clf_poly = lm.SGDRegressor(fit_intercept=False,
-                                        max_iter=5000, tol=1e-7)
-
-        elif reg_method == 'omp':
-            _clf_poly = OrthogonalMatchingPursuit(fit_intercept=False)
-
-        elif reg_method == 'vbl':
-            _clf_poly = VBLinearRegression(fit_intercept=False)
+        # Init the regression object
+        _clf_poly = self.regression_dict[reg_method](**kwargs)
 
-        elif reg_method == 'ebl':
-            _clf_poly = EBLinearRegression(optimizer='em')
+        # Apply any other settings
+        if reg_method == 'brr':
+            _clf_poly.converged = True
 
         # Do the fit
         _clf_poly.fit(X, y)
@@ -517,6 +498,7 @@ class PCE(MetaModel):
             return_out_dict['coeffs'] = coeffs
 
         return return_out_dict
 
+    # --------------------------------------------------------------------------------------------------------
     def adaptive_regression(self, X, y, varIdx, verbose=False):
         """
@@ -761,7 +743,7 @@ class PCE(MetaModel):
         mean = np.empty((len(samples), len(values)))
         std = np.empty((len(samples), len(values)))
         idx = 0
-        for in_key, InIdxValues in values.items():
+        for in_key, _ in values.items():
 
             # Assemble Psi matrix
             basis = self._basis_dict[f'b_{b_i + 1}'][output][in_key]
@@ -826,7 +808,7 @@ class PCE(MetaModel):
         return mean_pred, std_pred
 
     # -------------------------------------------------------------------------
-    def __select_degree(self, ndim, n_samples):
+    def __select_degree(self):#, ndim, n_samples):
         """
         Selects degree based on the number of samples and parameters in
         the sequential design.
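
Worth noting about the rewritten `regression` body above: `kwargs = self._pce_reg_options[reg_method]` binds the stored options dict itself, so per-call additions such as `kwargs['lambda_1']` persist inside `self._pce_reg_options` between calls. A stand-alone illustration of that aliasing (names are placeholders, and the `dict(...)` copy is only a possible alternative, not what the patch does):

    # `options` stands in for self._pce_reg_options['brr']
    options = {'fit_intercept': False}

    kwargs = options                     # binds the same dict object
    kwargs['lambda_1'] = 1e-6            # also lands in `options`
    assert 'lambda_1' in options

    kwargs = dict(options)               # a per-call copy keeps defaults intact
    kwargs['lambda_2'] = 1e-6
    assert 'lambda_2' not in options
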
@@ -936,7 +918,7 @@ class PCE(MetaModel):
         pce_mean = np.zeros((len(coef_dict)))
         pce_var = np.zeros((len(coef_dict)))
 
-        for index, values in coef_dict.items():
+        for index, _ in coef_dict.items():
             idx = int(index.split('_')[1]) - 1
             coeffs = self._coeffs_dict[f'b_{b_i + 1}'][output][index]
diff --git a/src/bayesvalidrox/surrogate_models/surrogate_models.py b/src/bayesvalidrox/surrogate_models/surrogate_models.py
index 5446c9968e72122ad8fc138e35193ddc94be27d3..133c420ad82787e5f424f0d613eaef620e63dcbb 100644
--- a/src/bayesvalidrox/surrogate_models/surrogate_models.py
+++ b/src/bayesvalidrox/surrogate_models/surrogate_models.py
@@ -115,7 +115,7 @@
     return decorator
 
 
-def _bootstrap(fit_function):
+def _bootstrap_fit(fit_function):
     """
     Decorator that applies bootstrap iterations around the training of
     a MetaModel.
@@ -293,7 +293,7 @@ class MetaModel:
 
         # Other params
         self.InputSpace = None
-        self.out_names = None
+        self.out_names = []
         self.n_samples = None
         self.LCerror = None  # TODO: only used in loocv-exploration?! Not assigned correctly in PCE class???
         self.LOOCV_score_dict = None  # TODO: generic function that calculates this score!?
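
Finally, the `out_names` default changes from `None` to an empty list, which makes the attribute safe to iterate before any outputs are registered. A stand-alone illustration (not from the repo):

    out_names = []
    for name in out_names:   # an empty list simply iterates zero times
        print(name)

    # out_names = None
    # for name in out_names: ...  -> TypeError: 'NoneType' object is not iterable
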