From b2c34406c2bb66d8d793633f87f6b3208edee757 Mon Sep 17 00:00:00 2001
From: faridm69 <faridmohammadi69@gmail.com>
Date: Tue, 21 Jul 2020 16:07:54 +0200
Subject: [PATCH] [surrogate][FastARD] Fix bugs in the RegressionFastARD class.

---
 .../surrogate_models/RegressionFastARD.py | 20 ++++++++--
 .../surrogate_models/surrogate_models.py  | 37 ++++++++++---------
 .../Test_AnalyticalFunction.py            | 10 ++---
 .../tests/PollutionTest/Pollution_Test.py |  4 +-
 4 files changed, 41 insertions(+), 30 deletions(-)

diff --git a/BayesValidRox/surrogate_models/RegressionFastARD.py b/BayesValidRox/surrogate_models/RegressionFastARD.py
index 162f1fa69..da9a1ff1e 100755
--- a/BayesValidRox/surrogate_models/RegressionFastARD.py
+++ b/BayesValidRox/surrogate_models/RegressionFastARD.py
@@ -196,8 +196,13 @@ class RegressionFastARD():
            Returns self.
         '''
         X, y = check_X_y(X, y, dtype=np.float64, y_numeric=True)
-        X, y, X_mean, y_mean, X_std = self._center_data(X, y)
         n_samples, n_features = X.shape
+
+        X, y, X_mean, y_mean, X_std = self._center_data(X, y)
+        self._x_mean_ = X_mean
+        self._y_mean = y_mean
+        self._x_std = X_std
+
         # precompute X'*Y , X'*X for faster iterations & allocate memory for
         # sparsity & quality vectors
@@ -228,7 +233,7 @@ class RegressionFastARD():
         else:
             # in case of almost perfect multicollinearity between some features
             # start from feature 0
-            if np.sum( XXd - X_mean**2 < np.finfo(np.float32).eps ) >0: #>= 0:
+            if np.sum( XXd - X_mean**2 < np.finfo(np.float32).eps ) > 0:  # >= 0:
                 A[0] = np.finfo(np.float16).eps
                 active[0] = True
@@ -265,6 +270,11 @@ class RegressionFastARD():
             # update precision parameter for noise distribution
             rss = np.sum( ( y - np.dot(X[:,active] , Mn) )**2 )
+            # if near-perfect fit, then terminate
+            if rss / n_samples < self.tol:
+                warnings.warn('Early termination due to near perfect fit')
+                converged = True
+                break
             beta = n_samples - np.sum(active) + np.sum(Aa * Sdiag )
             beta /= ( rss + np.finfo(np.float32).eps )
@@ -339,10 +349,12 @@ class RegressionFastARD():
            var_hat: numpy array of size (n_samples_test,)
               Variance of predictive distribution
         '''
-        y_hat = np.dot(X[:,self.active_],self.coef_[self.active_])
+        x = (X - self._x_mean_) / self._x_std
+        y_hat = np.dot(x,self.coef_) + self._y_mean
+
         if return_std:
             var_hat = 1./self.alpha_
-            var_hat += np.sum( np.dot(X[:,self.active_],self.sigma_) * X[:,self.active_], axis = 1)
+            var_hat += np.sum( np.dot(x[:,self.active_],self.sigma_) * x[:,self.active_], axis = 1)
             std_hat = np.sqrt(var_hat)
             return y_hat, std_hat
         else:
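The two behavioral changes in this file belong together: fit() centers X and y through _center_data() and now stores the resulting means and standard deviation on the estimator, while predict() applies those training statistics to new inputs and adds the training mean of y back, instead of multiplying the raw X by coefficients that were learned in the centered, scaled space. The sketch below condenses this pairing together with the new early-termination guard. It is an illustration, not repository code: the class name and its plain gradient-descent solver are stand-ins for the actual fast ARD update, while the attribute names (_x_mean_, _x_std, _y_mean, coef_) and the rss / n_samples < tol test mirror the patch.

    import warnings
    import numpy as np

    class CenteredARDSketch:
        """Illustrative stand-in for RegressionFastARD (assumes no
        constant columns in X, so X.std(axis=0) is nonzero)."""

        def __init__(self, n_iter=1000, tol=1e-6):
            self.n_iter = n_iter
            self.tol = tol

        def fit(self, X, y):
            # Store the centering statistics, as the patched fit() now
            # does, so that predict() can reuse them on unseen data.
            self._x_mean_ = X.mean(axis=0)
            self._x_std = X.std(axis=0)
            self._y_mean = y.mean()
            Xc = (X - self._x_mean_) / self._x_std
            yc = y - self._y_mean

            # Stand-in solver: gradient descent on least squares; the
            # real class runs sparse Bayesian (ARD) updates here.
            n_samples, n_features = Xc.shape
            self.coef_ = np.zeros(n_features)
            step = 1.0 / (np.linalg.norm(Xc, ord=2)**2 + 1e-12)
            for _ in range(self.n_iter):
                resid = yc - Xc @ self.coef_
                # Early termination mirrored from the patch: a near-zero
                # residual sum of squares means a near-perfect fit, so
                # stop iterating instead of fitting noise.
                if np.sum(resid**2) / n_samples < self.tol:
                    warnings.warn('Early termination due to near perfect fit')
                    break
                self.coef_ += step * (Xc.T @ resid)
            return self

        def predict(self, X):
            # The core bug fix: transform test data with the *training*
            # statistics before the dot product, then add the training
            # mean of y back. Before the patch, predict() used the raw X
            # although the coefficients live in the centered space.
            x = (X - self._x_mean_) / self._x_std
            return x @ self.coef_ + self._y_mean

Keeping the statistics on the estimator also appears to be what makes the fit_intercept=True switch in the callers below safe: the intercept is carried by _y_mean rather than by an explicit constant column in the design matrix.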
diff --git a/BayesValidRox/surrogate_models/surrogate_models.py b/BayesValidRox/surrogate_models/surrogate_models.py
index a10eb417e..c1277875a 100644
--- a/BayesValidRox/surrogate_models/surrogate_models.py
+++ b/BayesValidRox/surrogate_models/surrogate_models.py
@@ -389,14 +389,14 @@ class aPCE:
             # Calculate the univariate polynomials
             for parIdx in range(NofPa):
+                polycoeffs = self.poly_rec_coeffs(n_max, polytypes[parIdx], parIdx)
                 for deg in range(P):
                     univ_vals[:,parIdx,deg] = univ_kernel(ExpDesignX[:,parIdx], deg)
-
+
         # ----------------
         else: # Only for Uniform, Normal, Gamma, Beta
-
             for parIdx in range(NofPa):
                 AB, dist = self.poly_rec_coeffs(n_max, polytypes[parIdx], parIdx)
@@ -408,7 +408,7 @@ class aPCE:
                     U = dist.inv(u)
                     univ_vals[:,parIdx,:] = self.eval_rec_rule(U,AB,nonrecursive=False)
-
+
         return univ_vals
     #--------------------------------------------------------------------------------------------------------
     def PCE_create_Psi(self,BasisIndices,univ_p_val):
@@ -490,27 +490,29 @@ class aPCE:
         Lambda = 1 / np.var(ModelOutput) if np.var(ModelOutput) !=0 else 1e-6

         # Bayes sparse adaptive aPCE
-        if RegMethod != 'OLS':
-            if RegMethod == 'BRR' or np.all(ModelOutput==0):
+        if RegMethod.lower() != 'ols':
+            if RegMethod.lower() == 'brr' or np.all(ModelOutput==0):
                 clf_poly = linear_model.BayesianRidge(n_iter=1000, tol=1e-7,
-                                                      fit_intercept=False,
+                                                      fit_intercept=True,
                                                       compute_score=compute_score,
                                                       lambda_1=Lambda, lambda_2=Lambda)

-            elif RegMethod == 'ARD':
-                clf_poly = ARDRegression(fit_intercept=False,compute_score=compute_score,
+            elif RegMethod.lower() == 'ard':
+                clf_poly = ARDRegression(fit_intercept=True,compute_score=compute_score,
                                          n_iter=1000, tol= 0.001)
                                          # alpha_1=1e-6, alpha_2=1e-6,
                                          # lambda_1=Lambda, lambda_2=Lambda)

-            elif RegMethod == 'FastARD':
-                clf_poly = RegressionFastARD(start=startBasisIndices,fit_intercept=False,compute_score=compute_score,
+            elif RegMethod.lower() == 'fastard':
+                clf_poly = RegressionFastARD(start=startBasisIndices,
+                                             fit_intercept=True,
+                                             compute_score=compute_score,
                                              n_iter=1500, tol=1e-3)

-            elif RegMethod == 'LARS':
+            elif RegMethod.lower() == 'lars':
                 clf_poly = linear_model.Lars(fit_intercept=False)

-            elif RegMethod == 'SGDR':
+            elif RegMethod.lower() == 'sgdr':
                 clf_poly = linear_model.SGDRegressor(fit_intercept = False,
                                                      max_iter=5000, tol=1e-7)

             # Fit
@@ -522,7 +524,7 @@ class aPCE:
             nnz_idx = np.nonzero(clf_poly.coef_)[0]

             if len(nnz_idx) == 0 or nnz_idx[0] != 0:
-                #print("\nWarning: Adding the first regressor.")
+                # print("\nWarning: Adding the first regressor.")
                 nnz_idx = np.insert(np.nonzero(clf_poly.coef_)[0], 0, 0)

                 # Remove the zero entries for Bases and PSI if need be
@@ -629,7 +631,7 @@ class aPCE:

             # Calculate and save the score of LOOCV
             score, LCerror = self.CorrectedLeaveoutError(sparsePSI, Coeffs, ModelOutput, clf_poly)
-
+
             # Check the convergence of noise for FastARD
             if self.RegMethod == 'FastARD' and clf_poly.alpha_ < np.finfo(np.float32).eps:
                 score = -np.inf
@@ -685,7 +687,6 @@ class aPCE:

         # ------------------ Summary of results ------------------
         # Select the one with the best score and save the necessary outputs
-        print("scores:",scores)
         bestIdx = np.nanargmax(scores)+1
         coeffs = AllCoeffs[str(bestIdx)]
         PolynomialDegrees = AllIndices_Sparse[str(bestIdx)]
@@ -864,12 +865,12 @@ class aPCE:
         from sklearn.decomposition import PCA as sklearnPCA
         # from sklearn.decomposition import TruncatedSVD as TruncatedSVD
         n_samples, n_features = Output.shape
-        covar_matrix = sklearnPCA(n_components=n_features) #None
+        covar_matrix = sklearnPCA(n_components=None)
         covar_matrix.fit(Output)
         var = np.cumsum(np.round(covar_matrix.explained_variance_ratio_, decimals=5)*100)
         try:
-            selected_n_components = np.where(var>=99.999)[0][0] + 1
+            selected_n_components = np.where(var>=99.99)[0][0] + 1
         except:
             selected_n_components = min(n_samples, n_features)

         nComponents = min(n_samples, n_features, selected_n_components)
         pca = sklearnPCA(n_components=nComponents)
         OutputMatrix = pca.fit_transform(Output)
-
+
         return pca, OutputMatrix
     #--------------------------------------------------------------------------------------------------------
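Two threads run through the surrogate_models.py hunks above: the RegMethod dispatch becomes case-insensitive (comparing RegMethod.lower() against lowercase labels) and the Bayesian solvers now get fit_intercept=True, consistent with the centering change in RegressionFastARD; and the PCA truncation asks scikit-learn for all components (n_components=None) before keeping the smallest set that explains 99.99% of the variance, slightly looser than the previous 99.999%. For reference, a standalone condensation of that truncation rule; the function name select_n_components is ours, not the module's:

    import numpy as np
    from sklearn.decomposition import PCA

    def select_n_components(Output, threshold=99.99):
        """Output: (n_samples, n_features) array of model responses."""
        n_samples, n_features = Output.shape
        pca = PCA(n_components=None).fit(Output)
        # Cumulative explained variance in percent, as in the hunk above
        var = np.cumsum(np.round(pca.explained_variance_ratio_, decimals=5) * 100)
        try:
            # Smallest component count whose cumulative share reaches the threshold
            n = np.where(var >= threshold)[0][0] + 1
        except IndexError:
            n = min(n_samples, n_features)
        return min(n_samples, n_features, n)

Presumably the looser threshold exists to drop trailing components whose variance contributions sit at rounding-noise level, reducing the number of PCEs trained per output dimension.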
diff --git a/BayesValidRox/tests/AnalyticalFunction/Test_AnalyticalFunction.py b/BayesValidRox/tests/AnalyticalFunction/Test_AnalyticalFunction.py
index bf4ccf4c6..f074d439b 100755
--- a/BayesValidRox/tests/AnalyticalFunction/Test_AnalyticalFunction.py
+++ b/BayesValidRox/tests/AnalyticalFunction/Test_AnalyticalFunction.py
@@ -80,7 +80,7 @@ if __name__ == "__main__":
     # inputParams = np.load('data/InputParameters_{}.npy'.format(ndim))
     # for i in range(ndim):
     #     Inputs.addMarginals()
-    #     Inputs.Marginals[i].Name = 'x'+str(i+1)
+    #     Inputs.Marginals[i].Name = '$X_{%s}$'%(i+1)
     #     Inputs.Marginals[i].InputValues = inputParams[:,i]

     #=====================================================
@@ -103,7 +103,6 @@ if __name__ == "__main__":
     # 1)OLS: Ordinary Least Square  2)BRR: Bayesian Ridge Regression
     # 3)LARS: Least angle regression  4)ARD: Bayesian ARD Regression
     # 5)FastARD: Fast Bayesian ARD Regression
-    # 6)SGDR: Stochastic gradient descent learning
     # MetaModelOpts.metaModel = 'PCEKriging'
     MetaModelOpts.RegMethod = 'FastARD'
     MetaModelOpts.DimRedMethod = 'PCA'
@@ -124,7 +123,7 @@ if __name__ == "__main__":
     # Sampling methods
     # 1) random 2) latin_hypercube 3) sobol 4) halton 5) hammersley 6) chebyshev(FT)
     # 7) korobov 8) grid(FT) 9) nested_grid(FT) 10)user
-    MetaModelOpts.ExpDesign.SamplingMethod = 'latin_hypercube'
+    MetaModelOpts.ExpDesign.SamplingMethod = 'random'

     # Sequential experimental design (needed only for sequential ExpDesign)
     MetaModelOpts.ExpDesign.NrofNewSample = 1
@@ -221,7 +220,7 @@ if __name__ == "__main__":

     # Compute and print RMSE error
     PostPCE.relErrorPCEModel(nSamples=3000)

-    sys.exit('STOP!!')
+    # sys.exit('STOP!!')
     #=====================================================
     #======== Bayesian inference with Emulator ==========
     #=====================================================
     BayesOpts.emulator = True

     # BME Bootstrap
-    BayesOpts.Bootstrap = True
-    BayesOpts.NrofSamples = 100000
+    BayesOpts.Bootstrap = False
     BayesOpts.BootstrapItrNr = 10
     BayesOpts.BootstrapNoise = 0.05
diff --git a/BayesValidRox/tests/PollutionTest/Pollution_Test.py b/BayesValidRox/tests/PollutionTest/Pollution_Test.py
index 6507a7500..55d84c133 100644
--- a/BayesValidRox/tests/PollutionTest/Pollution_Test.py
+++ b/BayesValidRox/tests/PollutionTest/Pollution_Test.py
@@ -36,7 +36,7 @@ if __name__ == "__main__":
     #=====================================================
     #=============  COMPUTATIONAL MODEL  ================
     #=====================================================
-    Model = FuncForwardModel('Pollution')
+    Model = FuncForwardModel()

     Model.Type = 'Function'
     Model.pyFile = 'Pollution'
@@ -107,7 +107,7 @@ if __name__ == "__main__":
     NrofInitSamples = 200

     MetaModelOpts.ExpDesign.NrSamples = NrofInitSamples
-    MetaModelOpts.ExpDesign.SamplingMethod = 'MC' # 1)MC 2)LHS 3)PCM 4)LSCM 5)user
+    MetaModelOpts.ExpDesign.SamplingMethod = 'random' # 1)MC 2)LHS 3)PCM 4)LSCM 5)user
     MetaModelOpts.ExpDesign.Method = 'normal'  # 1) normal  2) sequential

     # Sequential experimental design (needed only for sequential ExpDesign)
-- 
GitLab
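To see the effect of the predict() fix end to end, a toy check with hypothetical numbers, reusing the CenteredARDSketch defined after the first file's diff above. With the pre-patch behavior of predicting on the raw X, the predictions would be off by roughly the intercept (here 3.0); with the fix the residual is tiny:

    import numpy as np

    rng = np.random.default_rng(0)
    X = rng.normal(size=(200, 3))
    y = X @ np.array([1.5, -2.0, 0.5]) + 3.0   # nonzero intercept on purpose

    model = CenteredARDSketch(n_iter=5000).fit(X, y)
    print(np.max(np.abs(model.predict(X) - y)))   # small: intercept recovered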