From b2c34406c2bb66d8d793633f87f6b3208edee757 Mon Sep 17 00:00:00 2001
From: faridm69 <faridmohammadi69@gmail.com>
Date: Tue, 21 Jul 2020 16:07:54 +0200
Subject: [PATCH] [surrogate][FastARD] Fix bugs in the RegressionFastARD class.

---
 .../surrogate_models/RegressionFastARD.py | 20 ++++++++--
 .../surrogate_models/surrogate_models.py  | 37 ++++++++++---------
 .../Test_AnalyticalFunction.py            | 10 ++---
 .../tests/PollutionTest/Pollution_Test.py |  4 +-
 4 files changed, 41 insertions(+), 30 deletions(-)

diff --git a/BayesValidRox/surrogate_models/RegressionFastARD.py b/BayesValidRox/surrogate_models/RegressionFastARD.py
index 162f1fa69..da9a1ff1e 100755
--- a/BayesValidRox/surrogate_models/RegressionFastARD.py
+++ b/BayesValidRox/surrogate_models/RegressionFastARD.py
@@ -196,8 +196,13 @@ class RegressionFastARD():
            Returns self.
         '''
         X, y = check_X_y(X, y, dtype=np.float64, y_numeric=True)
-        X, y, X_mean, y_mean, X_std = self._center_data(X, y)
         n_samples, n_features = X.shape
+
+        X, y, X_mean, y_mean, X_std = self._center_data(X, y)
+        self._x_mean_ = X_mean
+        self._y_mean = y_mean
+        self._x_std = X_std
+
         # precompute X'*Y , X'*X for faster iterations & allocate memory for
         # sparsity & quality vectors
@@ -228,7 +233,7 @@ class RegressionFastARD():
         else:
             # in case of almost perfect multicollinearity between some features
             # start from feature 0
-            if np.sum( XXd - X_mean**2 < np.finfo(np.float32).eps ) >0: #>= 0:
+            if np.sum( XXd - X_mean**2 < np.finfo(np.float32).eps ) > 0:  # >= 0:
                 A[0] = np.finfo(np.float16).eps
                 active[0] = True
@@ -265,6 +270,11 @@ class RegressionFastARD():
             # update precision parameter for noise distribution
             rss = np.sum( ( y - np.dot(X[:,active] , Mn) )**2 )
+            # if near-perfect fit, then terminate
+            if rss / n_samples < self.tol:
+                warnings.warn('Early termination due to near perfect fit')
+                converged = True
+                break
             beta = n_samples - np.sum(active) + np.sum(Aa * Sdiag )
             beta /= ( rss + np.finfo(np.float32).eps )
@@ -339,10 +349,12 @@ class RegressionFastARD():
            var_hat: numpy array of size (n_samples_test,)
               Variance of predictive distribution
         '''
-        y_hat = np.dot(X[:,self.active_],self.coef_[self.active_])
+        x = (X - self._x_mean_) / self._x_std
+        y_hat = np.dot(x,self.coef_) + self._y_mean
+
         if return_std:
             var_hat = 1./self.alpha_
-            var_hat += np.sum( np.dot(X[:,self.active_],self.sigma_) * X[:,self.active_], axis = 1)
+            var_hat += np.sum( np.dot(x[:,self.active_],self.sigma_) * x[:,self.active_], axis = 1)
             std_hat = np.sqrt(var_hat)
             return y_hat, std_hat
         else:
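The two behavioral changes in this file belong together: fit() centers X and y through _center_data() and now stores the resulting means and standard deviation on the estimator, while predict() applies those training statistics to new inputs and adds the training mean of y back, instead of multiplying the raw X by coefficients that were learned in the centered, scaled space. The sketch below condenses this pairing together with the new early-termination guard. It is an illustration, not repository code: the class name and its plain gradient-descent solver are stand-ins for the actual fast ARD update, while the attribute names (_x_mean_, _x_std, _y_mean, coef_) and the rss / n_samples < tol test mirror the patch.

    import warnings
    import numpy as np

    class CenteredARDSketch:
        """Illustrative stand-in for RegressionFastARD (assumes no
        constant columns in X, so X.std(axis=0) is nonzero)."""

        def __init__(self, n_iter=1000, tol=1e-6):
            self.n_iter = n_iter
            self.tol = tol

        def fit(self, X, y):
            # Store the centering statistics, as the patched fit() now
            # does, so that predict() can reuse them on unseen data.
            self._x_mean_ = X.mean(axis=0)
            self._x_std = X.std(axis=0)
            self._y_mean = y.mean()
            Xc = (X - self._x_mean_) / self._x_std
            yc = y - self._y_mean

            # Stand-in solver: gradient descent on least squares; the
            # real class runs sparse Bayesian (ARD) updates here.
            n_samples, n_features = Xc.shape
            self.coef_ = np.zeros(n_features)
            step = 1.0 / (np.linalg.norm(Xc, ord=2)**2 + 1e-12)
            for _ in range(self.n_iter):
                resid = yc - Xc @ self.coef_
                # Early termination mirrored from the patch: a near-zero
                # residual sum of squares means a near-perfect fit, so
                # stop iterating instead of fitting noise.
                if np.sum(resid**2) / n_samples < self.tol:
                    warnings.warn('Early termination due to near perfect fit')
                    break
                self.coef_ += step * (Xc.T @ resid)
            return self

        def predict(self, X):
            # The core bug fix: transform test data with the *training*
            # statistics before the dot product, then add the training
            # mean of y back. Before the patch, predict() used the raw X
            # although the coefficients live in the centered space.
            x = (X - self._x_mean_) / self._x_std
            return x @ self.coef_ + self._y_mean

Keeping the statistics on the estimator also appears to be what makes the fit_intercept=True switch in the callers below safe: the intercept is carried by _y_mean rather than by an explicit constant column in the design matrix.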
diff --git a/BayesValidRox/surrogate_models/surrogate_models.py b/BayesValidRox/surrogate_models/surrogate_models.py
index a10eb417e..c1277875a 100644
--- a/BayesValidRox/surrogate_models/surrogate_models.py
+++ b/BayesValidRox/surrogate_models/surrogate_models.py
@@ -389,14 +389,14 @@ class aPCE:
             # Calculate the univariate polynomials
             for parIdx in range(NofPa):
+                polycoeffs = self.poly_rec_coeffs(n_max, polytypes[parIdx], parIdx)
                 for deg in range(P):
                     univ_vals[:,parIdx,deg] = univ_kernel(ExpDesignX[:,parIdx], deg)
-
+
         # ----------------
         else: # Only for Uniform, Normal, Gamma, Beta
-
             for parIdx in range(NofPa):
                 AB, dist = self.poly_rec_coeffs(n_max, polytypes[parIdx], parIdx)
@@ -408,7 +408,7 @@ class aPCE:
                     U = dist.inv(u)
                     univ_vals[:,parIdx,:] = self.eval_rec_rule(U,AB,nonrecursive=False)
-
+
         return univ_vals
     #--------------------------------------------------------------------------------------------------------
     def PCE_create_Psi(self,BasisIndices,univ_p_val):
@@ -490,27 +490,29 @@ class aPCE:
         Lambda = 1 / np.var(ModelOutput) if np.var(ModelOutput) !=0 else 1e-6

         # Bayes sparse adaptive aPCE
-        if RegMethod != 'OLS':
-            if RegMethod == 'BRR' or np.all(ModelOutput==0):
+        if RegMethod.lower() != 'ols':
+            if RegMethod.lower() == 'brr' or np.all(ModelOutput==0):
                 clf_poly = linear_model.BayesianRidge(n_iter=1000, tol=1e-7,
-                                                      fit_intercept=False,
+                                                      fit_intercept=True,
                                                       compute_score=compute_score,
                                                       lambda_1=Lambda, lambda_2=Lambda)

-            elif RegMethod == 'ARD':
-                clf_poly = ARDRegression(fit_intercept=False,compute_score=compute_score,
+            elif RegMethod.lower() == 'ard':
+                clf_poly = ARDRegression(fit_intercept=True,compute_score=compute_score,
                                          n_iter=1000, tol= 0.001)
                                          # alpha_1=1e-6, alpha_2=1e-6,
                                          # lambda_1=Lambda, lambda_2=Lambda)

-            elif RegMethod == 'FastARD':
-                clf_poly = RegressionFastARD(start=startBasisIndices,fit_intercept=False,compute_score=compute_score,
+            elif RegMethod.lower() == 'fastard':
+                clf_poly = RegressionFastARD(start=startBasisIndices,
+                                             fit_intercept=True,
+                                             compute_score=compute_score,
                                              n_iter=1500, tol=1e-3)

-            elif RegMethod == 'LARS':
+            elif RegMethod.lower() == 'lars':
                 clf_poly = linear_model.Lars(fit_intercept=False)

-            elif RegMethod == 'SGDR':
+            elif RegMethod.lower() == 'sgdr':
                 clf_poly = linear_model.SGDRegressor(fit_intercept = False,
                                                      max_iter=5000, tol=1e-7)

             # Fit
@@ -522,7 +524,7 @@ class aPCE:
             nnz_idx = np.nonzero(clf_poly.coef_)[0]

             if len(nnz_idx) == 0 or nnz_idx[0] != 0:
-                #print("\nWarning: Adding the first regressor.")
+                # print("\nWarning: Adding the first regressor.")
                 nnz_idx = np.insert(np.nonzero(clf_poly.coef_)[0], 0, 0)

                 # Remove the zero entries for Bases and PSI if need be
@@ -629,7 +631,7 @@ class aPCE:

             # Calculate and save the score of LOOCV
             score, LCerror = self.CorrectedLeaveoutError(sparsePSI, Coeffs, ModelOutput, clf_poly)
-
+
             # Check the convergence of noise for FastARD
             if self.RegMethod == 'FastARD' and clf_poly.alpha_ < np.finfo(np.float32).eps:
                 score = -np.inf
@@ -685,7 +687,6 @@ class aPCE:

         # ------------------ Summary of results ------------------
         # Select the one with the best score and save the necessary outputs
-        print("scores:",scores)
         bestIdx = np.nanargmax(scores)+1
         coeffs = AllCoeffs[str(bestIdx)]
         PolynomialDegrees = AllIndices_Sparse[str(bestIdx)]
@@ -864,12 +865,12 @@ class aPCE:
         from sklearn.decomposition import PCA as sklearnPCA
         # from sklearn.decomposition import TruncatedSVD as TruncatedSVD
         n_samples, n_features = Output.shape
-        covar_matrix = sklearnPCA(n_components=n_features) #None
+        covar_matrix = sklearnPCA(n_components=None)
         covar_matrix.fit(Output)
         var = np.cumsum(np.round(covar_matrix.explained_variance_ratio_, decimals=5)*100)
         try:
-            selected_n_components = np.where(var>=99.999)[0][0] + 1
+            selected_n_components = np.where(var>=99.99)[0][0] + 1
         except:
             selected_n_components = min(n_samples, n_features)

         nComponents = min(n_samples, n_features, selected_n_components)
         pca = sklearnPCA(n_components=nComponents)
         OutputMatrix = pca.fit_transform(Output)
-
+
         return pca, OutputMatrix
     #--------------------------------------------------------------------------------------------------------
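Two threads run through the surrogate_models.py hunks above: the RegMethod dispatch becomes case-insensitive (comparing RegMethod.lower() against lowercase labels) and the Bayesian solvers now get fit_intercept=True, consistent with the centering change in RegressionFastARD; and the PCA truncation asks scikit-learn for all components (n_components=None) before keeping the smallest set that explains 99.99% of the variance, slightly looser than the previous 99.999%. For reference, a standalone condensation of that truncation rule; the function name select_n_components is ours, not the module's:

    import numpy as np
    from sklearn.decomposition import PCA

    def select_n_components(Output, threshold=99.99):
        """Output: (n_samples, n_features) array of model responses."""
        n_samples, n_features = Output.shape
        pca = PCA(n_components=None).fit(Output)
        # Cumulative explained variance in percent, as in the hunk above
        var = np.cumsum(np.round(pca.explained_variance_ratio_, decimals=5) * 100)
        try:
            # Smallest component count whose cumulative share reaches the threshold
            n = np.where(var >= threshold)[0][0] + 1
        except IndexError:
            n = min(n_samples, n_features)
        return min(n_samples, n_features, n)

Presumably the looser threshold exists to drop trailing components whose variance contributions sit at rounding-noise level, reducing the number of PCEs trained per output dimension.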
diff --git a/BayesValidRox/tests/AnalyticalFunction/Test_AnalyticalFunction.py b/BayesValidRox/tests/AnalyticalFunction/Test_AnalyticalFunction.py
index bf4ccf4c6..f074d439b 100755
--- a/BayesValidRox/tests/AnalyticalFunction/Test_AnalyticalFunction.py
+++ b/BayesValidRox/tests/AnalyticalFunction/Test_AnalyticalFunction.py
@@ -80,7 +80,7 @@ if __name__ == "__main__":
     # inputParams = np.load('data/InputParameters_{}.npy'.format(ndim))
     # for i in range(ndim):
     #     Inputs.addMarginals()
-    #     Inputs.Marginals[i].Name = 'x'+str(i+1)
+    #     Inputs.Marginals[i].Name = '$X_{%s}$'%(i+1)
     #     Inputs.Marginals[i].InputValues = inputParams[:,i]

     #=====================================================
@@ -103,7 +103,6 @@ if __name__ == "__main__":
     # 1)OLS: Ordinary Least Square  2)BRR: Bayesian Ridge Regression
     # 3)LARS: Least angle regression  4)ARD: Bayesian ARD Regression
     # 5)FastARD: Fast Bayesian ARD Regression
-    # 6)SGDR: Stochastic gradient descent learning
     # MetaModelOpts.metaModel = 'PCEKriging'
     MetaModelOpts.RegMethod = 'FastARD'
     MetaModelOpts.DimRedMethod = 'PCA'
@@ -124,7 +123,7 @@ if __name__ == "__main__":
     # Sampling methods
     # 1) random 2) latin_hypercube 3) sobol 4) halton 5) hammersley 6) chebyshev(FT)
     # 7) korobov 8) grid(FT) 9) nested_grid(FT) 10)user
-    MetaModelOpts.ExpDesign.SamplingMethod = 'latin_hypercube'
+    MetaModelOpts.ExpDesign.SamplingMethod = 'random'

     # Sequential experimental design (needed only for sequential ExpDesign)
     MetaModelOpts.ExpDesign.NrofNewSample = 1
@@ -221,7 +220,7 @@ if __name__ == "__main__":

     # Compute and print RMSE error
     PostPCE.relErrorPCEModel(nSamples=3000)

-    sys.exit('STOP!!')
+    # sys.exit('STOP!!')
     #=====================================================
     #======== Bayesian inference with Emulator ==========
     #=====================================================
     BayesOpts.emulator = True

     # BME Bootstrap
-    BayesOpts.Bootstrap = True
-    BayesOpts.NrofSamples = 100000
+    BayesOpts.Bootstrap = False
     BayesOpts.BootstrapItrNr = 10
     BayesOpts.BootstrapNoise = 0.05
diff --git a/BayesValidRox/tests/PollutionTest/Pollution_Test.py b/BayesValidRox/tests/PollutionTest/Pollution_Test.py
index 6507a7500..55d84c133 100644
--- a/BayesValidRox/tests/PollutionTest/Pollution_Test.py
+++ b/BayesValidRox/tests/PollutionTest/Pollution_Test.py
@@ -36,7 +36,7 @@ if __name__ == "__main__":
     #=====================================================
     #=============  COMPUTATIONAL MODEL  ================
     #=====================================================
-    Model = FuncForwardModel('Pollution')
+    Model = FuncForwardModel()

     Model.Type = 'Function'
     Model.pyFile = 'Pollution'
@@ -107,7 +107,7 @@ if __name__ == "__main__":
     NrofInitSamples = 200

     MetaModelOpts.ExpDesign.NrSamples = NrofInitSamples
-    MetaModelOpts.ExpDesign.SamplingMethod = 'MC' # 1)MC 2)LHS 3)PCM 4)LSCM 5)user
+    MetaModelOpts.ExpDesign.SamplingMethod = 'random' # 1)MC 2)LHS 3)PCM 4)LSCM 5)user
     MetaModelOpts.ExpDesign.Method = 'normal'  # 1) normal  2) sequential

     # Sequential experimental design (needed only for sequential ExpDesign)
-- 
GitLab
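To see the effect of the predict() fix end to end, a toy check with hypothetical numbers, reusing the CenteredARDSketch defined after the first file's diff above. With the pre-patch behavior of predicting on the raw X, the predictions would be off by roughly the intercept (here 3.0); with the fix the residual is tiny:

    import numpy as np

    rng = np.random.default_rng(0)
    X = rng.normal(size=(200, 3))
    y = X @ np.array([1.5, -2.0, 0.5]) + 3.0   # nonzero intercept on purpose

    model = CenteredARDSketch(n_iter=5000).fit(X, y)
    print(np.max(np.abs(model.predict(X) - y)))   # small: intercept recovered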