Module bayesvalidrox.surrogate_models.bayes_linear

Source code
import numpy as np
from sklearn.base import RegressorMixin
from sklearn.linear_model._base import LinearModel
from sklearn.utils import check_X_y, check_array, as_float_array
from sklearn.utils.validation import check_is_fitted
from scipy.linalg import svd
import warnings
from sklearn.preprocessing import normalize as f_normalize



class BayesianLinearRegression(RegressorMixin, LinearModel):
    '''
    Superclass for the Empirical Bayes and Variational Bayes implementations
    of the Bayesian linear regression model.
    '''
    def __init__(self, n_iter, tol, fit_intercept, copy_X, verbose):
        self.n_iter        = n_iter
        self.fit_intercept = fit_intercept
        self.copy_X        = copy_X
        self.verbose       = verbose
        self.tol           = tol
        
        
    def _check_convergence(self, mu, mu_old):
        '''
        Checks convergence of the algorithm using the change in the mean of
        the posterior distribution of the weights.
        '''
        return np.all(np.abs(mu - mu_old) <= self.tol)
        
        
    def _center_data(self, X, y):
        ''' Centers the data; normalisation should be done in preprocessing. '''
        # `copy` is keyword-only in recent scikit-learn versions
        X     = as_float_array(X, copy=self.copy_X)
        X_std = np.ones(X.shape[1], dtype=X.dtype)
        if self.fit_intercept:
            X_mean = np.average(X, axis=0)
            y_mean = np.average(y, axis=0)
            X     -= X_mean
            y      = y - y_mean
        else:
            X_mean = np.zeros(X.shape[1], dtype=X.dtype)
            y_mean = 0. if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype)
        return X, y, X_mean, y_mean, X_std
        
        
    def predict_dist(self, X):
        '''
        Calculates the mean and variance of the predictive distribution for
        each point of the test set. (The predictive distribution at each data
        point is Gaussian and is therefore uniquely determined by its mean
        and variance.)

        Parameters
        ----------
        X: array-like of size (n_test_samples, n_features)
            Set of features for which the corresponding responses should be
            predicted

        Returns
        -------
        : list of two numpy arrays [mu_pred, var_pred]

            mu_pred : numpy array of size (n_test_samples,)
                      Mean of the predictive distribution

            var_pred: numpy array of size (n_test_samples,)
                      Variance of the predictive distribution
        '''
        # check_array and check_is_fitted are called within
        # self._decision_function(X)
        mu_pred     = self._decision_function(X)
        data_noise  = 1. / self.beta_
        model_noise = np.sum(np.dot(X, self.eigvecs_)**2 * self.eigvals_, axis=1)
        var_pred    = data_noise + model_noise
        return [mu_pred, var_pred]
    
        
        

class EBLinearRegression(BayesianLinearRegression):
    '''
    Bayesian regression with type II maximum likelihood (Empirical Bayes).

    Parameters
    ----------
    n_iter: int, optional (DEFAULT = 300)
       Maximum number of iterations

    tol: float, optional (DEFAULT = 1e-3)
       Threshold for convergence

    optimizer: str, optional (DEFAULT = 'fp')
       Method for optimization, either Expectation Maximization ('em') or
       fixed-point Gull-MacKay ('fp'). Fixed-point iterations are faster,
       but can be numerically unstable (especially in the case of a
       near-perfect fit).

    fit_intercept: bool, optional (DEFAULT = True)
       If True, includes a bias term in the model

    normalize: bool, optional (DEFAULT = True)
       If True, the regressors are normalised before fitting

    perfect_fit_tol: float, optional (DEFAULT = 1e-6)
       Prevents overflow of the precision parameters (this is the smallest
       value the RSS can have). Note: if using EM instead of fixed-point,
       try smaller values of perfect_fit_tol for better estimates of the
       variance of the predictive distribution.

    alpha: float, optional (DEFAULT = 1)
       Initial value of the precision parameter for the coefficients (by
       default a very broad prior)

    copy_X : bool, optional (DEFAULT = True)
        If True, X is copied; otherwise it may be overwritten

    verbose: bool, optional (DEFAULT = False)
       If True, a progress report is printed at each iteration

    Attributes
    ----------
    coef_  : array, shape = (n_features,)
        Coefficients of the regression model (mean of the posterior
        distribution)

    intercept_: float
        Value of the bias term (if fit_intercept is False, then
        intercept_ = 0)

    alpha_ : float
        Estimated precision of the coefficients

    beta_  : float
        Estimated precision of the noise

    eigvals_ : array, shape = (n_features,)
        Eigenvalues of the covariance matrix (from the posterior
        distribution of the weights)

    eigvecs_ : array, shape = (n_features, n_features)
        Eigenvectors of the covariance matrix (from the posterior
        distribution of the weights)

    '''
    
    def __init__(self, n_iter=300, tol=1e-3, optimizer='fp', fit_intercept=True,
                 normalize=True, perfect_fit_tol=1e-6, alpha=1, copy_X=True,
                 verbose=False):
        super(EBLinearRegression, self).__init__(n_iter, tol, fit_intercept,
                                                 copy_X, verbose)
        if optimizer not in ['em', 'fp']:
            raise ValueError('Optimizer can be either "em" or "fp"')
        self.optimizer       = optimizer
        self.alpha           = alpha
        self.perfect_fit     = False
        # store the parameter instead of hard-coding True
        self.normalize       = normalize
        # np.NINF was removed in NumPy 2.0; use -np.inf instead
        self.scores_         = [-np.inf]
        self.perfect_fit_tol = perfect_fit_tol
    
    def _check_convergence(self, mu, mu_old):
        '''
        Checks convergence of the algorithm using the change in the mean of
        the posterior distribution of the weights.
        '''
        return np.all(np.abs(mu - mu_old) <= self.tol)


    def _center_data(self, X, y):
        ''' Centers (and optionally normalises) the data. '''
        X     = as_float_array(X, copy=self.copy_X)
        X_std = np.ones(X.shape[1], dtype=X.dtype)
        if self.fit_intercept:
            X_mean = np.average(X, axis=0)
            X -= X_mean
            if self.normalize:
                X, X_std = f_normalize(X, axis=0, copy=False,
                                       return_norm=True)
            y_mean = np.average(y, axis=0)
            y = y - y_mean
        else:
            X_mean = np.zeros(X.shape[1], dtype=X.dtype)
            y_mean = 0. if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype)
        return X, y, X_mean, y_mean, X_std
            
    def fit(self, X, y):
        '''
        Fits the Bayesian linear regression model using Empirical Bayes.

        Parameters
        ----------
        X: array-like of size [n_samples, n_features]
           Matrix of explanatory variables (should not include a bias term)

        y: array-like of size [n_samples]
           Vector of dependent variables

        Returns
        -------
        self : object

        '''
        # preprocess data
        X, y = check_X_y(X, y, dtype=np.float64, y_numeric=True)
        n_samples, n_features = X.shape
        X, y, X_mean, y_mean, X_std = self._center_data(X, y)
        self._x_mean_ = X_mean
        self._y_mean  = y_mean
        self._x_std   = X_std

        # initial precision of the noise and of the coefficients;
        # guard against zero variance of y
        alpha = self.alpha
        var_y = np.var(y)
        if var_y == 0.:
            beta = 1e-2
        else:
            beta = 1. / var_y

        # save the SVD decomposition once and reuse it in all later computations
        u, d, vt = svd(X, full_matrices=False)
        Uy  = np.dot(u.T, y)
        dsq = d**2
        mu  = 0
    
        for i in range(self.n_iter):

            # find the mean of the posterior of w (for EM this is the E-step)
            mu_old = mu
            if n_samples > n_features:
                mu = vt.T * d / (dsq + alpha/beta)
            else:
                # equivalent SVD identity; faster for large n_features
                mu = u * 1. / (dsq + alpha/beta)
                mu = np.dot(X.T, mu)
            mu = np.dot(mu, Uy)

            # precompute the residuals, since both methods use them
            error  = y - np.dot(X, mu)
            sqdErr = np.sum(error**2)

            if sqdErr / n_samples < self.perfect_fit_tol:
                self.perfect_fit = True
                warnings.warn(('Almost perfect fit!!! Estimated values of '
                               'variance for the predictive distribution are '
                               'computed using only the RSS'))
                break

            if self.optimizer == "fp":
                gamma = np.sum(beta*dsq / (beta*dsq + alpha))
                # use the updated mu and gamma to update alpha and beta;
                # the eps terms keep the computation stable for a perfect fit
                alpha = gamma / (np.sum(mu**2) + np.finfo(np.float32).eps)
                beta  = (n_samples - gamma) / (sqdErr + np.finfo(np.float32).eps)
            else:
                # M-step: update alpha and beta to maximize type II ML
                eigvals = 1. / (beta * dsq + alpha)
                alpha   = n_features / (np.sum(mu**2) + np.sum(1/eigvals))
                beta    = n_samples / (sqdErr + np.sum(dsq/eigvals))

            # terminate if converged or the maximum number of iterations is reached
            converged = self._check_convergence(mu_old, mu)
            if self.verbose:
                print("Iteration {0} completed".format(i))
                if converged is True:
                    print("Algorithm converged after {0} iterations".format(i))
            if converged or i == self.n_iter - 1:
                break
        eigvals       = 1. / (beta * dsq + alpha)
        self.coef_    = beta * np.dot(vt.T * d * eigvals, Uy)
        # _set_intercept rescales coef_ by X_std and sets intercept_;
        # repeating that rescaling here would divide coef_ by X_std twice
        self._set_intercept(X_mean, y_mean, X_std)
        self.beta_    = beta
        self.alpha_   = alpha
        self.eigvals_ = eigvals
        self.eigvecs_ = vt.T

        return self
    
    def predict(self, X, return_std=False):
        '''
        Computes the predictive distribution for a test set. The predictive
        distribution at each data point is a one-dimensional Gaussian and is
        therefore characterised by its mean and standard deviation.

        Parameters
        ----------
        X: {array-like, sparse} (n_samples_test, n_features)
           Test data, matrix of explanatory variables

        return_std: bool, optional (DEFAULT = False)
           If True, the standard deviation of the predictive distribution is
           returned as well

        Returns
        -------
        y_hat: numpy array of size (n_samples_test,)
               Estimated target values (mean of the predictive distribution)

        std_hat: numpy array of size (n_samples_test,)
               Standard deviation of the predictive distribution
               (returned only if return_std is True)
        '''
        y_hat = np.dot(X, self.coef_) + self.intercept_

        if return_std:
            # transform the test data the same way as the training data
            if self.normalize:
                X = (X - self._x_mean_) / self._x_std
            data_noise  = 1. / self.beta_
            model_noise = np.sum(np.dot(X, self.eigvecs_)**2 * self.eigvals_,
                                 axis=1)
            std_hat = np.sqrt(data_noise + model_noise)
            return y_hat, std_hat
        else:
            return y_hat
            
            
# ==============================  VBLR  =========================================

def gamma_mean(a, b):
    '''
    Computes the mean of a Gamma distribution.

    Parameters
    ----------
    a: float
      Shape parameter of the Gamma distribution

    b: float
      Rate parameter of the Gamma distribution

    Returns
    -------
    : float
      Mean of the Gamma distribution, a / b
    '''
    return float(a) / b
    


class VBLinearRegression(BayesianLinearRegression):
    '''
    Implements Bayesian linear regression using a mean-field approximation.
    Assumes Gamma priors on the precision parameters of the coefficients
    and of the noise.

    Parameters
    ----------
    n_iter: int, optional (DEFAULT = 100)
       Maximum number of iterations for the KL minimization

    tol: float, optional (DEFAULT = 1e-4)
       Convergence threshold

    fit_intercept: bool, optional (DEFAULT = True)
       If True, a bias term is used in model fitting

    a: float, optional (DEFAULT = 1e-4)
       Shape parameter of the Gamma prior for the precision of the
       coefficients

    b: float, optional (DEFAULT = 1e-4)
       Rate parameter of the Gamma prior for the precision of the
       coefficients

    c: float, optional (DEFAULT = 1e-4)
       Shape parameter of the Gamma prior for the precision of the noise

    d: float, optional (DEFAULT = 1e-4)
       Rate parameter of the Gamma prior for the precision of the noise

    copy_X : bool, optional (DEFAULT = True)
        If True, X is copied; otherwise it may be overwritten

    verbose: bool, optional (DEFAULT = False)
       If True, a progress report is printed at each iteration

    Attributes
    ----------
    coef_  : array, shape = (n_features,)
        Coefficients of the regression model (mean of the posterior
        distribution)

    intercept_: float
        Value of the bias term (if fit_intercept is False, then
        intercept_ = 0)

    alpha_ : float
        Mean of the precision of the coefficients

    beta_  : float
        Mean of the precision of the noise

    eigvals_ : array, shape = (n_features,)
        Eigenvalues of the covariance matrix (from the posterior
        distribution of the weights)

    eigvecs_ : array, shape = (n_features, n_features)
        Eigenvectors of the covariance matrix (from the posterior
        distribution of the weights)

    '''
    
    def __init__(self, n_iter=100, tol=1e-4, fit_intercept=True,
                 a=1e-4, b=1e-4, c=1e-4, d=1e-4, copy_X=True,
                 verbose=False):
        super(VBLinearRegression, self).__init__(n_iter, tol, fit_intercept,
                                                 copy_X, verbose)
        self.a, self.b = a, b
        self.c, self.d = c, d

        
    def fit(self, X, y):
        '''
        Fits the variational Bayesian linear regression model.

        Parameters
        ----------
        X: array-like of size [n_samples, n_features]
           Matrix of explanatory variables (should not include a bias term)

        y: array-like of size [n_samples]
           Vector of dependent variables

        Returns
        -------
        self : object
        '''
        # preprocess data
        X, y = check_X_y(X, y, dtype=np.float64, y_numeric=True)
        n_samples, n_features = X.shape
        X, y, X_mean, y_mean, X_std = self._center_data(X, y)
        self._x_mean_ = X_mean
        self._y_mean  = y_mean
        self._x_std   = X_std

        # SVD decomposition, done once, reused at each iteration
        u, D, vt = svd(X, full_matrices=False)
        dsq = D**2
        UY  = np.dot(u.T, y)

        # the shape parameters of the Gamma factors have closed-form solutions
        a    = self.a + 0.5 * n_features
        c    = self.c + 0.5 * n_samples
        b, d = self.b, self.d

        # initial mean of the posterior for the coefficients
        mu = 0
                
        for i in range(self.n_iter):

            # update parameters of the factor Q(weights)
            e_beta  = gamma_mean(c, d)
            e_alpha = gamma_mean(a, b)
            mu_old  = np.copy(mu)
            mu, eigvals = self._posterior_weights(e_beta, e_alpha, UY, dsq,
                                                  u, vt, D, X)

            # update parameters of the factor Q(precision of weights)
            b = self.b + 0.5 * (np.sum(mu**2) + np.sum(eigvals))

            # update parameters of the factor Q(precision of likelihood)
            sqderr = np.sum((y - np.dot(X, mu))**2)
            xsx    = np.sum(dsq * eigvals)
            d      = self.d + 0.5 * (sqderr + xsx)

            # check convergence
            converged = self._check_convergence(mu, mu_old)
            if self.verbose is True:
                print("Iteration {0} is completed".format(i))
                if converged is True:
                    print("Algorithm converged after {0} iterations".format(i))

            # terminate if converged or the maximum number of iterations is reached
            if converged or i == (self.n_iter - 1):
                break

        # save the necessary parameters
        self.beta_  = gamma_mean(c, d)
        self.alpha_ = gamma_mean(a, b)
        self.coef_, self.eigvals_ = self._posterior_weights(self.beta_,
                                                            self.alpha_, UY,
                                                            dsq, u, vt, D, X)
        self._set_intercept(X_mean, y_mean, X_std)
        self.eigvecs_ = vt.T
        return self
        

    def _posterior_weights(self, e_beta, e_alpha, UY, dsq, u, vt, d, X):
        '''
        Calculates the parameters of the approximate posterior distribution
        of the weights.
        '''
        # eigenvalues of the covariance matrix
        sigma = 1. / (e_beta * dsq + e_alpha)

        # mean of the approximate posterior distribution
        n_samples, n_features = X.shape
        if n_samples > n_features:
            mu = vt.T * d / (dsq + e_alpha/e_beta)
        else:
            mu = u * 1. / (dsq + e_alpha/e_beta)
            mu = np.dot(X.T, mu)
        mu = np.dot(mu, UY)
        return mu, sigma
        
    def predict(self, X, return_std=False):
        '''
        Computes the predictive distribution for a test set. The predictive
        distribution at each data point is a one-dimensional Gaussian and is
        therefore characterised by its mean and standard deviation.

        Parameters
        ----------
        X: {array-like, sparse} (n_samples_test, n_features)
           Test data, matrix of explanatory variables

        return_std: bool, optional (DEFAULT = False)
           If True, the standard deviation of the predictive distribution is
           returned as well

        Returns
        -------
        y_hat: numpy array of size (n_samples_test,)
               Estimated target values (mean of the predictive distribution)

        std_hat: numpy array of size (n_samples_test,)
               Standard deviation of the predictive distribution
               (returned only if return_std is True)
        '''
        x     = (X - self._x_mean_) / self._x_std
        y_hat = np.dot(x, self.coef_) + self._y_mean

        if return_std:
            data_noise  = 1. / self.beta_
            # use the centered test data, consistent with the training data
            # (the original used the raw X here)
            model_noise = np.sum(np.dot(x, self.eigvecs_)**2 * self.eigvals_,
                                 axis=1)
            std_hat = np.sqrt(data_noise + model_noise)
            return y_hat, std_hat
        else:
            return y_hat

Functions

def gamma_mean(a, b)

Computes the mean of a Gamma distribution.

Parameters

a : float
    Shape parameter of the Gamma distribution

b : float
    Rate parameter of the Gamma distribution

Returns

float
    Mean of the Gamma distribution, a / b
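
For a Gamma distribution parameterised by shape a and rate b, the mean is
simply a / b. An illustrative call (the values are an example, not part of
the module):

>>> gamma_mean(3.0, 6.0)
0.5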

Classes

class BayesianLinearRegression (n_iter, tol, fit_intercept, copy_X, verbose)

Superclass for Empirical Bayes and Variational Bayes implementations of Bayesian Linear Regression Model


Ancestors

  • sklearn.base.RegressorMixin
  • sklearn.linear_model._base.LinearModel
  • sklearn.base.BaseEstimator

Subclasses

  • EBLinearRegression
  • VBLinearRegression

Methods

def predict_dist(self, X)

Calculates the mean and variance of the predictive distribution for each
point of the test set. (The predictive distribution at each data point is
Gaussian and is therefore uniquely determined by its mean and variance.)

Parameters

X : array-like of size (n_test_samples, n_features)
    Set of features for which the corresponding responses should be predicted

Returns

list of two numpy arrays [mu_pred, var_pred]

    mu_pred : numpy array of size (n_test_samples,)
        Mean of the predictive distribution

    var_pred : numpy array of size (n_test_samples,)
        Variance of the predictive distribution
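
In both subclasses the predictive variance splits into data noise and model
uncertainty. A sketch of the identity behind the model_noise line in the
source, where V = eigvecs_, \lambda = eigvals_ and x_* is a single test
point:

\operatorname{Var}[y_*] \;=\; \beta^{-1} \;+\; x_*^{\top}\Sigma\,x_*,
\qquad
\Sigma = V\,\operatorname{diag}(\lambda)\,V^{\top},
\qquad
x_*^{\top}\Sigma\,x_* = \sum_j \lambda_j\,\big(V^{\top}x_*\big)_j^{2},

which is exactly what np.sum(np.dot(X, self.eigvecs_)**2 * self.eigvals_, 1)
computes row by row.
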
class EBLinearRegression (n_iter=300, tol=0.001, optimizer='fp', fit_intercept=True, normalize=True, perfect_fit_tol=1e-06, alpha=1, copy_X=True, verbose=False)

Bayesian regression with type II maximum likelihood (Empirical Bayes).

Parameters

n_iter : int, optional (DEFAULT = 300)
    Maximum number of iterations

tol : float, optional (DEFAULT = 1e-3)
    Threshold for convergence

optimizer : str, optional (DEFAULT = 'fp')
    Method for optimization, either Expectation Maximization ('em') or
    fixed-point Gull-MacKay ('fp'). Fixed-point iterations are faster, but
    can be numerically unstable (especially in the case of a near-perfect fit).

fit_intercept : bool, optional (DEFAULT = True)
    If True, includes a bias term in the model

normalize : bool, optional (DEFAULT = True)
    If True, the regressors are normalised before fitting

perfect_fit_tol : float, optional (DEFAULT = 1e-6)
    Prevents overflow of the precision parameters (this is the smallest value
    the RSS can have). Note: if using EM instead of fixed-point, try smaller
    values of perfect_fit_tol for better estimates of the variance of the
    predictive distribution.

alpha : float, optional (DEFAULT = 1)
    Initial value of the precision parameter for the coefficients (by default
    a very broad prior)

copy_X : bool, optional (DEFAULT = True)
    If True, X is copied; otherwise it may be overwritten

verbose : bool, optional (DEFAULT = False)
    If True, a progress report is printed at each iteration

Attributes

coef_ : array, shape = (n_features,)
    Coefficients of the regression model (mean of the posterior distribution)

intercept_ : float
    Value of the bias term (if fit_intercept is False, then intercept_ = 0)

alpha_ : float
    Estimated precision of the coefficients

beta_ : float
    Estimated precision of the noise

eigvals_ : array, shape = (n_features,)
    Eigenvalues of the covariance matrix (from the posterior distribution of
    the weights)

eigvecs_ : array, shape = (n_features, n_features)
    Eigenvectors of the covariance matrix (from the posterior distribution of
    the weights)
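
A minimal usage sketch (the synthetic data and parameter values below are
illustrative assumptions, not taken from the library's documentation):

import numpy as np
from bayesvalidrox.surrogate_models.bayes_linear import EBLinearRegression

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 5))                  # 200 samples, 5 features
w = np.array([1.5, -2.0, 0.0, 0.7, 3.0])       # ground-truth coefficients
y = X @ w + 0.5 + 0.1 * rng.normal(size=200)   # noisy linear targets

reg = EBLinearRegression(optimizer='fp', fit_intercept=True)
reg.fit(X, y)

# mean prediction, plus predictive standard deviation via return_std
y_hat, std_hat = reg.predict(X[:5], return_std=True)
print(reg.coef_, reg.intercept_)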

Ancestors

  • BayesianLinearRegression
  • sklearn.base.RegressorMixin
  • sklearn.linear_model._base.LinearModel
  • sklearn.base.BaseEstimator

Methods

def fit(self, X, y)

Fits the Bayesian linear regression model using Empirical Bayes.

Parameters

X : array-like of size [n_samples, n_features]
    Matrix of explanatory variables (should not include a bias term)

y : array-like of size [n_samples]
    Vector of dependent variables

Returns

self : object
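
The fixed-point ('fp') branch of the fitting loop implements the Gull-MacKay
updates. Written out, with s_i the singular values of the centered X, N the
number of samples and \mu the current posterior mean:

\gamma = \sum_i \frac{\beta s_i^{2}}{\beta s_i^{2} + \alpha},
\qquad
\alpha \leftarrow \frac{\gamma}{\lVert \mu \rVert^{2}},
\qquad
\beta \leftarrow \frac{N - \gamma}{\lVert y - X\mu \rVert^{2}} .

(The source adds a small eps to each denominator for numerical stability.)
The EM ('em') branch updates the same quantities, but with denominators that
additionally involve the posterior eigenvalues 1 / (\beta s_i^{2} + \alpha),
as in the source above.
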
def predict(self, X, return_std=False)

Computes the predictive distribution for a test set. The predictive
distribution at each data point is a one-dimensional Gaussian and is
therefore characterised by its mean and standard deviation.

Parameters

X : {array-like, sparse} (n_samples_test, n_features)
    Test data, matrix of explanatory variables

return_std : bool, optional (DEFAULT = False)
    If True, the standard deviation of the predictive distribution is
    returned as well

Returns

y_hat : numpy array of size (n_samples_test,)
    Estimated target values (mean of the predictive distribution)

std_hat : numpy array of size (n_samples_test,)
    Standard deviation of the predictive distribution (only if return_std
    is True)


Inherited members

  • BayesianLinearRegression.predict_dist

class VBLinearRegression (n_iter=100, tol=0.0001, fit_intercept=True, a=0.0001, b=0.0001, c=0.0001, d=0.0001, copy_X=True, verbose=False)

Implements Bayesian linear regression using a mean-field approximation.
Assumes Gamma priors on the precision parameters of the coefficients and of
the noise.

Parameters

n_iter : int, optional (DEFAULT = 100)
    Maximum number of iterations for the KL minimization

tol : float, optional (DEFAULT = 1e-4)
    Convergence threshold

fit_intercept : bool, optional (DEFAULT = True)
    If True, a bias term is used in model fitting

a : float, optional (DEFAULT = 1e-4)
    Shape parameter of the Gamma prior for the precision of the coefficients

b : float, optional (DEFAULT = 1e-4)
    Rate parameter of the Gamma prior for the precision of the coefficients

c : float, optional (DEFAULT = 1e-4)
    Shape parameter of the Gamma prior for the precision of the noise

d : float, optional (DEFAULT = 1e-4)
    Rate parameter of the Gamma prior for the precision of the noise

copy_X : bool, optional (DEFAULT = True)
    If True, X is copied; otherwise it may be overwritten

verbose : bool, optional (DEFAULT = False)
    If True, a progress report is printed at each iteration

Attributes

coef_ : array, shape = (n_features,)
    Coefficients of the regression model (mean of the posterior distribution)

intercept_ : float
    Value of the bias term (if fit_intercept is False, then intercept_ = 0)

alpha_ : float
    Mean of the precision of the coefficients

beta_ : float
    Mean of the precision of the noise

eigvals_ : array, shape = (n_features,)
    Eigenvalues of the covariance matrix (from the posterior distribution of
    the weights)

eigvecs_ : array, shape = (n_features, n_features)
    Eigenvectors of the covariance matrix (from the posterior distribution of
    the weights)
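
A minimal usage sketch for the variational model (the synthetic data below
are an illustrative assumption, not taken from the library's documentation):

import numpy as np
from bayesvalidrox.surrogate_models.bayes_linear import VBLinearRegression

rng = np.random.default_rng(1)
X = rng.normal(size=(100, 3))
y = X @ np.array([0.5, -1.0, 2.0]) + 0.05 * rng.normal(size=100)

vb = VBLinearRegression(n_iter=100, tol=1e-4)
vb.fit(X, y)
y_hat, std_hat = vb.predict(X[:3], return_std=True)  # mean and std. dev.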

Ancestors

  • BayesianLinearRegression
  • sklearn.base.RegressorMixin
  • sklearn.linear_model._base.LinearModel
  • sklearn.base.BaseEstimator

Methods

def fit(self, X, y)

Fits the variational Bayesian linear regression model.

Parameters

X : array-like of size [n_samples, n_features]
    Matrix of explanatory variables (should not include a bias term)

y : array-like of size [n_samples]
    Vector of dependent variables

Returns

self : object
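
The coordinate-ascent updates implemented in the fitting loop, written out.
With p features, N samples, s_i the singular values of the centered X and
\lambda_j the posterior eigenvalues returned by _posterior_weights, the
shape parameters have the closed forms

a = a_0 + \tfrac{p}{2}, \qquad c = c_0 + \tfrac{N}{2},

while each iteration refreshes the rate parameters

b \leftarrow b_0 + \tfrac{1}{2}\Big(\lVert \mu \rVert^{2} + \sum_j \lambda_j\Big),
\qquad
d \leftarrow d_0 + \tfrac{1}{2}\Big(\lVert y - X\mu \rVert^{2} + \sum_i s_i^{2}\lambda_i\Big),

and feeds the posterior means E[\alpha] = a/b and E[\beta] = c/d back into
the weight update.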

def predict(self, X, return_std=False)

Computes the predictive distribution for a test set. The predictive
distribution at each data point is a one-dimensional Gaussian and is
therefore characterised by its mean and standard deviation.

Parameters

X : {array-like, sparse} (n_samples_test, n_features)
    Test data, matrix of explanatory variables

return_std : bool, optional (DEFAULT = False)
    If True, the standard deviation of the predictive distribution is
    returned as well

Returns

y_hat : numpy array of size (n_samples_test,)
    Estimated target values (mean of the predictive distribution)

std_hat : numpy array of size (n_samples_test,)
    Standard deviation of the predictive distribution (only if return_std
    is True)


Inherited members

  • BayesianLinearRegression.predict_dist