Module bayesvalidrox.surrogate_models.bayes_linear
Source code
import numpy as np
from sklearn.base import RegressorMixin
from sklearn.linear_model._base import LinearModel
from sklearn.utils import check_X_y, as_float_array
from scipy.linalg import svd
import warnings
from sklearn.preprocessing import normalize as f_normalize
class BayesianLinearRegression(RegressorMixin,LinearModel):
'''
Superclass for Empirical Bayes and Variational Bayes implementations of
Bayesian Linear Regression Model
'''
    def __init__(self, n_iter, tol, fit_intercept, copy_X, verbose):
self.n_iter = n_iter
self.fit_intercept = fit_intercept
self.copy_X = copy_X
self.verbose = verbose
self.tol = tol
def _check_convergence(self, mu, mu_old):
'''
Checks convergence of algorithm using changes in mean of posterior
distribution of weights
'''
return np.sum(abs(mu-mu_old)>self.tol) == 0
def _center_data(self,X,y):
''' Centers data'''
        X = as_float_array(X, copy=self.copy_X)
# normalisation should be done in preprocessing!
X_std = np.ones(X.shape[1], dtype = X.dtype)
if self.fit_intercept:
X_mean = np.average(X,axis = 0)
y_mean = np.average(y,axis = 0)
X -= X_mean
y = y - y_mean
else:
X_mean = np.zeros(X.shape[1],dtype = X.dtype)
y_mean = 0. if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype)
return X,y, X_mean, y_mean, X_std
    def predict_dist(self, X):
        '''
        Calculates the mean and variance of the predictive distribution for
        each point of the test set. (The predictive distribution of each data
        point is Gaussian, so it is uniquely determined by its mean and
        variance.)
        Parameters
        ----------
        X: array-like of size (n_test_samples, n_features)
            Set of features for which corresponding responses should be
            predicted
        Returns
        -------
        : list of two numpy arrays [mu_pred, var_pred]
            mu_pred : numpy array of size (n_test_samples,)
                Mean of predictive distribution
            var_pred: numpy array of size (n_test_samples,)
                Variance of predictive distribution
        '''
# Note check_array and check_is_fitted are done within self._decision_function(X)
mu_pred = self._decision_function(X)
data_noise = 1./self.beta_
model_noise = np.sum(np.dot(X,self.eigvecs_)**2 * self.eigvals_,1)
var_pred = data_noise + model_noise
return [mu_pred,var_pred]
class EBLinearRegression(BayesianLinearRegression):
'''
Bayesian Regression with type II maximum likelihood (Empirical Bayes)
Parameters:
-----------
n_iter: int, optional (DEFAULT = 300)
Maximum number of iterations
tol: float, optional (DEFAULT = 1e-3)
Threshold for convergence
optimizer: str, optional (DEFAULT = 'fp')
Method for optimization , either Expectation Maximization or
Fixed Point Gull-MacKay {'em','fp'}. Fixed point iterations are
faster, but can be numerically unstable (especially in case of near perfect fit).
fit_intercept: bool, optional (DEFAULT = True)
If True includes bias term in model
    normalize: bool, optional (DEFAULT = True)
        If True, X is rescaled to unit L2 column norm before fitting
    perfect_fit_tol: float (DEFAULT = 1e-6)
        Prevents overflow of precision parameters (this is the smallest value
        the mean squared residual can take). Note: if using EM instead of
        fixed-point, try smaller values of perfect_fit_tol for better
        estimates of the variance of the predictive distribution.
    alpha: float (DEFAULT = 1)
        Initial value of the precision parameter for the coefficients (by
        default a very broad prior)
    copy_X : boolean, optional (DEFAULT = True)
        If True, X will be copied; otherwise it may be overwritten
verbose: bool, optional (Default = False)
If True at each iteration progress report is printed out
Attributes
----------
coef_ : array, shape = (n_features)
Coefficients of the regression model (mean of posterior distribution)
intercept_: float
Value of bias term (if fit_intercept is False, then intercept_ = 0)
alpha_ : float
Estimated precision of coefficients
beta_ : float
Estimated precision of noise
eigvals_ : array, shape = (n_features, )
Eigenvalues of covariance matrix (from posterior distribution of weights)
    eigvecs_ : array, shape = (n_features, n_features)
Eigenvectors of covariance matrix (from posterior distribution of weights)
'''
def __init__(self,n_iter = 300, tol = 1e-3, optimizer = 'fp', fit_intercept = True,
normalize=True, perfect_fit_tol = 1e-6, alpha = 1, copy_X = True, verbose = False):
super(EBLinearRegression,self).__init__(n_iter, tol, fit_intercept, copy_X, verbose)
if optimizer not in ['em','fp']:
            raise ValueError('Optimizer can be either "em" or "fp"')
self.optimizer = optimizer
self.alpha = alpha
self.perfect_fit = False
        self.normalize = normalize
        self.scores_ = [-np.inf]
self.perfect_fit_tol = perfect_fit_tol
def _center_data(self,X,y):
''' Centers data'''
        X = as_float_array(X, copy=self.copy_X)
# normalisation should be done in preprocessing!
X_std = np.ones(X.shape[1], dtype = X.dtype)
if self.fit_intercept:
X_mean = np.average(X, axis=0)
X -= X_mean
if self.normalize:
X, X_std = f_normalize(X, axis=0, copy=False,
return_norm=True)
else:
X_std = np.ones(X.shape[1], dtype=X.dtype)
y_mean = np.average(y, axis=0)
y = y - y_mean
else:
X_mean = np.zeros(X.shape[1],dtype = X.dtype)
y_mean = 0. if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype)
return X,y, X_mean, y_mean, X_std
def fit(self, X, y):
'''
Fits Bayesian Linear Regression using Empirical Bayes
Parameters
----------
X: array-like of size [n_samples,n_features]
Matrix of explanatory variables (should not include bias term)
        y: array-like of size [n_samples]
Vector of dependent variables.
Returns
-------
        self: object
            The fitted estimator
'''
# preprocess data
X, y = check_X_y(X, y, dtype=np.float64, y_numeric=True)
n_samples, n_features = X.shape
X, y, X_mean, y_mean, X_std = self._center_data(X, y)
self._x_mean_ = X_mean
self._y_mean = y_mean
self._x_std = X_std
        # initial precision of noise and coefficients
        alpha = self.alpha
        var_y = np.var(y)
        # guard against zero variance of y (e.g. constant targets)
        if var_y == 0:
            beta = 1e-2
        else:
            beta = 1. / var_y
# to speed all further computations save svd decomposition and reuse it later
u,d,vt = svd(X, full_matrices = False)
Uy = np.dot(u.T,y)
dsq = d**2
mu = 0
for i in range(self.n_iter):
# find mean for posterior of w ( for EM this is E-step)
mu_old = mu
if n_samples > n_features:
mu = vt.T * d/(dsq+alpha/beta)
else:
# clever use of SVD here , faster for large n_features
mu = u * 1./(dsq + alpha/beta)
mu = np.dot(X.T,mu)
mu = np.dot(mu,Uy)
# precompute errors, since both methods use it in estimation
error = y - np.dot(X,mu)
sqdErr = np.sum(error**2)
if sqdErr / n_samples < self.perfect_fit_tol:
self.perfect_fit = True
                warnings.warn(('Almost perfect fit! The estimated variance of '
                               'the predictive distribution is computed using '
                               'the RSS alone'))
break
if self.optimizer == "fp":
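                # gamma = sum_i beta*d_i^2/(beta*d_i^2 + alpha) is the
                # effective number of well-determined parameters (MacKay's
                # evidence framework); it replaces n_features in the alpha
                # update and n_samples - gamma replaces n_samples in beta's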
gamma = np.sum(beta*dsq/(beta*dsq + alpha))
# use updated mu and gamma parameters to update alpha and beta
# !!! made computation numerically stable for perfect fit case
alpha = gamma / (np.sum(mu**2) + np.finfo(np.float32).eps )
beta = ( n_samples - gamma ) / (sqdErr + np.finfo(np.float32).eps )
else:
# M-step, update parameters alpha and beta to maximize ML TYPE II
eigvals = 1. / (beta * dsq + alpha)
alpha = n_features / ( np.sum(mu**2) + np.sum(1/eigvals) )
beta = n_samples / ( sqdErr + np.sum(dsq/eigvals) )
# if converged or exceeded maximum number of iterations => terminate
            converged = self._check_convergence(mu, mu_old)
if self.verbose:
print( "Iteration {0} completed".format(i) )
if converged is True:
print("Algorithm converged after {0} iterations".format(i))
if converged or i==self.n_iter -1:
break
eigvals = 1./(beta * dsq + alpha)
self.coef_ = beta*np.dot(vt.T*d*eigvals ,Uy)
self._set_intercept(X_mean,y_mean,X_std)
self.beta_ = beta
self.alpha_ = alpha
self.eigvals_ = eigvals
self.eigvecs_ = vt.T
# set intercept_
if self.fit_intercept:
self.coef_ = self.coef_ / X_std
self.intercept_ = y_mean - np.dot(X_mean, self.coef_.T)
else:
self.intercept_ = 0.
return self
def predict(self,X, return_std=False):
'''
Computes predictive distribution for test set.
Predictive distribution for each data point is one dimensional
Gaussian and therefore is characterised by mean and variance.
Parameters
-----------
        X: {array-like, sparse} (n_samples_test, n_features)
            Test data, matrix of explanatory variables
        return_std: bool, optional (DEFAULT = False)
            If True, the standard deviation of the predictive distribution is
            returned as well
        Returns
        -------
        y_hat: numpy array of size (n_samples_test,)
            Estimated values of targets on test set (i.e. mean of predictive
            distribution)
        std_hat: numpy array of size (n_samples_test,)
            Standard deviation of predictive distribution (only returned when
            return_std is True)
'''
y_hat = np.dot(X,self.coef_) + self.intercept_
if return_std:
if self.normalize:
X = (X - self._x_mean_) / self._x_std
data_noise = 1./self.beta_
model_noise = np.sum(np.dot(X,self.eigvecs_)**2 * self.eigvals_,1)
var_pred = data_noise + model_noise
std_hat = np.sqrt(var_pred)
return y_hat, std_hat
else:
return y_hat
# ============================== VBLR =========================================
def gamma_mean(a,b):
'''
Computes mean of gamma distribution
Parameters
----------
a: float
Shape parameter of Gamma distribution
b: float
Rate parameter of Gamma distribution
Returns
-------
: float
Mean of Gamma distribution
'''
return float(a) / b
class VBLinearRegression(BayesianLinearRegression):
'''
Implements Bayesian Linear Regression using mean-field approximation.
Assumes gamma prior on precision parameters of coefficients and noise.
Parameters:
-----------
n_iter: int, optional (DEFAULT = 100)
Maximum number of iterations for KL minimization
    tol: float, optional (DEFAULT = 1e-4)
        Convergence threshold
    fit_intercept: bool, optional (DEFAULT = True)
        If True will use bias term in model fitting
    a: float, optional (DEFAULT = 1e-4)
        Shape parameter of Gamma prior for precision of coefficients
    b: float, optional (DEFAULT = 1e-4)
        Rate parameter of Gamma prior for precision of coefficients
    c: float, optional (DEFAULT = 1e-4)
        Shape parameter of Gamma prior for precision of noise
    d: float, optional (DEFAULT = 1e-4)
        Rate parameter of Gamma prior for precision of noise
    copy_X: boolean, optional (DEFAULT = True)
        If True, X will be copied; otherwise it may be overwritten
verbose: bool, optional (Default = False)
If True at each iteration progress report is printed out
Attributes
----------
coef_ : array, shape = (n_features)
Coefficients of the regression model (mean of posterior distribution)
intercept_: float
Value of bias term (if fit_intercept is False, then intercept_ = 0)
alpha_ : float
Mean of precision of coefficients
beta_ : float
Mean of precision of noise
eigvals_ : array, shape = (n_features, )
Eigenvalues of covariance matrix (from posterior distribution of weights)
    eigvecs_ : array, shape = (n_features, n_features)
Eigenvectors of covariance matrix (from posterior distribution of weights)
'''
def __init__(self, n_iter = 100, tol =1e-4, fit_intercept = True,
a = 1e-4, b = 1e-4, c = 1e-4, d = 1e-4, copy_X = True,
verbose = False):
super(VBLinearRegression,self).__init__(n_iter, tol, fit_intercept, copy_X,
verbose)
self.a,self.b = a, b
self.c,self.d = c, d
def fit(self,X,y):
'''
Fits Variational Bayesian Linear Regression Model
Parameters
----------
X: array-like of size [n_samples,n_features]
Matrix of explanatory variables (should not include bias term)
        y: array-like of size [n_samples]
Vector of dependent variables.
Returns
-------
        self: object
            The fitted estimator
'''
# preprocess data
X, y = check_X_y(X, y, dtype=np.float64, y_numeric=True)
n_samples, n_features = X.shape
X, y, X_mean, y_mean, X_std = self._center_data(X, y)
self._x_mean_ = X_mean
self._y_mean = y_mean
self._x_std = X_std
# SVD decomposition, done once , reused at each iteration
u,D,vt = svd(X, full_matrices = False)
dsq = D**2
UY = np.dot(u.T,y)
# some parameters of Gamma distribution have closed form solution
a = self.a + 0.5 * n_features
c = self.c + 0.5 * n_samples
b,d = self.b, self.d
# initial mean of posterior for coefficients
mu = 0
for i in range(self.n_iter):
# update parameters of distribution Q(weights)
e_beta = gamma_mean(c,d)
e_alpha = gamma_mean(a,b)
mu_old = np.copy(mu)
mu,eigvals = self._posterior_weights(e_beta,e_alpha,UY,dsq,u,vt,D,X)
# update parameters of distribution Q(precision of weights)
b = self.b + 0.5*( np.sum(mu**2) + np.sum(eigvals))
# update parameters of distribution Q(precision of likelihood)
sqderr = np.sum((y - np.dot(X,mu))**2)
xsx = np.sum(dsq*eigvals)
d = self.d + 0.5*(sqderr + xsx)
# check convergence
converged = self._check_convergence(mu,mu_old)
if self.verbose is True:
print("Iteration {0} is completed".format(i))
if converged is True:
print("Algorithm converged after {0} iterations".format(i))
# terminate if convergence or maximum number of iterations are achieved
if converged or i==(self.n_iter-1):
break
# save necessary parameters
self.beta_ = gamma_mean(c,d)
self.alpha_ = gamma_mean(a,b)
self.coef_, self.eigvals_ = self._posterior_weights(self.beta_, self.alpha_, UY,
dsq, u, vt, D, X)
self._set_intercept(X_mean,y_mean,X_std)
self.eigvecs_ = vt.T
return self
def _posterior_weights(self, e_beta, e_alpha, UY, dsq, u, vt, d, X):
'''
Calculates parameters of approximate posterior distribution
of weights
'''
# eigenvalues of covariance matrix
sigma = 1./ (e_beta*dsq + e_alpha)
# mean of approximate posterior distribution
n_samples, n_features = X.shape
if n_samples > n_features:
            mu = vt.T * d/(dsq + e_alpha/e_beta)
        else:
            mu = u * 1./(dsq + e_alpha/e_beta)
mu = np.dot(X.T,mu)
mu = np.dot(mu,UY)
return mu,sigma
def predict(self,X, return_std=False):
'''
Computes predictive distribution for test set.
Predictive distribution for each data point is one dimensional
Gaussian and therefore is characterised by mean and variance.
Parameters
-----------
        X: {array-like, sparse} (n_samples_test, n_features)
            Test data, matrix of explanatory variables
        return_std: bool, optional (DEFAULT = False)
            If True, the standard deviation of the predictive distribution is
            returned as well
        Returns
        -------
        y_hat: numpy array of size (n_samples_test,)
            Estimated values of targets on test set (i.e. mean of predictive
            distribution)
        std_hat: numpy array of size (n_samples_test,)
            Standard deviation of predictive distribution (only returned when
            return_std is True)
'''
x = (X - self._x_mean_) / self._x_std
y_hat = np.dot(x,self.coef_) + self._y_mean
if return_std:
            data_noise = 1./self.beta_
            # eigvals_/eigvecs_ describe the posterior in the centered feature
            # space, so the standardised x is used here
            model_noise = np.sum(np.dot(x, self.eigvecs_)**2 * self.eigvals_, 1)
var_pred = data_noise + model_noise
std_hat = np.sqrt(var_pred)
return y_hat, std_hat
else:
return y_hat
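
A minimal usage sketch (not part of the module source): both estimators follow
the scikit-learn fit/predict API. The synthetic data below is purely
illustrative.

import numpy as np
from bayesvalidrox.surrogate_models.bayes_linear import (
    EBLinearRegression, VBLinearRegression)

# hypothetical example data: y = X w + noise
rng = np.random.RandomState(0)
X = rng.randn(200, 5)
w = np.array([1.5, 0., -2., 0.5, 0.])
y = X.dot(w) + 0.1 * rng.randn(200)

eb = EBLinearRegression(optimizer='fp').fit(X, y)
vb = VBLinearRegression().fit(X, y)

# predictive mean and standard deviation at new points
X_new = rng.randn(10, 5)
mu_eb, std_eb = eb.predict(X_new, return_std=True)
mu_vb, std_vb = vb.predict(X_new, return_std=True)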
Functions

def gamma_mean(a, b)

    Computes the mean of a Gamma distribution.

    Parameters
    a : float
        Shape parameter of the Gamma distribution
    b : float
        Rate parameter of the Gamma distribution

    Returns
    float
        Mean of the Gamma distribution
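
A minimal sketch of calling it (values are illustrative):

from bayesvalidrox.surrogate_models.bayes_linear import gamma_mean

print(gamma_mean(2.0, 4.0))  # 0.5, i.e. shape/rate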
Classes
class BayesianLinearRegression (n_iter, tol, fit_intercept, copy_X, verbose)

    Superclass for Empirical Bayes and Variational Bayes implementations of
    the Bayesian Linear Regression model.
Ancestors
- sklearn.base.RegressorMixin
- sklearn.linear_model._base.LinearModel
- sklearn.base.BaseEstimator
Subclasses
- EBLinearRegression
- VBLinearRegression
Methods

def predict_dist(self, X)

    Calculates the mean and variance of the predictive distribution for each
    point of the test set. (The predictive distribution of each data point is
    Gaussian, so it is uniquely determined by its mean and variance.)

    Parameters
    X : array-like of size (n_test_samples, n_features)
        Set of features for which corresponding responses should be predicted

    Returns
    list of two numpy arrays [mu_pred, var_pred]
        mu_pred : numpy array of size (n_test_samples,)
            Mean of predictive distribution
        var_pred : numpy array of size (n_test_samples,)
            Variance of predictive distribution
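
A minimal sketch (assuming a fitted subclass such as EBLinearRegression and
the illustrative X, y, X_new defined after the module source above):

model = EBLinearRegression().fit(X, y)
mu_pred, var_pred = model.predict_dist(X_new)  # per-point Gaussian mean/variance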
class EBLinearRegression (n_iter=300, tol=0.001, optimizer='fp', fit_intercept=True, normalize=True, perfect_fit_tol=1e-06, alpha=1, copy_X=True, verbose=False)
    Bayesian Regression with type II maximum likelihood (Empirical Bayes)

    Parameters
    n_iter : int, optional (DEFAULT = 300)
        Maximum number of iterations
    tol : float, optional (DEFAULT = 1e-3)
        Threshold for convergence
    optimizer : str, optional (DEFAULT = 'fp')
        Method for optimization, either Expectation Maximization ('em') or the
        Gull-MacKay fixed point ('fp'). Fixed-point iterations are faster, but
        can be numerically unstable (especially in case of a near-perfect fit).
    fit_intercept : bool, optional (DEFAULT = True)
        If True includes bias term in model
    normalize : bool, optional (DEFAULT = True)
        If True, X is rescaled to unit L2 column norm before fitting
    perfect_fit_tol : float (DEFAULT = 1e-6)
        Prevents overflow of precision parameters (this is the smallest value
        the mean squared residual can take). Note: if using EM instead of
        fixed-point, try smaller values of perfect_fit_tol for better
        estimates of the variance of the predictive distribution.
    alpha : float (DEFAULT = 1)
        Initial value of the precision parameter for the coefficients (by
        default a very broad prior)
    copy_X : boolean, optional (DEFAULT = True)
        If True, X will be copied; otherwise it may be overwritten
    verbose : bool, optional (DEFAULT = False)
        If True, a progress report is printed at each iteration

    Attributes
    coef_ : array, shape = (n_features,)
        Coefficients of the regression model (mean of posterior distribution)
    intercept_ : float
        Value of bias term (if fit_intercept is False, then intercept_ = 0)
    alpha_ : float
        Estimated precision of coefficients
    beta_ : float
        Estimated precision of noise
    eigvals_ : array, shape = (n_features,)
        Eigenvalues of covariance matrix (from posterior distribution of weights)
    eigvecs_ : array, shape = (n_features, n_features)
        Eigenvectors of covariance matrix (from posterior distribution of weights)
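
A brief sketch contrasting the two optimizers (reusing the illustrative X, y
from the module-level example); on well-conditioned problems both should
converge to similar hyperparameters:

eb_fp = EBLinearRegression(optimizer='fp').fit(X, y)  # Gull-MacKay fixed point
eb_em = EBLinearRegression(optimizer='em').fit(X, y)  # Expectation Maximization
print(eb_fp.alpha_, eb_fp.beta_)
print(eb_em.alpha_, eb_em.beta_)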
Ancestors
- BayesianLinearRegression
- sklearn.base.RegressorMixin
- sklearn.linear_model._base.LinearModel
- sklearn.base.BaseEstimator
Methods
def fit(self, X, y)

    Fits Bayesian Linear Regression using Empirical Bayes.

    Parameters
    X : array-like of size [n_samples, n_features]
        Matrix of explanatory variables (should not include bias term)
    y : array-like of size [n_samples]
        Vector of dependent variables.

    Returns
    self : object
        The fitted estimator
def predict(self, X, return_std=False)
    Computes the predictive distribution for the test set. The predictive
    distribution of each data point is a one-dimensional Gaussian and is
    therefore characterised by its mean and variance.

    Parameters
    X : {array-like, sparse} of size (n_samples_test, n_features)
        Test data, matrix of explanatory variables
    return_std : bool, optional (DEFAULT = False)
        If True, also return the standard deviation of the predictive
        distribution

    Returns
    y_hat : numpy array of size (n_samples_test,)
        Estimated values of targets on test set (i.e. mean of predictive
        distribution)
    std_hat : numpy array of size (n_samples_test,)
        Standard deviation of predictive distribution (only if return_std=True)
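
A minimal sketch of using return_std for uncertainty bands (eb and X_new as in
the module-level example; the 2-sigma band is illustrative):

y_hat, std_hat = eb.predict(X_new, return_std=True)
lower = y_hat - 2.0 * std_hat  # rough 95% predictive band
upper = y_hat + 2.0 * std_hat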
Inherited members
- BayesianLinearRegression: predict_dist
class VBLinearRegression (n_iter=100, tol=0.0001, fit_intercept=True, a=0.0001, b=0.0001, c=0.0001, d=0.0001, copy_X=True, verbose=False)
    Implements Bayesian Linear Regression using a mean-field approximation.
    Assumes Gamma priors on the precision parameters of coefficients and noise.

    Parameters
    n_iter : int, optional (DEFAULT = 100)
        Maximum number of iterations for KL minimization
    tol : float, optional (DEFAULT = 1e-4)
        Convergence threshold
    fit_intercept : bool, optional (DEFAULT = True)
        If True will use bias term in model fitting
    a : float, optional (DEFAULT = 1e-4)
        Shape parameter of Gamma prior for precision of coefficients
    b : float, optional (DEFAULT = 1e-4)
        Rate parameter of Gamma prior for precision of coefficients
    c : float, optional (DEFAULT = 1e-4)
        Shape parameter of Gamma prior for precision of noise
    d : float, optional (DEFAULT = 1e-4)
        Rate parameter of Gamma prior for precision of noise
    copy_X : boolean, optional (DEFAULT = True)
        If True, X will be copied; otherwise it may be overwritten
    verbose : bool, optional (DEFAULT = False)
        If True, a progress report is printed at each iteration

    Attributes
    coef_ : array, shape = (n_features,)
        Coefficients of the regression model (mean of posterior distribution)
    intercept_ : float
        Value of bias term (if fit_intercept is False, then intercept_ = 0)
    alpha_ : float
        Mean of precision of coefficients
    beta_ : float
        Mean of precision of noise
    eigvals_ : array, shape = (n_features,)
        Eigenvalues of covariance matrix (from posterior distribution of weights)
    eigvecs_ : array, shape = (n_features, n_features)
        Eigenvectors of covariance matrix (from posterior distribution of weights)
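
A minimal sketch of adjusting the Gamma prior hyperparameters (values are
illustrative; smaller a, b, c, d correspond to less informative priors):

vb = VBLinearRegression(a=1e-6, b=1e-6, c=1e-6, d=1e-6).fit(X, y)
print(vb.alpha_, vb.beta_)  # posterior means of the two precision parameters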
Ancestors
- BayesianLinearRegression
- sklearn.base.RegressorMixin
- sklearn.linear_model._base.LinearModel
- sklearn.base.BaseEstimator
Methods
def fit(self, X, y)

    Fits the Variational Bayesian Linear Regression model.

    Parameters
    X : array-like of size [n_samples, n_features]
        Matrix of explanatory variables (should not include bias term)
    y : array-like of size [n_samples]
        Vector of dependent variables.

    Returns
    self : object
        The fitted estimator
def predict(self, X, return_std=False)
    Computes the predictive distribution for the test set. The predictive
    distribution of each data point is a one-dimensional Gaussian and is
    therefore characterised by its mean and variance.

    Parameters
    X : {array-like, sparse} of size (n_samples_test, n_features)
        Test data, matrix of explanatory variables
    return_std : bool, optional (DEFAULT = False)
        If True, also return the standard deviation of the predictive
        distribution

    Returns
    y_hat : numpy array of size (n_samples_test,)
        Estimated values of targets on test set (i.e. mean of predictive
        distribution)
    std_hat : numpy array of size (n_samples_test,)
        Standard deviation of predictive distribution (only if return_std=True)
Inherited members
- BayesianLinearRegression: predict_dist