In [2]:
import numpy as np

class LogisticRegression:
    def __init__(self, learning_rate=0.01, num_iter=100, fit_intercept=True, verbose=False):
        self.learning_rate = learning_rate  # step size for gradient descent
        self.num_iter = num_iter            # number of gradient descent iterations
        self.fit_intercept = fit_intercept  # whether to add a constant X0 = 1 feature (the intercept)
        self.verbose = verbose              # whether to print the loss during training

    def _add_intercept(self, X):
        intercept = np.ones((X.shape[0], 1))           # column of ones with shape (M x 1)
        return np.concatenate((intercept, X), axis=1)  # prepend it to the feature matrix

    def _logit(self, z):
        '''Sigmoid (logistic) function used to turn scores into probabilities.
        parameters:
            z - dot product of the features with the weights
        return:
            probability of belonging to the positive class
        '''
        return 1 / (1 + np.exp(-z))

    def _loss(self, g, y):
        '''
        The model has weights, and we want to find the best values for them.
        We start from initial weights and need a way to measure how well the model
        performs with the current weights. That measure is the loss function,
        here the binary cross-entropy.
        '''
        return (-y * np.log(g) - (1 - y) * np.log(1 - g)).mean()

    def train(self, X, y):
        '''
        Train the model with gradient descent.
        parameters:
            X - input data matrix (all our features, without the target variable)
            y - target variable vector (1/0)
        return:
            None
        '''
        if self.fit_intercept:
            X = self._add_intercept(X)        # X gains the constant X0 feature
        self._weights = np.zeros(X.shape[1])  # initialize the weight vector with zeros
        for i in range(self.num_iter):        # gradient descent loop
            z = np.dot(X, self._weights)      # linear scores for every sample
            g = self._logit(z)                # predicted probabilities
            gradient = np.dot(X.T, (g - y)) / y.size  # gradient of the cross-entropy loss
            self._weights = self._weights - self.learning_rate * gradient
            if self.verbose and i % 100 == 0:
                z = np.dot(X, self._weights)
                g = self._logit(z)
                print(f'loss: {self._loss(g, y)} \t')

    def predict_prob(self, X):
        if self.fit_intercept:
            X = self._add_intercept(X)
        return self._logit(np.dot(X, self._weights))

    def predict(self, X, threshold=0.5):
        return self.predict_prob(X) >= threshold
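Before training on real data, a quick sanity check of the helpers on a couple of hand-picked values can be useful (the numbers below are made up for illustration, they are not part of the original notebook):

# Sanity check of the sigmoid and the cross-entropy loss on made-up values
check = LogisticRegression()
print(check._logit(np.array([0.0, 2.0, -2.0])))             # ~[0.5, 0.88, 0.12]
print(check._loss(np.array([0.9, 0.1]), np.array([1, 0])))  # ~0.105: confident, correct predictions give a small loss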
In [3]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_breast_cancer
X, y = load_breast_cancer(return_X_y=True) # load the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 42) # split the dataset
X_train_scaled = (X_train - np.average(X_train, 0)) / np.std(X_train)  # center each feature, divide by the global std of the training set
X_test_scaled = (X_test - np.average(X_test, 0)) / np.std(X_test)      # note: the test set is scaled with its own statistics here
#print(X.shape)
#print(X[:2])
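A more conventional preprocessing step is to fit the scaler on the training data only and reuse its per-feature statistics for the test set. A sketch with sklearn's StandardScaler is shown below; it is an alternative, not what produced the results further down, so the reported loss and accuracy would shift slightly:

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()                 # per-feature mean and std computed on the training data only
X_train_alt = scaler.fit_transform(X_train)
X_test_alt = scaler.transform(X_test)     # test set scaled with the training statistics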
In [4]:
# Create an instance of the LogisticRegression class
logit = LogisticRegression(learning_rate=0.01, num_iter=10000, verbose=True) # feel free to play with the hyperparameters to see how the learning rate affects training (see the sketch below)
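As a sketch of that experiment (variable names below are illustrative), one could train a few models with different learning rates and compare their final training loss:

# Compare the final training loss for a few learning rates (illustrative values)
for lr in (0.001, 0.01, 0.1):
    m = LogisticRegression(learning_rate=lr, num_iter=1000)
    m.train(X_train_scaled, y_train)
    print(lr, m._loss(m.predict_prob(X_train_scaled), y_train))  # a smaller final loss means faster convergence on this data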
In [5]:
# Train the models
logit.train(X_train_scaled, y_train)
loss: 0.6931471805599453 loss: 0.3677875336279482 loss: 0.30813972433579384 loss: 0.2837383861675563 loss: 0.2708270038151539 loss: 0.2629761041263377 loss: 0.2577488031548123 loss: 0.254031390193723 loss: 0.2512492230791035 loss: 0.2490792779461498 loss: 0.24732795803138768 loss: 0.24587329028807284 loss: 0.24463526792421764 loss: 0.24355962978846127 loss: 0.2426085185636089 loss: 0.2417548662954826 loss: 0.24097889707612913 loss: 0.24026588103931001 loss: 0.23960465347098578 loss: 0.23898661568183166 loss: 0.23840504709488353 loss: 0.2378546229601606 loss: 0.237331070677176 loss: 0.23683092123562108 loss: 0.23635132698966876 loss: 0.23588992637268458 loss: 0.23544474227493287 loss: 0.23501410486043947 loss: 0.23459659232931937 loss: 0.23419098499773808 loss: 0.2337962293600533 loss: 0.23341140970389912 loss: 0.23303572549166965 loss: 0.2326684731824616 loss: 0.2323090315018704 loss: 0.23195684941047418 loss: 0.2316114362011382 loss: 0.23127235328840479 loss: 0.23093920735282458 loss: 0.2306116445781363 loss: 0.23028934577614332 loss: 0.2299720222376261 loss: 0.22965941218105942 loss: 0.22935127769675998 loss: 0.22904740210421234 loss: 0.2287475876560675 loss: 0.22845165353471283 loss: 0.22815943409713021 loss: 0.22787077733158254 loss: 0.22758554349593044 loss: 0.2273036039124295 loss: 0.22702483989794348 loss: 0.22674914181183572 loss: 0.22647640820652837 loss: 0.22620654506796053 loss: 0.22593946513503269 loss: 0.2256750872886707 loss: 0.22541333600243166 loss: 0.2251541408476608 loss: 0.2248974360471234 loss: 0.22464316007181284 loss: 0.22439125527629852 loss: 0.22414166756854229 loss: 0.22389434611060072 loss: 0.2236492430470486 loss: 0.2234063132583235 loss: 0.22316551413650584 loss: 0.22292680538132623 loss: 0.22269014881442936 loss: 0.2224555082101385 loss: 0.22222284914114662 loss: 0.2219921388377268 loss: 0.22176334605919729 loss: 0.22153644097650593 loss: 0.22131139506491115 loss: 0.22108818100583968 loss: 0.22086677259708953 loss: 0.22064714467062843 loss: 0.22042927301730977 loss: 0.22021313431789186 loss: 0.2199987060798053 loss: 0.2197859665791637 loss: 0.21957489480756182 loss: 0.21936547042324453 loss: 0.2191576737062704 loss: 0.21895148551732665 loss: 0.2187468872598831 loss: 0.2185438608454005 loss: 0.2183423886613355 loss: 0.21814245354170517 loss: 0.21794403873999546 loss: 0.21774712790421885 loss: 0.21755170505393942 loss: 0.21735775455910342 loss: 0.21716526112052453 loss: 0.21697420975188705 loss: 0.2167845857631422 loss: 0.21659637474518226 loss: 0.21640956255568833 loss: 0.21622413530605525
In [6]:
# Normalize output generated by sigmoid function
y_pred = [int(round(x)) for x in logit.predict_prob(X_test_scaled).flatten()] # rounding the probability gives the predicted class; this could be written more simply
# look at the score
score = accuracy_score(y_test, y_pred) # 0.947 -> 94.7 %
print(score)
0.9473684210526315
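The same predictions can also be obtained through the class's own predict() method with a 0.5 threshold; this should print the same accuracy (up to ties at exactly 0.5):

# Using the predict() method instead of rounding the probabilities
y_pred_alt = logit.predict(X_test_scaled, 0.5).astype(int)
print(accuracy_score(y_test, y_pred_alt))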