In [2]:
import numpy as np
class LogisticRegression:
    def __init__(self, learning_rate=0.01, num_iter=100, fit_intercept=True, verbose=False):
        self.learning_rate = learning_rate  # learning_rate of the algorithm
        self.num_iter = num_iter  #  number of iterations of the gradient descent
        self.fit_intercept = fit_intercept  # boolean indicating whether we`re adding base X0 feature vector or not
        self.verbose = verbose  
    def _add_intercept(self, X):
        intercept = np.ones((X.shape[0], 1))  #  creating X0 features vector(M x 1)
        return np.concatenate((intercept, X), axis=1)  # concatenating X0 features vector with our features making intercept
    def _logit(self, z):
        '''Defines our "logit" function based on which we make predictions
           parameters:
              z - product of the our features with weights
           return:
              probability of the attachment to class
        '''
        return 1/(1 + np.exp(-z))
    def _loss(self, g, y):
        '''
        Functions have parameters or weights and we want to find the best values for them.
        To start we pick random values and we need a way to measure how well the algorithm performs using those random weights.
        That measure is computed using the loss function
        '''
        return (-y * np.log(g) - (1 - y) * np.log(1 - g)).mean()
    def train(self, X, y):
        '''
        Function for training the algorithm.
            parameters:
              X - input data matrix (all our features without target variable)
              y - target variable vector (1/0)
            
            return:
              None
        '''
        if self.fit_intercept:
            X = self._add_intercept(X)  # X will get a result with "zero" feature
        self._weights = np.zeros(X.shape[1])  #  inicializing our weights vector filled with zeros
        
        for i in range(self.num_iter):  # implementing Gradient Descent algorithm
            z = np.dot(X, self._weights)  #  calculate the product of the weights and predictor matrix
            g = self._logit(z)
            gradient = np.dot(X.T, (g - y)) / y.size
            self._weights =  self._weights  - self.learning_rate * gradient
            
            if (self.verbose == True and i % 100 == 0):
                z = np.dot(X, self._weights)
                g = self._logit(z)
                print(f'loss: {self._loss(g, y)} \t')
    def predict_prob(self, X):  
        if self.fit_intercept:
            X = self._add_intercept(X)
    
        return self._logit(np.dot(X, self._weights))
    
    def predict(self, X, threshold):
        return self.predict_prob(X) >= threshold
In [3]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_breast_cancer
X, y = load_breast_cancer(return_X_y=True)  # load the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 42)  # split the dataset
X_train_scaled = (X_train - np.average(X_train, 0))/np.std(X_train)
X_test_scaled = (X_test - np.average(X_test, 0))/np.std(X_test)
#print(X.shape)
#print(X[:2])
In [4]:
# Create an instance of the LogisticRegression class
logit = LogisticRegression(learning_rate=0.01, num_iter=10000, verbose=True)  # you can play with hyperparameters to understand how learning rate works.
In [5]:
# Train the models
logit.train(X_train_scaled, y_train)
loss: 0.6931471805599453 loss: 0.3677875336279482 loss: 0.30813972433579384 loss: 0.2837383861675563 loss: 0.2708270038151539 loss: 0.2629761041263377 loss: 0.2577488031548123 loss: 0.254031390193723 loss: 0.2512492230791035 loss: 0.2490792779461498 loss: 0.24732795803138768 loss: 0.24587329028807284 loss: 0.24463526792421764 loss: 0.24355962978846127 loss: 0.2426085185636089 loss: 0.2417548662954826 loss: 0.24097889707612913 loss: 0.24026588103931001 loss: 0.23960465347098578 loss: 0.23898661568183166 loss: 0.23840504709488353 loss: 0.2378546229601606 loss: 0.237331070677176 loss: 0.23683092123562108 loss: 0.23635132698966876 loss: 0.23588992637268458 loss: 0.23544474227493287 loss: 0.23501410486043947 loss: 0.23459659232931937 loss: 0.23419098499773808 loss: 0.2337962293600533 loss: 0.23341140970389912 loss: 0.23303572549166965 loss: 0.2326684731824616 loss: 0.2323090315018704 loss: 0.23195684941047418 loss: 0.2316114362011382 loss: 0.23127235328840479 loss: 0.23093920735282458 loss: 0.2306116445781363 loss: 0.23028934577614332 loss: 0.2299720222376261 loss: 0.22965941218105942 loss: 0.22935127769675998 loss: 0.22904740210421234 loss: 0.2287475876560675 loss: 0.22845165353471283 loss: 0.22815943409713021 loss: 0.22787077733158254 loss: 0.22758554349593044 loss: 0.2273036039124295 loss: 0.22702483989794348 loss: 0.22674914181183572 loss: 0.22647640820652837 loss: 0.22620654506796053 loss: 0.22593946513503269 loss: 0.2256750872886707 loss: 0.22541333600243166 loss: 0.2251541408476608 loss: 0.2248974360471234 loss: 0.22464316007181284 loss: 0.22439125527629852 loss: 0.22414166756854229 loss: 0.22389434611060072 loss: 0.2236492430470486 loss: 0.2234063132583235 loss: 0.22316551413650584 loss: 0.22292680538132623 loss: 0.22269014881442936 loss: 0.2224555082101385 loss: 0.22222284914114662 loss: 0.2219921388377268 loss: 0.22176334605919729 loss: 0.22153644097650593 loss: 0.22131139506491115 loss: 0.22108818100583968 loss: 0.22086677259708953 loss: 0.22064714467062843 loss: 0.22042927301730977 loss: 0.22021313431789186 loss: 0.2199987060798053 loss: 0.2197859665791637 loss: 0.21957489480756182 loss: 0.21936547042324453 loss: 0.2191576737062704 loss: 0.21895148551732665 loss: 0.2187468872598831 loss: 0.2185438608454005 loss: 0.2183423886613355 loss: 0.21814245354170517 loss: 0.21794403873999546 loss: 0.21774712790421885 loss: 0.21755170505393942 loss: 0.21735775455910342 loss: 0.21716526112052453 loss: 0.21697420975188705 loss: 0.2167845857631422 loss: 0.21659637474518226 loss: 0.21640956255568833 loss: 0.21622413530605525
In [6]:
# Normalize output generated by sigmoid function
y_pred = [int(round(x)) for x in logit.predict_prob(X_test_scaled).flatten()]  # you can do it much sipler it`s just me
# look at the score
score = accuracy_score(y_test, y_pred)  # 0.956 -> 95.6 %.
print(score)
0.9473684210526315