In [2]:
import numpy as np

class LogisticRegression:
    def __init__(self, learning_rate=0.01, num_iter=100, fit_intercept=True, verbose=False):
        self.learning_rate = learning_rate  # step size for gradient descent
        self.num_iter = num_iter            # number of gradient descent iterations
        self.fit_intercept = fit_intercept  # whether to add a constant X0 = 1 feature (the intercept)
        self.verbose = verbose              # whether to print the loss during training

    def _add_intercept(self, X):
        intercept = np.ones((X.shape[0], 1))           # column of ones with shape (M x 1)
        return np.concatenate((intercept, X), axis=1)  # prepend it to the feature matrix

    def _logit(self, z):
        '''Sigmoid (logistic) function used to turn scores into probabilities.
        parameters:
            z - dot product of the features with the weights
        return:
            probability of belonging to the positive class
        '''
        return 1 / (1 + np.exp(-z))

    def _loss(self, g, y):
        '''
        The model has weights, and we want to find the best values for them.
        We start from initial weights and need a way to measure how well the model
        performs with the current weights. That measure is the loss function,
        here the binary cross-entropy.
        '''
        return (-y * np.log(g) - (1 - y) * np.log(1 - g)).mean()

    def train(self, X, y):
        '''
        Train the model with gradient descent.
        parameters:
            X - input data matrix (all our features, without the target variable)
            y - target variable vector (1/0)
        return:
            None
        '''
        if self.fit_intercept:
            X = self._add_intercept(X)        # X gains the constant X0 feature
        self._weights = np.zeros(X.shape[1])  # initialize the weight vector with zeros
        for i in range(self.num_iter):        # gradient descent loop
            z = np.dot(X, self._weights)      # linear scores for every sample
            g = self._logit(z)                # predicted probabilities
            gradient = np.dot(X.T, (g - y)) / y.size  # gradient of the cross-entropy loss
            self._weights = self._weights - self.learning_rate * gradient
            if self.verbose and i % 100 == 0:
                z = np.dot(X, self._weights)
                g = self._logit(z)
                print(f'loss: {self._loss(g, y)} \t')

    def predict_prob(self, X):
        if self.fit_intercept:
            X = self._add_intercept(X)
        return self._logit(np.dot(X, self._weights))

    def predict(self, X, threshold=0.5):
        return self.predict_prob(X) >= threshold
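Before training on real data, a quick sanity check of the helpers on a couple of hand-picked values can be useful (the numbers below are made up for illustration, they are not part of the original notebook):

# Sanity check of the sigmoid and the cross-entropy loss on made-up values
check = LogisticRegression()
print(check._logit(np.array([0.0, 2.0, -2.0])))             # ~[0.5, 0.88, 0.12]
print(check._loss(np.array([0.9, 0.1]), np.array([1, 0])))  # ~0.105: confident, correct predictions give a small loss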
In [3]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_breast_cancer
X, y = load_breast_cancer(return_X_y=True) # load the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 42) # split the dataset
X_train_scaled = (X_train - np.average(X_train, 0)) / np.std(X_train)  # center each feature, divide by the global std of the training set
X_test_scaled = (X_test - np.average(X_test, 0)) / np.std(X_test)      # note: the test set is scaled with its own statistics here
#print(X.shape)
#print(X[:2])
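A more conventional preprocessing step is to fit the scaler on the training data only and reuse its per-feature statistics for the test set. A sketch with sklearn's StandardScaler is shown below; it is an alternative, not what produced the results further down, so the reported loss and accuracy would shift slightly:

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()                 # per-feature mean and std computed on the training data only
X_train_alt = scaler.fit_transform(X_train)
X_test_alt = scaler.transform(X_test)     # test set scaled with the training statistics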
In [4]:
# Create an instance of the LogisticRegression class
logit = LogisticRegression(learning_rate=0.01, num_iter=10000, verbose=True) # feel free to play with the hyperparameters to see how the learning rate affects training (see the sketch below)
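As a sketch of that experiment (variable names below are illustrative), one could train a few models with different learning rates and compare their final training loss:

# Compare the final training loss for a few learning rates (illustrative values)
for lr in (0.001, 0.01, 0.1):
    m = LogisticRegression(learning_rate=lr, num_iter=1000)
    m.train(X_train_scaled, y_train)
    print(lr, m._loss(m.predict_prob(X_train_scaled), y_train))  # a smaller final loss means faster convergence on this data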
In [5]:
# Train the models
logit.train(X_train_scaled, y_train)
loss: 0.6931471805599453 loss: 0.3677875336279482 loss: 0.30813972433579384 loss: 0.2837383861675563 loss: 0.2708270038151539 loss: 0.2629761041263377 loss: 0.2577488031548123 loss: 0.254031390193723 loss: 0.2512492230791035 loss: 0.2490792779461498 loss: 0.24732795803138768 loss: 0.24587329028807284 loss: 0.24463526792421764 loss: 0.24355962978846127 loss: 0.2426085185636089 loss: 0.2417548662954826 loss: 0.24097889707612913 loss: 0.24026588103931001 loss: 0.23960465347098578 loss: 0.23898661568183166 loss: 0.23840504709488353 loss: 0.2378546229601606 loss: 0.237331070677176 loss: 0.23683092123562108 loss: 0.23635132698966876 loss: 0.23588992637268458 loss: 0.23544474227493287 loss: 0.23501410486043947 loss: 0.23459659232931937 loss: 0.23419098499773808 loss: 0.2337962293600533 loss: 0.23341140970389912 loss: 0.23303572549166965 loss: 0.2326684731824616 loss: 0.2323090315018704 loss: 0.23195684941047418 loss: 0.2316114362011382 loss: 0.23127235328840479 loss: 0.23093920735282458 loss: 0.2306116445781363 loss: 0.23028934577614332 loss: 0.2299720222376261 loss: 0.22965941218105942 loss: 0.22935127769675998 loss: 0.22904740210421234 loss: 0.2287475876560675 loss: 0.22845165353471283 loss: 0.22815943409713021 loss: 0.22787077733158254 loss: 0.22758554349593044 loss: 0.2273036039124295 loss: 0.22702483989794348 loss: 0.22674914181183572 loss: 0.22647640820652837 loss: 0.22620654506796053 loss: 0.22593946513503269 loss: 0.2256750872886707 loss: 0.22541333600243166 loss: 0.2251541408476608 loss: 0.2248974360471234 loss: 0.22464316007181284 loss: 0.22439125527629852 loss: 0.22414166756854229 loss: 0.22389434611060072 loss: 0.2236492430470486 loss: 0.2234063132583235 loss: 0.22316551413650584 loss: 0.22292680538132623 loss: 0.22269014881442936 loss: 0.2224555082101385 loss: 0.22222284914114662 loss: 0.2219921388377268 loss: 0.22176334605919729 loss: 0.22153644097650593 loss: 0.22131139506491115 loss: 0.22108818100583968 loss: 0.22086677259708953 loss: 0.22064714467062843 loss: 0.22042927301730977 loss: 0.22021313431789186 loss: 0.2199987060798053 loss: 0.2197859665791637 loss: 0.21957489480756182 loss: 0.21936547042324453 loss: 0.2191576737062704 loss: 0.21895148551732665 loss: 0.2187468872598831 loss: 0.2185438608454005 loss: 0.2183423886613355 loss: 0.21814245354170517 loss: 0.21794403873999546 loss: 0.21774712790421885 loss: 0.21755170505393942 loss: 0.21735775455910342 loss: 0.21716526112052453 loss: 0.21697420975188705 loss: 0.2167845857631422 loss: 0.21659637474518226 loss: 0.21640956255568833 loss: 0.21622413530605525
In [6]:
# Normalize output generated by sigmoid function
y_pred = [int(round(x)) for x in logit.predict_prob(X_test_scaled).flatten()] # rounding the probability gives the predicted class; this could be written more simply
# look at the score
score = accuracy_score(y_test, y_pred) # 0.947 -> 94.7 %
print(score)
0.9473684210526315
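The same predictions can also be obtained through the class's own predict() method with a 0.5 threshold; this should print the same accuracy (up to ties at exactly 0.5):

# Using the predict() method instead of rounding the probabilities
y_pred_alt = logit.predict(X_test_scaled, 0.5).astype(int)
print(accuracy_score(y_test, y_pred_alt))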