# ANN_Project/ANN_momentum.py

from Activation import *
from Data import *

import numpy as np
import matplotlib.pyplot as plt
import time

class NN:
    """Feed-forward network with two hidden layers, trained with momentum.

    The hidden layers always use the sigmoid activation; the output layer
    uses the activation named by ``activation``. Training minimises the
    per-sample sum of squared errors using mini-batch gradient descent with
    a momentum term.

    Data layout: ``X`` is ``(input_size, n_samples)`` and ``Y`` is
    ``(output_size, n_samples)``, i.e. one sample per column.
    """

    def __init__(self, X, Y, input_size, output_size, epochs, learning_rate, batch_size, activation, momentum):
        """Store the hyper-parameters and data, then draw initial weights.

        Args:
            X: Input samples, one per column.
            Y: Target vectors, one per column.
            input_size: Number of input features (rows of ``X``).
            output_size: Number of output units (rows of ``Y``).
            epochs: Number of passes over the data in ``train``.
            learning_rate: Step size applied to the averaged batch gradient.
            batch_size: Number of samples per mini-batch.
            activation: Name of the output-layer activation, resolved through
                ``map_activaion`` / ``map_activaion_derivative``.
            momentum: Factor applied to the previous update when forming the
                next one.
        """
        self.input_size = input_size
        self.output_size = output_size
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.activation = activation
        self.momentum = momentum
        self.SIGMOID = "sigmoid"
        self.hidden_layers = [150, 60]
        self.X = X
        self.Y = Y
        self.W1, self.W2, self.W3 = None, None, None
        self.b1, self.b2, self.b3 = None, None, None
        self.a1, self.a2, self.a3 = None, None, None
        self.z1, self.z2, self.z3 = None, None, None
        self.accuracies = []
        self.costs = []
        self.time = 0
        # Each list holds the most recent parameter update (the momentum
        # "velocity") as its last element; read it with ``[-1]``.
        self.GW1 = []
        self.GW2 = []
        self.GW3 = []
        self.GB1 = []
        self.GB2 = []
        self.GB3 = []
        self.initialize_weights()

    def initialize_weights(self):
        """Draw weights from a standard normal distribution and zero the biases."""
        self.W1 = np.random.normal(0, 1, (self.hidden_layers[0], self.input_size))
        self.W2 = np.random.normal(0, 1, (self.hidden_layers[1], self.hidden_layers[0]))
        self.W3 = np.random.normal(0, 1, (self.output_size, self.hidden_layers[1]))
        self.b1 = np.zeros((self.hidden_layers[0], 1))
        self.b2 = np.zeros((self.hidden_layers[1], 1))
        self.b3 = np.zeros((self.output_size, 1))

    def shuffle_data(self):
        """Apply one random permutation to the columns of both ``X`` and ``Y``."""
        order = np.random.permutation(self.X.shape[1])
        self.X = self.X[:, order]
        self.Y = self.Y[:, order]

    def forward_propagation(self, x):
        """Run a forward pass for ``x`` and cache ``z1..z3`` / ``a1..a3`` on the instance."""
        hidden_activation = map_activaion(self.SIGMOID)
        self.z1 = self.W1 @ x + self.b1
        self.a1 = hidden_activation(self.z1)
        self.z2 = self.W2 @ self.a1 + self.b2
        self.a2 = hidden_activation(self.z2)
        self.z3 = self.W3 @ self.a2 + self.b3
        self.a3 = map_activaion(self.activation)(self.z3)

    def backward_propagation(self, x, y, g_w1, g_w2, g_w3, g_b1, g_b2, g_b3):
        """Accumulate the squared-error gradient for one sample.

        The ``g_*`` arrays are updated in place (``+=``) so the caller can
        sum gradients over a mini-batch.

        Args:
            x: Input column vector of shape ``(input_size, 1)``.
            y: Target column vector of shape ``(output_size, 1)``.
            g_w1, g_w2, g_w3: Gradient accumulators shaped like ``W1..W3``.
            g_b1, g_b2, g_b3: Gradient accumulators shaped like ``b1..b3``.
        """
        self.forward_propagation(x)
        hidden_derivative = map_activaion_derivative(self.SIGMOID)

        # Each layer's delta is computed once and reused for the weight
        # gradient, the bias gradient and the back-propagated signal.
        delta3 = 2 * (self.a3 - y) * map_activaion_derivative(self.activation)(self.z3)
        g_w3 += delta3 @ self.a2.T
        g_b3 += delta3

        delta2 = (self.W3.T @ delta3) * hidden_derivative(self.z2)
        g_w2 += delta2 @ self.a1.T
        g_b2 += delta2

        delta1 = (self.W2.T @ delta2) * hidden_derivative(self.z1)
        g_w1 += delta1 @ x.T
        g_b1 += delta1

    def accuracy_cost(self):
        """Evaluate the network on the current ``self.X`` / ``self.Y``.

        Returns:
            A tuple ``(accuracy, cost)``: the fraction of samples whose
            arg-max output matches the arg-max target, and the mean
            per-sample sum of squared errors.
        """
        n_samples = self.X.shape[1]
        correct = 0
        total_cost = 0
        for i in range(n_samples):
            target = self.Y[:, i].reshape(self.output_size, 1)
            self.forward_propagation(self.X[:, i].reshape(self.input_size, 1))
            if np.argmax(self.a3) == np.argmax(target):
                correct += 1
            total_cost += np.sum(np.square(self.a3 - target))
        return correct / n_samples, total_cost / n_samples

    def _momentum_step(self, grad, history, b_s):
        """Return the next update for one parameter and record it in ``history``."""
        # An empty history (update requested before ``train``) means there
        # is no previous update, which is the same as a zero velocity.
        previous = history[-1] if history else 0
        delta = -self.learning_rate * grad / b_s + self.momentum * previous
        # Only the latest update is ever read, so keep just that one entry
        # rather than every update produced during training.
        history[:] = [delta]
        return delta

    def update_weights(self, g_w1, g_w2, g_w3, g_b1, g_b2, g_b3, b_s):
        """Apply one momentum update from gradients summed over ``b_s`` samples."""
        self.W1 += self._momentum_step(g_w1, self.GW1, b_s)
        self.W2 += self._momentum_step(g_w2, self.GW2, b_s)
        self.W3 += self._momentum_step(g_w3, self.GW3, b_s)
        self.b1 += self._momentum_step(g_b1, self.GB1, b_s)
        self.b2 += self._momentum_step(g_b2, self.GB2, b_s)
        self.b3 += self._momentum_step(g_b3, self.GB3, b_s)

    def train(self):
        """Train for ``self.epochs`` epochs of shuffled mini-batches.

        After each epoch the accuracy (in percent) and cost on ``self.X`` /
        ``self.Y`` are appended to ``self.accuracies`` and ``self.costs``.
        The elapsed wall time in milliseconds is stored in ``self.time``.
        """
        tic = time.perf_counter()
        # Every call to train starts with zero velocity.
        for history in (self.GW1, self.GW2, self.GW3, self.GB1, self.GB2, self.GB3):
            history[:] = [0]
        n_samples = self.X.shape[1]
        for _ in range(self.epochs):
            self.shuffle_data()
            # Stepping by batch_size also visits the final, possibly
            # shorter, batch; slicing clamps its end to n_samples.
            for start in range(0, n_samples, self.batch_size):
                B_X = self.X[:, start:start + self.batch_size]
                B_Y = self.Y[:, start:start + self.batch_size]
                g_w1, g_w2, g_w3 = np.zeros(self.W1.shape), np.zeros(self.W2.shape), np.zeros(self.W3.shape)
                g_b1, g_b2, g_b3 = np.zeros(self.b1.shape), np.zeros(self.b2.shape), np.zeros(self.b3.shape)
                for j in range(B_X.shape[1]):
                    self.backward_propagation(
                        B_X[:, j].reshape(self.input_size, 1),
                        B_Y[:, j].reshape(self.output_size, 1),
                        g_w1, g_w2, g_w3, g_b1, g_b2, g_b3,
                    )
                self.update_weights(g_w1, g_w2, g_w3, g_b1, g_b2, g_b3, B_X.shape[1])
            accuracy, cost = self.accuracy_cost()
            self.accuracies.append(accuracy * 100)
            self.costs.append(cost)
        toc = time.perf_counter()
        self.time = (toc - tic) * 1000

    def show_result(self):
        """Print the training time and final accuracy, then plot both curves."""
        print('Time:', self.time, 'ms')
        print('Accuracy:', f'{self.accuracies[-1]:.3f}', '%')
        fig, (ax1, ax2) = plt.subplots(1, 2)
        ax1.plot(self.accuracies)
        ax1.set_title('Accuracy')
        ax2.plot(self.costs)
        ax2.set_title('Cost')
        plt.show()

    def first_assign_prediction(self):
        """Re-draw the weights and return the resulting accuracy in percent.

        Note that this replaces the current weights, including trained ones.
        """
        self.initialize_weights()
        return self.accuracy_cost()[0] * 100

# Experiment driver: train the network N times for each of two momentum
# values, average the per-epoch curves and final metrics, then report and
# plot them side by side.

EPOCHS = 10  # epochs per training run; also the length of the averaged curves


def _run_experiment(momentum, n_runs, x_train, y_train, x_test, y_test):
    """Train ``n_runs`` fresh networks with ``momentum`` and average the results.

    Returns:
        A tuple ``(acc_curve, cost_curve, train_acc, test_acc, avg_time)``
        where the curves are per-epoch averages (accuracy in percent),
        ``train_acc`` is the average final training accuracy in percent,
        ``test_acc`` is the average test accuracy as a fraction, and
        ``avg_time`` is the average training time in milliseconds.
    """
    acc_curve = [0] * EPOCHS
    cost_curve = [0] * EPOCHS
    train_acc = 0
    test_acc = 0
    total_time = 0
    for _ in range(n_runs):
        nn = NN(x_train, y_train, 102, 4, EPOCHS, 1, 10, "sigmoid", momentum)
        nn.train()
        for j in range(EPOCHS):
            acc_curve[j] += nn.accuracies[j]
            cost_curve[j] += nn.costs[j]
        train_acc += nn.accuracies[-1]
        total_time += nn.time
        # Point the trained network at the held-out data to score it.
        nn.X = x_test
        nn.Y = y_test
        test_acc += nn.accuracy_cost()[0]
    acc_curve = [x / n_runs for x in acc_curve]
    cost_curve = [x / n_runs for x in cost_curve]
    return acc_curve, cost_curve, train_acc / n_runs, test_acc / n_runs, total_time / n_runs


X_train, Y_train, X_test, Y_test = preprocess()
X_t = X_train
Y_t = Y_train
N = int(input('Enter the number of run: '))
if N < 1:
    # Averages over zero (or a negative number of) runs are undefined.
    raise ValueError('The number of runs must be a positive integer')

ACCs, COSTs, ACC, ACC_t, TIME = _run_experiment(0.3, N, X_t, Y_t, X_test, Y_test)
print('Average Time:', TIME, f'ms per {N} times')
print('Average Accuracy(TRAIN):', f'{ACC:.3f}', f'% per {N} times')
print('Average Accuracy(TEST):', f'{ACC_t * 100:.3f}', f'% per {N} times')

ACCs2, COSTs2, ACC2, ACC_t2, TIME2 = _run_experiment(0.6, N, X_t, Y_t, X_test, Y_test)
# Report this experiment's own timing (TIME2), not the first run's.
print('Average Time:', TIME2, f'ms per {N} times')
print('Average Accuracy(TRAIN):', f'{ACC2:.3f}', f'% per {N} times')
print('Average Accuracy(TEST):', f'{ACC_t2 * 100:.3f}', f'% per {N} times')


fig, axs = plt.subplots(2, 2)
axs[0, 0].plot(ACCs)
axs[0, 0].set_title('Accuracy with momentum 0.3')
axs[0, 1].plot(COSTs)
axs[0, 1].set_title('Cost with momentum 0.3')
axs[1, 0].plot(ACCs2)
axs[1, 0].set_title('Accuracy with momentum 0.6')
axs[1, 1].plot(COSTs2)
axs[1, 1].set_title('Cost with momentum 0.6')
plt.show()