My loss doesn't drop in my neural network implementation

Thread starter: Omar Tarek (Guest)
I made a neural network from scratch and the loss barely changes at all. This is my training code:

Code:
    def backward_propagation(self, X, y, activations):
        dz = []
        m = X.shape[1]
        dW = []
        dB = []
        for i in reversed(range(1, len(self.layers))):
            if i == len(self.layers)-1:
                dz = activations[i] - y
            else:
                dz = np.dot(self.weights[i].T, dz) * self.activation_derivative(activations[i], self.activations[i])
            
            dw = np.dot(dz, activations[i-1].T) / m
            db = np.sum(dz, axis=1, keepdims=True) / m
            
            dW.append(dw)
            dB.append(db)
        
        return dW[::-1], dB[::-1]

    def update_parameters(self, dW, dB, learning_rate):
        for i in range(len(self.weights)):
            self.weights[i] -= learning_rate * dW[i]
            self.biases[i] -= learning_rate * dB[i]


    def train(self, X, y, learning_rate=0.01, epochs=1000):
        m = X.shape[1]
        for epoch in range(epochs):
            total_loss = 0
            for i in range(m):
                x_sample = X[:, i:i+1]
                y_sample = y[:, i:i+1]
                
                activations = self.feed_forward(x_sample)
                dW, dB = self.backward_propagation(x_sample, y_sample, activations)
                self.update_parameters(dW, dB, learning_rate)
                
                loss = self.compute_loss(activations[-1], y_sample)
                total_loss += loss
            
            avg_loss = total_loss / m
            if epoch % 100 == 0:
                print(f'Epoch {epoch}, Average Loss: {avg_loss}')

And this is the full class code:

Code:
import numpy as np


class NeuralNetwork:
    def __init__(self, *, input_size):
        self.input_size = input_size
        self.layers = [input_size]
        self.weights = []
        self.biases = []
        self.activations = []
        
    def add_layer(self, layer_size, activation='relu'):
        self.layers.append(layer_size)
        self.activations.append(activation)
        
    def initialize_weights(self):
        self.weights = []
        self.biases = []
        for i in range(1, len(self.layers)):
            in_dim = self.layers[i-1]
            out_dim = self.layers[i]
            stddev = np.sqrt(2 / (in_dim + out_dim))
            
            weight_matrix = np.random.normal(loc=0.0, scale=stddev, size=(out_dim, in_dim))
            bias_vector = np.random.normal(loc=0.0, scale=stddev, size=(out_dim, 1))
            
            self.weights.append(weight_matrix)
            self.biases.append(bias_vector)
    
    def activate(self, Z, activation):
        if activation == 'relu':
            return np.maximum(0, Z)
        elif activation == 'tanh':
            return np.tanh(Z)
        elif activation == 'softmax':
            exp_Z = np.exp(Z - np.max(Z, axis=0, keepdims=True))
            return exp_Z / np.sum(exp_Z, axis=0, keepdims=True)
        elif activation == 'linear':
            return Z
        elif activation == 'sigmoid':
            return 1 / (1 + np.exp(-Z))
        elif activation == 'binary':
            return (Z > 0.5).astype(int)  # Binary activation for output layer
        else:
            raise ValueError(f"Unsupported activation function: {activation}")
    
    
    def activation_derivative(self, A, activation):
        if activation == 'relu':
            return (A > 0).astype(float)
        elif activation == 'tanh':
            return 1 - np.power(A, 2)
        elif activation == 'sigmoid':
            return A * (1 - A)
        elif activation == 'linear':
            return np.ones_like(A)
        elif activation == 'softmax':
            return A * (1 - A)
        elif activation == 'binary':
            return 1
        else:
            raise ValueError(f"Unsupported activation function: {activation}")
        
        
    def feed_forward(self, X):
        A = X
        activations = [A]
        for weights, bias, activation in zip(self.weights, self.biases, self.activations):
            Z = np.dot(weights, A) + bias
            A = self.activate(Z, activation)
            activations.append(A)
        return activations
    
    
    def backward_propagation(self, X, y, activations):
        dz = []
        m = X.shape[1]
        dW = []
        dB = []
        for i in reversed(range(1, len(self.layers))):
            if i == len(self.layers)-1:
                dz = activations[i] - y
            else:
                dz = np.dot(self.weights[i].T, dz) * self.activation_derivative(activations[i], self.activations[i])
            
            dw = np.dot(dz, activations[i-1].T) / m
            db = np.sum(dz, axis=1, keepdims=True) / m
            
            dW.append(dw)
            dB.append(db)
        
        return dW[::-1], dB[::-1]  # Reverse the lists to match weights/biases order

    def update_parameters(self, dW, dB, learning_rate):
        for i in range(len(self.weights)):
            self.weights[i] -= learning_rate * dW[i]
            self.biases[i] -= learning_rate * dB[i]


    def train(self, X, y, learning_rate=0.01, epochs=1000):
        m = X.shape[1]
        for epoch in range(epochs):
            total_loss = 0
            for i in range(m):
                x_sample = X[:, i:i+1]
                y_sample = y[:, i:i+1]
                
                activations = self.feed_forward(x_sample)
                dW, dB = self.backward_propagation(x_sample, y_sample, activations)
                self.update_parameters(dW, dB, learning_rate)
                
                loss = self.compute_loss(activations[-1], y_sample)
                total_loss += loss
            
            avg_loss = total_loss / m
            if epoch % 100 == 0:
                print(f'Epoch {epoch}, Average Loss: {avg_loss}')
    
    def compute_loss(self, A, y):
        m = y.shape[1]
        loss = -np.sum(y * np.log(A + 1e-8) + (1 - y) * np.log(1 - A + 1e-8)) / m
        return loss
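
For context, this is roughly how the class gets wired up and trained. The layer sizes and the tiny XOR-style data below are placeholders to show the call order, not my real dataset:

Code:
import numpy as np

# Toy data shaped (features, samples), matching the column-vector
# convention used by feed_forward / train above.
X = np.array([[0, 0, 1, 1],
              [0, 1, 0, 1]], dtype=float)   # shape (2, 4)
y = np.array([[0, 1, 1, 0]], dtype=float)   # shape (1, 4)

nn = NeuralNetwork(input_size=2)
nn.add_layer(4, activation='relu')          # hidden layer
nn.add_layer(1, activation='sigmoid')       # sigmoid output to match the BCE loss
nn.initialize_weights()

nn.train(X, y, learning_rate=0.1, epochs=1000)  # prints the average loss every 100 epochs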

I tried changing my initialization function to a Xavier implementation and changing the learning rate, and there's still no real change in the loss:

Code:
Epoch 0, Average Loss: 0.8672735163691898
Epoch 100, Average Loss: 0.6935956011113185
Epoch 200, Average Loss: 0.690694091666978
Epoch 300, Average Loss: 0.6922357305611471
Epoch 400, Average Loss: 0.6918833076884003
Epoch 500, Average Loss: 0.6909379643394351
Epoch 600, Average Loss: 0.6902891583150265
Epoch 700, Average Loss: 0.6875228090388348
Epoch 800, Average Loss: 0.6879678899764555
Epoch 900, Average Loss: 0.6670931736764081

These are my average losses.
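
One way to narrow this down might be a finite-difference gradient check against backward_propagation. The sketch below is only a rough check written against the class as defined above; nn is assumed to be a small network built like in the usage snippet earlier, and eps is an arbitrary choice:

Code:
def gradient_check(nn, X, y, eps=1e-5):
    # Analytic gradients from the implementation above.
    activations = nn.feed_forward(X)
    dW, dB = nn.backward_propagation(X, y, activations)

    # Finite-difference estimate for a single entry of the first weight matrix.
    i, j = 0, 0
    original = nn.weights[0][i, j]

    nn.weights[0][i, j] = original + eps
    loss_plus = nn.compute_loss(nn.feed_forward(X)[-1], y)

    nn.weights[0][i, j] = original - eps
    loss_minus = nn.compute_loss(nn.feed_forward(X)[-1], y)

    nn.weights[0][i, j] = original  # restore the weight

    numeric = (loss_plus - loss_minus) / (2 * eps)
    analytic = dW[0][i, j]
    print(f'analytic: {analytic:.6f}  numeric: {numeric:.6f}')

If the analytic and numeric values disagree noticeably, the gradients (rather than the initialization or the learning rate) would be the place to look.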