Keras model gives exact same prediction value for all inputs even though it has high accuracy in training

import numpy as np
import tensorflow as tf
import keras
from keras import layers
from tensorflow import data as tf_data
import matplotlib.pyplot as plt

# Define image size and batch size
image_size = (180, 180)
batch_size = 32

# Load the dataset
train_ds, val_ds = keras.utils.image_dataset_from_directory(
    "images",
    validation_split=0.2,
    subset="both",
    seed=1337,
    image_size=image_size,
    batch_size=batch_size,
)

# Define data augmentation layers
data_augmentation_layers = [
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
]

# Data augmentation function
def data_augmentation(images):
    for layer in data_augmentation_layers:
        images = layer(images)
    return images

# Apply data augmentation to the dataset
augmented_train_ds = train_ds.map(
    lambda x, y: (data_augmentation(x), y))

# Apply data augmentation to the training images
train_ds = train_ds.map(
    lambda img, label: (data_augmentation(img), label),
    num_parallel_calls=tf_data.AUTOTUNE,
)

# Prefetch data for faster processing
train_ds = train_ds.prefetch(tf_data.AUTOTUNE)
val_ds = val_ds.prefetch(tf_data.AUTOTUNE)

# Function to create the model with a simplified architecture
def make_simplified_model(input_shape, num_classes):
    inputs = keras.Input(shape=input_shape)

    # Entry block
    x = layers.Rescaling(1.0 / 255)(inputs)  # Rescale the input images to [0, 1]
    x = layers.Conv2D(128, 3, strides=2, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)

    # First block (simplified)
    x = layers.SeparableConv2D(256, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)
    x = layers.MaxPooling2D(3, strides=2, padding="same")(x)

    # Second block (simplified)
    x = layers.SeparableConv2D(512, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)
    x = layers.MaxPooling2D(3, strides=2, padding="same")(x)

    # Global average pooling and dropout
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dropout(0.25)(x)

    # Output layer (binary classification)
    outputs = layers.Dense(1, activation=tf.nn.sigmoid)(x)

    return keras.Model(inputs, outputs)

# Build the model with the simplified architecture
model = make_simplified_model(input_shape=image_size + (3,), num_classes=2)

# Visualize the model structure
keras.utils.plot_model(model, show_shapes=True)

# Number of epochs for training
epochs = 25

# Define callbacks (to save the model during training)
callbacks = [
    keras.callbacks.ModelCheckpoint("save_at_{epoch}.keras"),
]

# Compile the model
model.compile(
    optimizer=keras.optimizers.Adam(3e-4),
    loss=keras.losses.BinaryCrossentropy(from_logits=False),
    metrics=[keras.metrics.BinaryAccuracy(name="acc")],
)

# Train the model
history = model.fit(
    train_ds,
    epochs=epochs,
    callbacks=callbacks,
    validation_data=val_ds,
)

# Plot training and validation accuracy and loss
plt.plot(history.history['acc'], label="Training Accuracy")
plt.plot(history.history['val_acc'], label="Validation Accuracy")
plt.plot(history.history['loss'], label="Training Loss")
plt.plot(history.history['val_loss'], label="Validation Loss")
plt.legend()
plt.show()

This is how i train my model, and the next code is how i test the prediction:

import tensorflow as tf
from tensorflow import keras
import numpy as np
from PIL import Image

# Load the saved model with weights (make sure you use the correct epoch file)
model = keras.models.load_model("save_at_1.keras", compile=False)  # Adjust the filename to the correct one if needed

# Test the model with a sample image
sample_image_path = "sample_image.jpg"  # Replace with your image file path

def preprocess_image(image_path, target_size):
    # Load and preprocess the image
    img = Image.open(image_path)
    img = img.resize(target_size)  # Resize to model's expected input size
    img_array = np.array(img).astype("float32") / 255.0  # Normalize
    img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension
    return img_array

# Preprocess the sample image
input_image = preprocess_image(sample_image_path, target_size=(180, 180))

# Run prediction
prediction = model.predict_on_batch(input_image)
print("Prediction:", prediction[0][0])

# Interpret the result
if prediction[0][0] > 0.5:
    print("Prediction: Class 1 (e.g., Open Eyes)")
else:
    print("Prediction: Class 0 (e.g., Closed Eyes)")

The output prediction is always the exact same value. I tried finding solutions for hours but i am just stuck and hope someone can help me with this!!!

Tried removing some layers
Tried decreasing the batch size
Tried different images
Tried different epochs
Tried converting the model to a different datatype but same result

You need to sign in to view this answers

Related Post