OiO.lk Community platform!

Oio.lk is an excellent forum for developers, providing a wide range of resources, discussions, and support for those in the developer community. Join oio.lk today to connect with like-minded professionals, share insights, and stay updated on the latest trends and technologies in the development field.
  You need to log in or register to access the solved answers to this problem.
  • You have reached the maximum number of guest views allowed
  • Please register below to remove this limitation

Python Interpreter State Error During Tensorflow Model Training with Early Stopping

  • Thread starter Thread starter Hasan Cetin
  • Start date Start date
H

Hasan Cetin

Guest
I'm training a multi-class U-Net model for medical image segmentation using TensorFlow and Keras, and I encounter a specific error when the training process stops via early stopping. The error does not appear if I manually stop the training before the early stopping condition is met. Here's the error message:

"W tensorflow/core/kernels/data/generator_dataset_op.cc:108] Error occurred when finalizing GeneratorDataset iterator: FAILED_PRECONDITION: Python interpreter state is not initialized. The process may be terminated. [[{{node PyFunc}}]]"

However, this error only occurs when training stops via early stopping; if I decrease the epoch count so that training finishes before the early stopping condition is triggered, I don't get the error.

Here is my code:

Code:
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras.models import Model
from keras.layers import Conv2D, MaxPooling2D, Dropout, UpSampling2D, concatenate
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import to_categorical


# Restrict TensorFlow to GPU index 1 only.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

# define the image size and batch size
img_size = (256, 256)      # (height, width) every image/mask is resized to
batch_size = 16            # samples per batch yielded by the generators
num_layers = 3             # NOTE(review): not referenced anywhere below — candidate for removal
org_dir = r"somepathway"   # directory of the input RGB images
mask_dir = r"somepathway"  # directory of the grayscale segmentation masks

seed = 3                   # RNG seed (NOTE(review): the flow_from_directory calls below pass the literal 3 instead)
epochs = 50                # maximum epochs; EarlyStopping may end training sooner
model_name = "test182v2"   # base name used for checkpoint / weight files
num_classes = 5            # background (0) + 4 classes mapped from mask values 40/80/120/160


def preprocess_masks(mask):
    """Convert raw mask pixel values to one-hot class maps.

    Pixel values 40/80/120/160 map to class indices 1-4; every other value
    falls through to class 0 (background).  Returns the one-hot encoding
    with `num_classes` channels.
    """
    value_to_class = [(40, 1), (80, 2), (120, 3), (160, 4)]
    conditions = [mask == value for value, _ in value_to_class]
    choices = [cls for _, cls in value_to_class]
    # np.select picks the class for the first matching condition, default 0.
    mask = np.select(conditions, choices, default=0)
    return to_categorical(mask, num_classes=num_classes)


# Modified generator to handle both images and masks
def combine_generator(img_gen, mask_gen):
    """Yield (image_batch, one_hot_mask_batch) pairs from two parallel iterators.

    The two iterators are expected to be seeded identically so the n-th image
    batch corresponds to the n-th mask batch.  Masks are one-hot encoded via
    preprocess_masks().  On error the generator logs and stops instead of
    crashing the training loop (best-effort behavior kept from the original).
    """
    while True:
        try:
            # BUG FIX: use the builtin next() (Python 3 iterator protocol)
            # instead of the legacy .next() method, which only exists on
            # Keras iterator objects and fails for plain generators.
            img = next(img_gen)
            mask = next(mask_gen)
            yield (img, preprocess_masks(mask))
        except StopIteration:
            # Underlying iterator exhausted — end this generator cleanly.
            break
        except Exception as e:  # best-effort: log and stop, as before
            print(f"Error in generator: {e}")
            break

# Data generator setup.
#
# BUG FIX (two related defects in the original):
#   1. The masks were rescaled by 1/255 too (rescale was in the shared args),
#      so mask pixels 40/80/120/160 became ~0.157/0.314/... and the equality
#      tests in preprocess_masks (mask == 40, ...) never matched — every mask
#      collapsed to class 0.  Masks must keep their raw label values.
#   2. The images received geometric augmentation (rotation / width shift) but
#      the masks did not, destroying the pixel-wise image<->mask alignment a
#      segmentation model depends on.  Both datagens now share the geometric
#      arguments; identical seeds keep the random transforms in sync, and
#      interpolation_order=0 (nearest neighbor) stops the mask labels from
#      being blurred into intermediate values by the warps.
geometric_args = dict(rotation_range=20,
                      width_shift_range=0.2,
                      validation_split=0.2)

image_datagen = ImageDataGenerator(rescale=1./255, **geometric_args)
mask_datagen = ImageDataGenerator(interpolation_order=0, **geometric_args)

# Create generators for images and masks (same seed => paired batches).
# Also fixed: use the module-level `seed` instead of the hard-coded literal 3.
image_generator = image_datagen.flow_from_directory(
    org_dir, class_mode=None, color_mode='rgb',
    target_size=img_size, batch_size=batch_size, subset='training', seed=seed)

mask_generator = mask_datagen.flow_from_directory(
    mask_dir, class_mode=None, color_mode='grayscale',
    target_size=img_size, batch_size=batch_size, subset='training', seed=seed)

image_generator_val = image_datagen.flow_from_directory(
    org_dir, class_mode=None, color_mode='rgb',
    target_size=img_size, batch_size=batch_size, subset='validation', seed=seed)

mask_generator_val = mask_datagen.flow_from_directory(
    mask_dir, class_mode=None, color_mode='grayscale',
    target_size=img_size, batch_size=batch_size, subset='validation', seed=seed)

train_generator = combine_generator(image_generator, mask_generator)
validation_generator = combine_generator(image_generator_val, mask_generator_val)


def _conv_block(x, filters):
    """Two stacked 3x3 ReLU convolutions with 'same' padding (one U-Net level)."""
    x = Conv2D(filters, (3, 3), activation='relu', padding='same')(x)
    return Conv2D(filters, (3, 3), activation='relu', padding='same')(x)


def _up_block(x, skip, filters):
    """Upsample x by 2, project with a 2x2 conv, concat the encoder skip, then double-conv."""
    up = Conv2D(filters, (2, 2), activation='relu', padding='same')(
        UpSampling2D(size=(2, 2))(x))
    merged = concatenate([skip, up], axis=3)
    return _conv_block(merged, filters)


def unet(input_size=(img_size[0], img_size[1], 3), num_classes=num_classes):
    """Build and compile a 5-level U-Net for multi-class segmentation.

    Args:
        input_size: (H, W, C) shape of the input images.
        num_classes: number of output classes (per-pixel softmax channels).

    Returns:
        A compiled keras Model (categorical cross-entropy, accuracy + AUC).
    """
    inputs = keras.Input(input_size)

    # Encoder: double-conv + 2x2 max-pool at each level, doubling the filters.
    conv1 = _conv_block(inputs, 64)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)

    conv2 = _conv_block(pool1, 128)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)

    conv3 = _conv_block(pool2, 256)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)

    drop4 = Dropout(0.5)(_conv_block(pool3, 512))
    pool4 = MaxPooling2D(pool_size=(2, 2))(drop4)

    # Bottleneck.
    drop5 = Dropout(0.5)(_conv_block(pool4, 1024))

    # Decoder: upsample and fuse with the matching encoder skip connection.
    conv6 = _up_block(drop5, drop4, 512)
    conv7 = _up_block(conv6, conv3, 256)
    conv8 = _up_block(conv7, conv2, 128)
    conv9 = _up_block(conv8, conv1, 64)

    # Per-pixel softmax over the classes.
    outputs = Conv2D(num_classes, (1, 1), activation='softmax')(conv9)

    model = Model(inputs=[inputs], outputs=[outputs])
    # BUG FIX: Adam's `lr` keyword was deprecated and later removed
    # (TensorFlow >= 2.11); `learning_rate` is the supported name.
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.0001),
                  loss='categorical_crossentropy',
                  metrics=['accuracy', tf.keras.metrics.AUC()])
    model.summary()
    return model


# Save only the best weights so far, judged by validation accuracy.
checkpoint = ModelCheckpoint(filepath=f'{model_name}_weights.h5',
                             monitor='val_accuracy',
                             save_best_only=True,
                             save_weights_only=True,
                             verbose=1)

# Stop training once val_loss has not improved for 2 consecutive epochs,
# restoring the weights from the best epoch seen.
early_stopping = EarlyStopping(monitor='val_loss',
                               patience=2,
                               verbose=1,
                               restore_best_weights=True,
                               )

# Multiply the learning rate by 0.1 after one epoch without val_loss improvement.
reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                              factor=0.1,
                              patience=1,
                              verbose=1)

# define the model
model = unet(input_size=(img_size[0], img_size[1], 3), num_classes=num_classes)

# define the callbacks for real-time model performance tracking and evaluation
# NOTE(review): this keeps TWO ModelCheckpoint callbacks, as in the original —
# one saving the full model to f'{model_name}.h5' (monitoring val_loss by
# default) and `checkpoint` above saving best-val_accuracy weights only.
# Consider whether both are intended.
callbacks = [
    keras.callbacks.ModelCheckpoint(f'{model_name}.h5', save_best_only=True),
    keras.callbacks.TensorBoard(log_dir='./logs'),
    checkpoint, early_stopping, reduce_lr
]

# BUG FIX: steps_per_epoch / validation_steps must be integers; the original
# passed floats ((n * 0.8) // batch_size evaluates to a float), which newer
# TensorFlow versions reject.
train_steps = int((len(os.listdir(r"somepathway")) * 0.8) // batch_size)
val_steps = int((len(os.listdir(r"somepathway")) * 0.2) // batch_size)

# train the model
# BUG FIX: `batch_size` must not be passed to fit() when the data comes from a
# generator — the generator already yields whole batches, and TF raises a
# ValueError for the combination.
history = model.fit(train_generator, epochs=epochs, verbose=1,
                    callbacks=callbacks, validation_data=validation_generator,
                    steps_per_epoch=train_steps,
                    validation_steps=val_steps)

print("Done")

I suspect the issue might be related to how TensorFlow handles threads or processes, especially when triggered by the early stopping callback. Does anyone know why this error occurs and how to resolve it to ensure smooth early stopping without crashing?

Thank you!

I have tried modifying the combine_generator function and experimented with the EarlyStopping arguments, but I keep getting the same error.
<p>I'm training a multi-class U-Net model for medical image segmentation using TensorFlow and Keras, and I encounter a specific error when the training process stops via early stopping. The error does not appear if I manually stop the training before the early stopping condition is met. Here's the error message:</p>
<p><code>"W tensorflow/core/kernels/data/generator_dataset_op.cc:108] Error occurred when finalizing GeneratorDataset iterator: FAILED_PRECONDITION: Python interpreter state is not initialized. The process may be terminated. [[{{node PyFunc}}]]"</code></p>
<p>However this error only occurs if the training stops with early stopping, if I decrease the epoch number and make the train stop before early stopping occurs I don't get that error.</p>
<p>Here is my code:</p>
<pre><code>import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras.models import Model
from keras.layers import Conv2D, MaxPooling2D, Dropout, UpSampling2D, concatenate
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import to_categorical


os.environ["CUDA_VISIBLE_DEVICES"] = "1"

# define the image size and batch size
img_size = (256, 256)
batch_size = 16
num_layers = 3
org_dir = r"somepathway"
mask_dir = r"somepathway"

seed = 3
epochs = 50
model_name = "test182v2"
num_classes = 5


def preprocess_masks(mask):
conditions = [mask == 40, mask == 80, mask == 120, mask == 160]
choices = [1, 2, 3, 4]
# Apply np.select to set desired values based on conditions, and default to 0
mask = np.select(conditions, choices, default=0)
return to_categorical(mask, num_classes=num_classes)


# Modified generator to handle both images and masks
def combine_generator(img_gen, mask_gen):
while True:
try:
img = img_gen.next()
mask = mask_gen.next()
yield (img, preprocess_masks(mask))
except Exception as e:
print(f"Error in generator: {e}")
break

# Data generator setup
data_gen_args = dict(rescale=1./255,
validation_split=0.2)

image_datagen = ImageDataGenerator(rotation_range=20,
width_shift_range=0.2,
**data_gen_args)
mask_datagen = ImageDataGenerator(**data_gen_args)

# Create generators for images and masks
image_generator = image_datagen.flow_from_directory(
org_dir, class_mode=None, color_mode='rgb',
target_size=img_size, batch_size=batch_size, subset='training', seed=3)

mask_generator = mask_datagen.flow_from_directory(
mask_dir, class_mode=None, color_mode='grayscale',
target_size=img_size, batch_size=batch_size, subset='training', seed=3)

image_generator_val = image_datagen.flow_from_directory(
org_dir, class_mode=None, color_mode='rgb',
target_size=img_size, batch_size=batch_size, subset='validation', seed=3)

mask_generator_val = mask_datagen.flow_from_directory(
mask_dir, class_mode=None, color_mode='grayscale',
target_size=img_size, batch_size=batch_size, subset='validation', seed=3)

train_generator = combine_generator(image_generator, mask_generator)
validation_generator = combine_generator(image_generator_val, mask_generator_val)


def unet(input_size=(img_size[0], img_size[1], 3), num_classes=num_classes):
inputs = keras.Input(input_size)

# Encoder
conv1 = Conv2D(64, (3, 3), activation='relu', padding='same')(inputs)
conv1 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv1)
pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)

# Defining the second level of the U-Net model
conv2 = Conv2D(128, (3, 3), activation='relu', padding='same')(pool1)
conv2 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv2)
pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)

# Defining the third level of the U-Net model
conv3 = Conv2D(256, (3, 3), activation='relu', padding='same')(pool2)
conv3 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv3)
pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)

# Defining the fourth level of the U-Net model
conv4 = Conv2D(512, (3, 3), activation='relu', padding='same')(pool3)
conv4 = Conv2D(512, (3, 3), activation='relu', padding='same')(conv4)
drop4 = Dropout(0.5)(conv4)
pool4 = MaxPooling2D(pool_size=(2, 2))(drop4)

# Defining the fifth level of the U-Net model
conv5 = Conv2D(1024, (3, 3), activation='relu', padding='same')(pool4)
conv5 = Conv2D(1024, (3, 3), activation='relu', padding='same')(conv5)
drop5 = Dropout(0.5)(conv5)

# Defining the sixth level of the U-Net model
up6 = Conv2D(512, (2, 2), activation='relu', padding='same')(UpSampling2D(size=(2, 2))(drop5))
merge6 = concatenate([drop4, up6], axis=3)
conv6 = Conv2D(512, (3, 3), activation='relu', padding='same')(merge6)
conv6 = Conv2D(512, (3, 3), activation='relu', padding='same')(conv6)

# Defining the seventh level of the U-Net model
up7 = Conv2D(256, (2, 2), activation='relu', padding='same')(UpSampling2D(size=(2, 2))(conv6))
merge7 = concatenate([conv3, up7], axis=3)
conv7 = Conv2D(256, (3, 3), activation='relu', padding='same')(merge7)
conv7 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv7)

# Defining the eighth level of the
up8 = Conv2D(128, (2, 2), activation='relu', padding='same')(UpSampling2D(size=(2, 2))(conv7))
merge8 = concatenate([conv2, up8], axis=3)
conv8 = Conv2D(128, (3, 3), activation='relu', padding='same')(merge8)
conv8 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv8)

# Defining the ninth level of the U-Net model
up9 = Conv2D(64, (2, 2), activation='relu', padding='same')(UpSampling2D(size=(2, 2))(conv8))
merge9 = concatenate([conv1, up9], axis=3)
conv9 = Conv2D(64, (3, 3), activation='relu', padding='same')(merge9)
conv9 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv9)

# Defining the output layer of the U-Net model
outputs = Conv2D(num_classes, (1, 1), activation='softmax')(conv9)

# Defining the U-Net model
model = Model(inputs=[inputs], outputs=[outputs])
model.compile(optimizer=keras.optimizers.Adam(lr=0.0001), loss='categorical_crossentropy',
metrics=['accuracy', tf.keras.metrics.AUC()])
model.summary()
return model


checkpoint = ModelCheckpoint(filepath=f'{model_name}_weights.h5',
monitor='val_accuracy',
save_best_only=True,
save_weights_only=True,
verbose=1)

early_stopping = EarlyStopping(monitor='val_loss',
patience=2,
verbose=1,
restore_best_weights=True,
)

reduce_lr = ReduceLROnPlateau(monitor='val_loss',
factor=0.1,
patience=1,
verbose=1)

# define the model
model = unet(input_size=(img_size[0], img_size[1], 3), num_classes=num_classes)

# define the callbacks for real-time model performance tracking and evaluation
callbacks = [
keras.callbacks.ModelCheckpoint(f'{model_name}.h5', save_best_only=True),
keras.callbacks.TensorBoard(log_dir='./logs'),
checkpoint, early_stopping, reduce_lr
]

# train the model
history = model.fit(train_generator, batch_size=batch_size, epochs=epochs, verbose=1,
callbacks=callbacks, validation_data=validation_generator,
steps_per_epoch=(len(os.listdir(r"somepathway"))*0.8)//batch_size,
validation_steps=(len(os.listdir(r"somepathway"))*0.2)//batch_size)

print("Done")
</code></pre>
<p>I suspect the issue might be related to how TensorFlow handles threads or processes, especially when triggered by the early stopping callback. Does anyone know why this error occurs and how to resolve it to ensure smooth early stopping without crashing?</p>
<p>Thank you!</p>
<p>I have tried to modify the combinegenerator function and played with the EarlyStopping arguments but keep getting the same error.</p>
 

Latest posts

I
Replies
0
Views
1
impact christian
I
Top