OiO.lk Community platform!

Oio.lk is an excellent forum for developers, providing a wide range of resources, discussions, and support for those in the developer community. Join oio.lk today to connect with like-minded professionals, share insights, and stay updated on the latest trends and technologies in the development field.
  You need to log in or register to access the solved answers to this problem.
  • You have reached the maximum number of guest views allowed
  • Please register below to remove this limitation

Incorrect digit detection using Tesseract OCR on video frames in Python

  • Thread starter: Ernán
  • Start date
E

Ernán

Guest
I'm trying to calculate the real recording time of my videos. I have a lot of videos, some of which were lost during transmission. All of them are in MP4 format. To get the duration, I read the on-screen time using pytesseract.image_to_string, but I get incorrect results.

My approach involves capturing the frames, preprocessing the images, and using Tesseract to extract the text. However, the digit recognition is often incorrect or inconsistent.

The image on which the code recognizes the digits incorrectly

The above image gives a result like this: the time is calculated from 'data2/20240619_191208_first_frame.jpg' 19:12:66

The full code looks like this:

Code:
def digit_detect(image):
    """Run Tesseract OCR on *image* and return the recognized text.

    Recognition is limited to the characters ``0-9`` and ``:`` through
    ``tessedit_char_whitelist`` and uses ``--psm 6 --oem 3``.

    Args:
        image: Image handed unchanged to ``pytesseract.image_to_string``.

    Returns:
        The recognized text with leading and trailing whitespace removed.
    """
    config = '--psm 6 --oem 3 -c tessedit_char_whitelist=0123456789:'
    text = pytesseract.image_to_string(image, config=config)
    # NOTE(review): the cropped region holds one line of text; Tesseract's
    # single-line mode (--psm 7) may fit better than --psm 6 -- confirm on
    # sample frames before changing it.
    # Tesseract output typically ends with a newline/form feed; drop it so
    # callers get a clean "HH:MM:SS"-style string.
    return text.strip()

def resize_roi(image, x1=131, y1=11, x2=228, y2=32):
    """Return the rectangular region of *image* bounded by two corners.

    Despite the name, no scaling is performed: the result is the slice
    ``image[y1:y2, x1:x2]``.

    Args:
        image: Array-like indexed as ``[row, column]``.
        x1: First column of the region.
        y1: First row of the region.
        x2: Column just past the end of the region.
        y2: Row just past the end of the region.

    Returns:
        The selected sub-region of *image*.
    """
    row_span = slice(y1, y2)
    column_span = slice(x1, x2)
    return image[row_span, column_span]

def preprocess_image(image):
    """Convert a BGR image to a black-and-white image via Otsu thresholding.

    Args:
        image: Image accepted by ``cv2.cvtColor`` with
            ``cv2.COLOR_BGR2GRAY``.

    Returns:
        The thresholded image produced by ``cv2.threshold``.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    threshold_mode = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    # cv2.threshold returns a pair; only the second element (the image)
    # is needed here.
    return cv2.threshold(grayscale, 0, 255, threshold_mode)[1]

# def extract_time_from_image(image):

#     regions = [
#         (131, 11, 142, 31, '012'),         # Tens of hours (0-2)
#         (142, 11, 155, 31, '0123456789'),  # Units of hours (0-9)
#         (163, 11, 179, 31, '012345'),      # Tens of minutes (0-5)
#         (179, 11, 193, 31, '0123456789'),  # Units of minutes (0-9)
#         (202, 11, 215, 31, '012345'),      # Tens of seconds (0-5)
#         (215, 11, 226, 31, '0123456789')   # Units of seconds (0-9)
#     ]

#     digits = []

#     for (x1, y1, x2, y2, whitelist) in regions:

#         preprocess = preprocess_image(image)
      
#         resized_roi = resize_roi(preprocess, x1, y1, x2, y2)


#         custom_config = f'--psm 6 --oem 3 -c tessedit_char_whitelist={whitelist}'
#         digit = pytesseract.image_to_string(resized_roi, config=custom_config)
      
#         digits.append(digit)

#     return digits
    

# Directory scanned for .mp4 files, and directory that receives the cropped,
# thresholded first/last frames.
folder_path = 'data/output_rec/rkbt/1'
load_path = "data2"

# Upper bound on how many frames to step back from the reported end of a
# video when the final frame cannot be read.
max_tail_attempts = 30

if not os.path.isdir(folder_path):
    print("Error1")
    # Exit with a non-zero status so the failure is visible to the caller.
    raise SystemExit(1)

# cv2.imwrite signals failure through its return value instead of raising,
# so make sure the output directory exists before writing any frame.
os.makedirs(load_path, exist_ok=True)

video_files = [f for f in os.listdir(folder_path) if f.endswith('.mp4')]

for video_file in video_files:
    video_path = os.path.join(folder_path, video_file)

    cap = cv2.VideoCapture(video_path)

    if not cap.isOpened():
        print("Error2")
        continue

    # try/finally guarantees the capture is released on every path.
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        ret, first_frame = cap.read()
        if not ret:
            print("Error3")
            continue

        # The first attempt seeks to total_frames - 1. If that frame cannot
        # be read, step back one frame at a time (bounded) and use the last
        # readable frame instead.
        # NOTE(review): presumably the reported frame count can be off for
        # truncated files -- confirm against the affected videos.
        ret, last_frame = False, None
        attempts = max(1, min(total_frames, max_tail_attempts))
        for offset in range(1, attempts + 1):
            cap.set(cv2.CAP_PROP_POS_FRAMES, total_frames - offset)
            ret, last_frame = cap.read()
            if ret:
                break
        if not ret:
            print("Error3")
            continue
    finally:
        cap.release()

    # Crop to the timestamp region first, then binarize the crop.
    first_frame = preprocess_image(resize_roi(first_frame))
    last_frame = preprocess_image(resize_roi(last_frame))

    # print(extract_time_from_image(first_frame))
    # print(extract_time_from_image(last_frame))

    base_name = os.path.splitext(video_file)[0]
    first_frame_path = os.path.join(load_path, f"{base_name}_first_frame.jpg")
    last_frame_path = os.path.join(load_path, f"{base_name}_last_frame.jpg")

    print(f"the time is calculated from '{first_frame_path}'" , digit_detect(first_frame))
    print(f"the time is calculated from '{last_frame_path}'" , digit_detect(last_frame))

    first_saved = cv2.imwrite(first_frame_path, first_frame)
    last_saved = cv2.imwrite(last_frame_path, last_frame)
    if not (first_saved and last_saved):
        # Do not claim success when a write failed.
        print("Error4")
        continue

    print(f"Saved images with the first and last frames for '{video_file}'")
  • I tried recognizing each digit separately, restricting the whitelist to the values that are possible at each position.
  • I also tried various image binarization methods and various parameters of pytesseract.image_to_string.
<p>I'm trying to calculate the real recording time of my videos. I have a lot of videos, some of which were lost during transmission. All of them are in MP4 format. To get the duration, I read the on-screen time using <code>pytesseract.image_to_string</code>, but I get incorrect results.</p>
<p>My approach involves capturing the frames, preprocessing the images, and using Tesseract to extract the text. However, the digit recognition is often incorrect or inconsistent.</p>
<p><a href="https://i.sstatic.net/BHR3U0Yz.jpg" rel="nofollow noreferrer">The image on which the code recognizes the digits incorrectly</a></p>
<p>The above image gives a result like this:
the time is calculated from <code>'data2/20240619_191208_first_frame.jpg' 19:12:66</code></p>
<p>The full code looks like this:</p>
<pre><code>def digit_detect(image):
text = pytesseract.image_to_string(image, config='--psm 6 --oem 3 -c tessedit_char_whitelist=0123456789:')
return text

def resize_roi(image, x1=131, y1=11, x2=228, y2=32):
    """Return the rectangular region of *image* bounded by two corners.

    Despite the name, no scaling is performed: the result is the slice
    ``image[y1:y2, x1:x2]``.

    Args:
        image: Array-like indexed as ``[row, column]``.
        x1: First column of the region.
        y1: First row of the region.
        x2: Column just past the end of the region.
        y2: Row just past the end of the region.

    Returns:
        The selected sub-region of *image*.
    """
    roi = image[y1:y2, x1:x2]
    return roi

def preprocess_image(image):
    """Convert a BGR image to a black-and-white image via Otsu thresholding.

    Args:
        image: Image accepted by ``cv2.cvtColor`` with
            ``cv2.COLOR_BGR2GRAY``.

    Returns:
        The thresholded image produced by ``cv2.threshold``.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # cv2.threshold returns a pair; only the image half is kept.
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return binary

# def extract_time_from_image(image):

# regions = [
# (131, 11, 142, 31, '012'), # Tens of hours (0-2)
# (142, 11, 155, 31, '0123456789'), # Units of hours (0-9)
# (163, 11, 179, 31, '012345'), # Tens of minutes (0-5)
# (179, 11, 193, 31, '0123456789'), # Units of minutes (0-9)
# (202, 11, 215, 31, '012345'), # Tens of seconds (0-5)
# (215, 11, 226, 31, '0123456789') # Units of seconds (0-9)
# ]

# digits = []

# for (x1, y1, x2, y2, whitelist) in regions:

# preprocess = preprocess_image(image)

# resized_roi = resize_roi(preprocess, x1, y1, x2, y2)


# custom_config = f'--psm 6 --oem 3 -c tessedit_char_whitelist={whitelist}'
# digit = pytesseract.image_to_string(resized_roi, config=custom_config)

# digits.append(digit)

# return digits


# Directory scanned for .mp4 files, and directory that receives the cropped,
# thresholded first/last frames.
folder_path = 'data/output_rec/rkbt/1'
load_path = "data2"

# Upper bound on how many frames to step back from the reported end of a
# video when the final frame cannot be read.
max_tail_attempts = 30

if not os.path.isdir(folder_path):
    print("Error1")
    # Exit with a non-zero status so the failure is visible to the caller.
    raise SystemExit(1)

# cv2.imwrite signals failure through its return value instead of raising,
# so make sure the output directory exists before writing any frame.
os.makedirs(load_path, exist_ok=True)

video_files = [f for f in os.listdir(folder_path) if f.endswith('.mp4')]

for video_file in video_files:
    video_path = os.path.join(folder_path, video_file)

    cap = cv2.VideoCapture(video_path)

    if not cap.isOpened():
        print("Error2")
        continue

    # try/finally guarantees the capture is released on every path.
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        ret, first_frame = cap.read()
        if not ret:
            print("Error3")
            continue

        # The first attempt seeks to total_frames - 1. If that frame cannot
        # be read, step back one frame at a time (bounded) and use the last
        # readable frame instead.
        # NOTE(review): presumably the reported frame count can be off for
        # truncated files -- confirm against the affected videos.
        ret, last_frame = False, None
        attempts = max(1, min(total_frames, max_tail_attempts))
        for offset in range(1, attempts + 1):
            cap.set(cv2.CAP_PROP_POS_FRAMES, total_frames - offset)
            ret, last_frame = cap.read()
            if ret:
                break
        if not ret:
            print("Error3")
            continue
    finally:
        cap.release()

    # Crop to the timestamp region first, then binarize the crop.
    first_frame = preprocess_image(resize_roi(first_frame))
    last_frame = preprocess_image(resize_roi(last_frame))

    # print(extract_time_from_image(first_frame))
    # print(extract_time_from_image(last_frame))

    base_name = os.path.splitext(video_file)[0]
    first_frame_path = os.path.join(load_path, f"{base_name}_first_frame.jpg")
    last_frame_path = os.path.join(load_path, f"{base_name}_last_frame.jpg")

    print(f"the time is calculated from '{first_frame_path}'" , digit_detect(first_frame))
    print(f"the time is calculated from '{last_frame_path}'" , digit_detect(last_frame))

    first_saved = cv2.imwrite(first_frame_path, first_frame)
    last_saved = cv2.imwrite(last_frame_path, last_frame)
    if not (first_saved and last_saved):
        # Do not claim success when a write failed.
        print("Error4")
        continue

    print(f"Saved images with the first and last frames for '{video_file}'")

</code></pre>
<ul>
<li>I tried recognizing each digit separately, restricting the whitelist to the values that are possible at each position.</li>
<li>I also tried various image binarization methods and various parameters of <code>pytesseract.image_to_string</code>.</li>
</ul>
 

Latest posts

Online statistics

Members online
0
Guests online
3
Total visitors
3
Top