How can I extract clean Japanese text from the PDF files in a folder using Python?
# This is my code:
import os

import PyPDF2

# set the directory where the PDF files are located
pdf_directory = '/Users/humnerohit/Desktop/test_pdf_files'

# loop through each file in the directory
for filename in os.listdir(pdf_directory):
    if filename.endswith('.pdf'):
        # Open the PDF in binary mode. The with-block closes the handle when
        # the block exits (even on error), so handles do not pile up across
        # iterations. Any work that uses the reader must stay inside it.
        with open(os.path.join(pdf_directory, filename), 'rb') as pdf_file:
            # create a PDF reader object
            # NOTE(review): PdfFileReader is the legacy class name; newer
            # PyPDF2 releases expose PdfReader instead -- confirm against
            # the installed version.
            pdf_reader = PyPDF2.PdfFileReader(pdf_file)