OiO.lk Community platform!

Oio.lk is an excellent forum for developers, providing a wide range of resources, discussions, and support for those in the developer community. Join oio.lk today to connect with like-minded professionals, share insights, and stay updated on the latest trends and technologies in the development field.
  You need to log in or register to access the solved answers to this problem.
  • You have reached the maximum number of guest views allowed
  • Please register below to remove this limitation

Paste a PDF image with the correct width, height and position into an SVG file in Python using fitz

  • Thread starter Thread starter ahmad tayyab
  • Start date Start date
A

ahmad tayyab

Guest
`I am trying to convert a PDF file to SVG with correct formatting, using the fitz (PyMuPDF) library. The text is formatted correctly, but I can't adjust the image (its width, height and position). This is my first time working with PDF or SVG.

This is the code:

import fitz  # PyMuPDF
import base64
import zipfile

Code:
def pdf_to_svg_with_text_and_images(pdf_path, output_zip_path):
    """Convert each page of a PDF to an SVG file and collect the files in a zip.

    Every page becomes ``page<N>.svg`` (1-based) inside the archive. Text spans
    are written as ``<text>`` elements and embedded raster images as
    ``<image>`` elements carrying a base64 data URI. Images are placed at the
    rectangle(s) where the page actually draws them, so their position, width
    and height match the PDF. Pages that yield no text and no images are not
    written to the archive.

    Args:
        pdf_path: Path of the PDF file to read.
        output_zip_path: Path of the zip archive to create (overwritten if it
            already exists).
    """
    svg_header_template = (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"\n'
        '"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n'
        '<svg xmlns="http://www.w3.org/2000/svg" '
        'xmlns:xlink="http://www.w3.org/1999/xlink"\n'
        'version="1.1" id="main1" width="{width}" height="{height}">\n'
        '<g>\n'
    )
    svg_footer = '\n</g>\n</svg>'

    document = fitz.open(pdf_path)
    try:
        with zipfile.ZipFile(output_zip_path, 'w') as zipf:
            for page_index in range(len(document)):
                page = document.load_page(page_index)

                # Images go first: SVG paints in document order, so text
                # emitted afterwards stays visible on top of any image.
                elements = _svg_image_elements(document, page)
                elements += _svg_text_elements(page)
                if not elements:
                    continue

                svg_header = svg_header_template.format(
                    width=int(page.rect.width),
                    height=int(page.rect.height),
                )
                svg_filename = f'page{page_index + 1}.svg'
                zipf.writestr(
                    svg_filename, svg_header + "".join(elements) + svg_footer
                )
                print(f"SVG file {svg_filename} added to zip")
    finally:
        # Release the PDF even when a page fails to convert.
        document.close()


def _svg_text_elements(page):
    """Return a list with one SVG ``<text>`` element per text span on *page*."""
    # Imported here so the snippet does not depend on an extra top-level import.
    from xml.sax.saxutils import escape, quoteattr

    elements = []
    for block in page.get_text("dict")["blocks"]:
        # Only blocks that carry a "lines" entry contain text spans.
        for line in block.get("lines", ()):
            for span in line["spans"]:
                left, bottom = span["bbox"][0], span["bbox"][3]
                style = (
                    f'fill:#000000;font-family:{span["font"]};'
                    f'font-size:{span["size"]}px;'
                )
                # The text and the font name come from the PDF and may contain
                # '<', '&' or quotes, so both are escaped for XML.
                elements.append(
                    f'<text style={quoteattr(style)} fill-opacity="1" '
                    f'x="{left}" y="{bottom}">{escape(span["text"])}</text>\n'
                )
    return elements


def _svg_image_elements(document, page):
    """Return SVG ``<image>`` elements for every image placement on *page*."""
    elements = []
    seen_xrefs = set()
    for img in page.get_images(full=True):
        xref = img[0]
        # get_image_rects() already reports every placement of an xref, so a
        # repeated list entry must not emit the same rectangles again.
        if xref in seen_xrefs:
            continue
        seen_xrefs.add(xref)

        # The tuples returned by get_images() carry no position; the
        # rectangles where the page draws the image are queried separately.
        rects = page.get_image_rects(xref)
        if not rects:
            continue  # listed for the page but never drawn on it

        base_image = document.extract_image(xref)
        if not base_image:
            continue
        encoded = base64.b64encode(base_image["image"]).decode("ascii")
        href = f'data:image/{base_image["ext"]};base64,{encoded}'

        for rect in rects:
            # preserveAspectRatio="none" stretches the bitmap to the exact
            # rectangle instead of letterboxing it inside that rectangle.
            elements.append(
                f'<image x="{rect.x0}" y="{rect.y0}" '
                f'width="{rect.width}" height="{rect.height}" '
                f'preserveAspectRatio="none" xlink:href="{href}"/>\n'
            )
    return elements


# Example run: read 'testing_new.pdf' and write the per-page SVG files
# into 'output_svgs.zip'.
input_pdf_path = 'testing_new.pdf'
output_zip_path = 'output_svgs.zip'


# Executes the conversion with the two paths defined above.
pdf_to_svg_with_text_and_images(input_pdf_path, output_zip_path)`

`
<p>`I am trying to convert a PDF file to SVG with correct formatting.
I am using the <strong>fitz (PyMuPDF)</strong> library.
The text is formatted correctly, but I can't adjust the image (its width, height and position).
This is my first time working with PDF or SVG.</p>
<p><strong>this is the code</strong>
import fitz # PyMuPDF
import base64
import zipfile</p>
<pre><code>def pdf_to_svg_with_text_and_images(pdf_path, output_zip_path):
# Opens the PDF at pdf_path and, for each page, builds an SVG string that is
# stored as page<N>.svg in the zip archive created at output_zip_path.
# NOTE(review): the indentation of this listing has been stripped, so it is
# not runnable as shown; block nesting has to be inferred from the colons.
document = fitz.open(pdf_path)
num_pages = len(document)
with zipfile.ZipFile(output_zip_path, 'w') as zipf:
for page_index in range(num_pages):
page = document.load_page(page_index)

# XML prolog plus the opening <svg> and <g> tags; width/height are the page
# rectangle's size truncated to int.
svg_header = '''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"
version="1.1" id="main1" width="{width}" height="{height}">
<g>
'''.format(width=int(page.rect.width), height=int(page.rect.height))

svg_content = ""
current_y = 0 # Starting Y position for text
# One <text> element is appended per span; x comes from bbox[0] and y from
# bbox[3] of the span.
text_instances = page.get_text("dict")["blocks"]
for instance in text_instances:
if "lines" in instance:
for line in instance["lines"]:
for span in line["spans"]:
# NOTE(review): the single-quoted f-string below is split over three lines,
# which Python rejects; it has to be one line (or adjacent literals).
# NOTE(review): span["text"] is inserted without XML escaping, so '<' or '&'
# in the PDF text would produce invalid SVG.
svg_content += f'<text style="fill:#000000;font-family:
{span["font"]};font-size:{span["size"]}px;" fill-opacity="1" x="
{span["bbox"][0]}" y="{span["bbox"][3]}">{span["text"]}</text>\n'
# NOTE(review): current_y is updated here but never read afterwards.
current_y = max(current_y, span["bbox"][3])


image_list = page.get_images(full=True)

if image_list:
for img_index, img in enumerate(image_list):
xref = img[0]
base_image = document.extract_image(xref)
image_bytes = base_image["image"]
# NOTE(review): the size is hard-coded to 400x400 and x/y are taken from
# items 1 and 2 of the get_images() tuple. Per the PyMuPDF documentation those
# items are presumably the smask xref and the pixel width, not a position -
# confirm. Page.get_image_rects(xref) reports where the image is drawn.
img_width, img_height = 400, 400
img_x, img_y = img[1], img[2]


# The raw image bytes are embedded as a base64 data URI.
img_base64 = base64.b64encode(image_bytes).decode("utf-8")


# NOTE(review): same problem as the <text> f-string: a single-quoted literal
# broken across lines.
svg_content += f'<image x="{img_x}" y="{img_y}" width="{img_width}"
height="
{img_height}" xlink:href="data:image/{base_image["ext"]};base64,
{img_base64}"/>\n'

svg_footer = '''
</g>
</svg>'''


# A page is written to the archive only when some markup was produced for it.
if svg_content:
svg_filename = f'page{page_index + 1}.svg'
full_svg_content = svg_header + svg_content + svg_footer
zipf.writestr(svg_filename, full_svg_content)
print(f"SVG file {svg_filename} added to zip")

# Closes the PDF once all pages have been processed.
document.close()


# Example run: read 'testing_new.pdf' and write the per-page SVG files
# into 'output_svgs.zip'.
input_pdf_path = 'testing_new.pdf'
output_zip_path = 'output_svgs.zip'


# Executes the conversion with the two paths defined above.
pdf_to_svg_with_text_and_images(input_pdf_path, output_zip_path)`
</code></pre>
<p>`</p>
 
Top