main
1import os
2import sys
3
4from pdf2image import convert_from_path
5
6
# Converts each page of a PDF to a PNG image, downscaling any page whose
# width or height exceeds a maximum dimension.
8
9
def convert(pdf_path, output_dir, max_dim=1000):
    """Render every page of a PDF to a PNG file inside ``output_dir``.

    Pages are rasterised at 200 DPI and saved as ``page_<n>.png`` with ``n``
    starting at 1. A page whose width or height exceeds ``max_dim`` pixels is
    scaled down, keeping its aspect ratio, so that both sides fit within
    ``max_dim``. Progress is reported on stdout.

    Args:
        pdf_path: Path of the PDF file to convert.
        output_dir: Directory that receives the PNG files. It is created
            (including parents) when it does not exist yet.
        max_dim: Largest allowed width/height of a saved image, in pixels.
            Must be positive.

    Raises:
        ValueError: If ``max_dim`` is not positive.
    """
    # Fail fast: a non-positive limit would otherwise only surface as a
    # resize error after the whole PDF has already been rasterised.
    if max_dim <= 0:
        raise ValueError(f"max_dim must be positive, got {max_dim!r}")

    # An empty string means "current directory" to os.path.join below, and
    # os.makedirs("") would raise, so only create a directory that is named.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    images = convert_from_path(pdf_path, dpi=200)

    for page_number, image in enumerate(images, start=1):
        # Scale image if needed to keep width/height under `max_dim`
        width, height = image.size
        if width > max_dim or height > max_dim:
            scale_factor = min(max_dim / width, max_dim / height)
            # Clamp to 1 px: on extreme aspect ratios int() can truncate the
            # short side to 0, which is not a valid image size.
            new_width = max(1, int(width * scale_factor))
            new_height = max(1, int(height * scale_factor))
            image = image.resize((new_width, new_height))

        image_path = os.path.join(output_dir, f"page_{page_number}.png")
        image.save(image_path)
        print(f"Saved page {page_number} as {image_path} (size: {image.size})")

    print(f"Converted {len(images)} pages to PNG images")
27
28
if __name__ == "__main__":
    # Command-line entry point: exactly two positional arguments are
    # expected after the script name (the input PDF and the output directory).
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Usage: convert_pdf_to_images.py [input pdf] [output directory]")
        sys.exit(1)

    pdf_path, output_directory = cli_args
    convert(pdf_path, output_directory)