main
1import json
2import sys
3
4from pypdf import PdfReader, PdfWriter
5from pypdf.annotations import FreeText
6
7
8# Fills a PDF by adding text annotations defined in `fields.json`. See forms.md.
9
10
def transform_coordinates(bbox, image_width, image_height, pdf_width, pdf_height):
    """Map a bounding box from image space into PDF space.

    Image space has its origin at the top-left with y growing downward; PDF
    space has its origin at the bottom-left with y growing upward. The box is
    scaled by the PDF/image size ratio on each axis and its y values are
    mirrored against ``pdf_height``.

    Args:
        bbox: Indexable whose items 0..3 are read as left, top, right, bottom
            in image coordinates.
        image_width: Width of the image the box was measured on.
        image_height: Height of the image the box was measured on.
        pdf_width: Width of the target PDF page.
        pdf_height: Height of the target PDF page.

    Returns:
        Tuple ``(left, bottom, right, top)`` in PDF coordinates.
    """
    scale_x = pdf_width / image_width
    scale_y = pdf_height / image_height

    img_left, img_top, img_right, img_bottom = bbox[0], bbox[1], bbox[2], bbox[3]

    # The image's bottom edge becomes the smaller PDF y value after the flip.
    return (
        img_left * scale_x,
        pdf_height - (img_bottom * scale_y),
        img_right * scale_x,
        pdf_height - (img_top * scale_y),
    )
26
27
def fill_pdf_form(input_pdf_path, fields_json_path, output_pdf_path):
    """Fill a PDF by adding one FreeText annotation per non-empty field.

    Reads the field description JSON (format described in forms.md), copies
    every page of the input PDF into a writer, converts each field's
    ``entry_bounding_box`` from image coordinates to PDF coordinates, and
    attaches a FreeText annotation holding the field's text. Fields with no
    ``entry_text``, no ``text`` key, or empty text are skipped.

    Args:
        input_pdf_path: Path of the PDF to read.
        fields_json_path: Path of the JSON file with ``pages`` and
            ``form_fields`` entries.
        output_pdf_path: Path the annotated PDF is written to.

    Raises:
        ValueError: If a field that has text refers to a ``page_number`` with
            no matching entry in the JSON ``pages`` list.
    """
    # `fields.json` format described in forms.md. JSON is UTF-8 by
    # specification, so do not rely on the platform default encoding.
    with open(fields_json_path, "r", encoding="utf-8") as f:
        fields_data = json.load(f)

    # Open the PDF and copy all pages to the writer
    reader = PdfReader(input_pdf_path)
    writer = PdfWriter()
    writer.append(reader)

    # PDF dimensions for each page, keyed by 1-based page number
    pdf_dimensions = {}
    for i, page in enumerate(reader.pages):
        mediabox = page.mediabox
        pdf_dimensions[i + 1] = [mediabox.width, mediabox.height]

    # Process each form field
    annotation_count = 0
    for field in fields_data["form_fields"]:
        # Skip empty fields first so they need no page or bounding-box data.
        if "entry_text" not in field or "text" not in field["entry_text"]:
            continue
        entry_text = field["entry_text"]
        text = entry_text["text"]
        if not text:
            continue

        page_num = field["page_number"]

        # Find the image dimensions recorded for this page. A default of
        # None avoids leaking a bare StopIteration when the page is missing.
        page_info = next(
            (p for p in fields_data["pages"] if p["page_number"] == page_num),
            None,
        )
        if page_info is None:
            raise ValueError(
                f"Field refers to page {page_num}, which has no entry in 'pages'"
            )
        image_width = page_info["image_width"]
        image_height = page_info["image_height"]
        pdf_width, pdf_height = pdf_dimensions[page_num]

        transformed_entry_box = transform_coordinates(
            field["entry_bounding_box"],
            image_width, image_height,
            pdf_width, pdf_height,
        )

        font_name = entry_text.get("font", "Arial")
        font_size = str(entry_text.get("font_size", 14)) + "pt"
        font_color = entry_text.get("font_color", "000000")

        # Font size/color seems to not work reliably across viewers:
        # https://github.com/py-pdf/pypdf/issues/2084
        annotation = FreeText(
            text=text,
            rect=transformed_entry_box,
            font=font_name,
            font_size=font_size,
            font_color=font_color,
            border_color=None,
            background_color=None,
        )
        # page_number is 0-based for pypdf
        writer.add_annotation(page_number=page_num - 1, annotation=annotation)
        annotation_count += 1

    # Save the filled PDF
    with open(output_pdf_path, "wb") as output:
        writer.write(output)

    print(f"Successfully filled PDF form and saved to {output_pdf_path}")
    print(f"Added {annotation_count} text annotations")
98
99
if __name__ == "__main__":
    # Exactly three positional arguments are expected after the script name.
    args = sys.argv[1:]
    if len(args) != 3:
        print("Usage: fill_pdf_form_with_annotations.py [input pdf] [fields.json] [output pdf]")
        sys.exit(1)
    input_pdf, fields_json, output_pdf = args

    fill_pdf_form(input_pdf, fields_json, output_pdf)