main
1#!/usr/bin/env python3
2"""
3Create thumbnail grids from PowerPoint presentation slides.
4
5Creates a grid layout of slide thumbnails with configurable columns (max 6).
6Each grid contains up to cols×(cols+1) images. For presentations with more
7slides, multiple numbered grid files are created automatically.
8
9The program outputs the names of all files created.
10
11Output:
12- Single grid: {prefix}.jpg (if slides fit in one grid)
13- Multiple grids: {prefix}-1.jpg, {prefix}-2.jpg, etc.
14
15Grid limits by column count:
16- 3 cols: max 12 slides per grid (3×4)
17- 4 cols: max 20 slides per grid (4×5)
18- 5 cols: max 30 slides per grid (5×6) [default]
19- 6 cols: max 42 slides per grid (6×7)
20
21Usage:
22 python thumbnail.py input.pptx [output_prefix] [--cols N] [--outline-placeholders]
23
24Examples:
25 python thumbnail.py presentation.pptx
26 # Creates: thumbnails.jpg (using default prefix)
27 # Outputs:
28 # Created 1 grid(s):
29 # - thumbnails.jpg
30
31 python thumbnail.py large-deck.pptx grid --cols 4
32 # Creates: grid-1.jpg, grid-2.jpg, grid-3.jpg
33 # Outputs:
34 # Created 3 grid(s):
35 # - grid-1.jpg
36 # - grid-2.jpg
37 # - grid-3.jpg
38
39 python thumbnail.py template.pptx analysis --outline-placeholders
40 # Creates thumbnail grids with red outlines around text placeholders
41"""
42
43import argparse
44import subprocess
45import sys
46import tempfile
47from pathlib import Path
48
49from inventory import extract_text_inventory
50from PIL import Image, ImageDraw, ImageFont
51from pptx import Presentation
52
# Constants
THUMBNAIL_WIDTH = 300  # Width of each thumbnail cell in the grid, in pixels
CONVERSION_DPI = 100  # Resolution handed to pdftoppm when rasterizing PDF pages
MAX_COLS = 6  # Upper bound applied to the --cols argument
DEFAULT_COLS = 5  # Value of --cols when the option is omitted
JPEG_QUALITY = 95  # "quality" setting used when saving each grid image

# Grid layout constants
GRID_PADDING = 20  # Gap around and between grid cells, in pixels
BORDER_WIDTH = 2  # Width in pixels of the gray border around each thumbnail (0 disables it)
FONT_SIZE_RATIO = 0.12  # Label font size as a fraction of thumbnail width
LABEL_PADDING_RATIO = 0.4  # Padding above and below each label, as a fraction of font size
65
66
def main():
    """Command-line entry point: render a .pptx file into thumbnail grid JPEG(s).

    Parses the arguments (input file, optional output prefix, ``--cols``,
    ``--outline-placeholders``), validates them, converts the slides to images
    inside a temporary directory and writes the grid file(s), printing the name
    of every file created.

    Exits with status 1 when the column count is below 1, the input is missing
    or is not a ``.pptx`` file, no slide images are produced, or any step
    raises an exception.
    """
    parser = argparse.ArgumentParser(
        description="Create thumbnail grids from PowerPoint slides."
    )
    parser.add_argument("input", help="Input PowerPoint file (.pptx)")
    parser.add_argument(
        "output_prefix",
        nargs="?",
        default="thumbnails",
        help="Output prefix for image files (default: thumbnails, will create prefix.jpg or prefix-N.jpg)",
    )
    parser.add_argument(
        "--cols",
        type=int,
        default=DEFAULT_COLS,
        help=f"Number of columns (default: {DEFAULT_COLS}, max: {MAX_COLS})",
    )
    parser.add_argument(
        "--outline-placeholders",
        action="store_true",
        help="Outline text placeholders with a colored border",
    )

    args = parser.parse_args()

    # Validate columns: a count below 1 would otherwise surface later as an
    # unrelated-looking error from the chunking/grid arithmetic.
    if args.cols < 1:
        print(f"Error: Columns must be at least 1 (requested {args.cols})")
        sys.exit(1)
    cols = min(args.cols, MAX_COLS)
    if args.cols > MAX_COLS:
        print(f"Warning: Columns limited to {MAX_COLS} (requested {args.cols})")

    # Validate input
    input_path = Path(args.input)
    if not input_path.exists() or input_path.suffix.lower() != ".pptx":
        print(f"Error: Invalid PowerPoint file: {args.input}")
        sys.exit(1)

    # Construct output path (always JPG)
    output_path = Path(f"{args.output_prefix}.jpg")

    print(f"Processing: {args.input}")

    try:
        # Intermediate PDF and per-slide images live only for the duration of
        # this block; the grids themselves are written to output_path.
        with tempfile.TemporaryDirectory() as temp_dir:
            # Get placeholder regions if outlining is enabled
            placeholder_regions = None
            slide_dimensions = None
            if args.outline_placeholders:
                print("Extracting placeholder regions...")
                placeholder_regions, slide_dimensions = get_placeholder_regions(
                    input_path
                )
                if placeholder_regions:
                    print(f"Found placeholders on {len(placeholder_regions)} slides")

            # Convert slides to images
            slide_images = convert_to_images(input_path, Path(temp_dir), CONVERSION_DPI)
            if not slide_images:
                print("Error: No slides found")
                # SystemExit is not an Exception subclass, so it bypasses the
                # handler below and keeps this message as the only output.
                sys.exit(1)

            print(f"Found {len(slide_images)} slides")

            # Create grids (max cols×(cols+1) images per grid)
            grid_files = create_grids(
                slide_images,
                cols,
                THUMBNAIL_WIDTH,
                output_path,
                placeholder_regions,
                slide_dimensions,
            )

            # Print saved files
            print(f"Created {len(grid_files)} grid(s):")
            for grid_file in grid_files:
                print(f"  - {grid_file}")

    except Exception as e:
        # Top-level boundary: report any failure as a one-line error.
        print(f"Error: {e}")
        sys.exit(1)
147
148
def create_hidden_slide_placeholder(size):
    """Build the stand-in image shown for a hidden slide.

    The result is a light-gray RGB image of the given ``(width, height)``
    crossed corner-to-corner by two darker gray diagonal lines.
    """
    canvas = Image.new("RGB", size, color="#F0F0F0")
    pen = ImageDraw.Draw(canvas)
    # Stroke scales with the shorter side but never drops below 5 pixels.
    stroke = max(5, min(size) // 100)
    diagonals = (
        [(0, 0), size],
        [(size[0], 0), (0, size[1])],
    )
    for diagonal in diagonals:
        pen.line(diagonal, fill="#CCCCCC", width=stroke)
    return canvas
157
158
def get_placeholder_regions(pptx_path):
    """Collect the bounding box of every text shape in the presentation.

    Returns a tuple ``(placeholder_regions, slide_dimensions)``:

    - ``placeholder_regions`` maps a slide index (the N of each "slide-N"
      inventory key) to a list of dicts with 'left', 'top', 'width' and
      'height' copied from the inventory's shape entries. Slides without any
      entry are omitted. The values are expected to be in inches, matching
      ``slide_dimensions`` -- confirm against the inventory module.
    - ``slide_dimensions`` is ``(width_inches, height_inches)``, converted
      from EMU; 9144000 x 5143500 EMU is used when the presentation does not
      report a size.
    """
    emu_per_inch = 914400.0

    prs = Presentation(str(pptx_path))
    inventory = extract_text_inventory(pptx_path, prs)

    slide_dimensions = (
        (prs.slide_width or 9144000) / emu_per_inch,
        (prs.slide_height or 5143500) / emu_per_inch,
    )

    placeholder_regions = {}
    for slide_key, shapes in inventory.items():
        # Keys look like "slide-N"; N is the slide index.
        slide_idx = int(slide_key.split("-")[1])

        # The inventory only lists shapes that carry text, so every entry
        # becomes a region to outline.
        boxes = [
            {
                "left": shape.left,
                "top": shape.top,
                "width": shape.width,
                "height": shape.height,
            }
            for shape in shapes.values()
        ]
        if boxes:
            placeholder_regions[slide_idx] = boxes

    return placeholder_regions, slide_dimensions
195
196
def convert_to_images(pptx_path, temp_dir, dpi):
    """Render every slide of a presentation to a JPEG, via an intermediate PDF.

    The presentation is converted to PDF with ``soffice`` and the PDF is
    rasterized with ``pdftoppm`` at ``dpi``. Slides flagged as hidden get a
    generated placeholder image instead, so the returned list keeps one entry
    per slide position.

    Args:
        pptx_path: Path to the .pptx file.
        temp_dir: Existing directory (a ``Path``) that receives the PDF and
            all generated images.
        dpi: Resolution passed to ``pdftoppm -r``.

    Returns:
        List of image paths in slide order. If fewer pages were rendered than
        there are visible slides, the trailing slides are missing from the
        list and a warning is printed.

    Raises:
        RuntimeError: If either external conversion step fails; the tool's
            stderr output is appended to the message when available.
    """
    # Detect hidden slides
    print("Analyzing presentation...")
    prs = Presentation(str(pptx_path))
    total_slides = len(prs.slides)

    # Find hidden slides (1-based indexing for display). The "show" attribute
    # is an XML boolean, so both "0" and "false" mean hidden.
    hidden_slides = {
        idx + 1
        for idx, slide in enumerate(prs.slides)
        if slide.element.get("show") in ("0", "false")
    }

    print(f"Total slides: {total_slides}")
    if hidden_slides:
        print(f"Hidden slides: {sorted(hidden_slides)}")

    pdf_path = temp_dir / f"{pptx_path.stem}.pdf"

    # Convert to PDF
    print("Converting to PDF...")
    result = subprocess.run(
        [
            "soffice",
            "--headless",
            "--convert-to",
            "pdf",
            "--outdir",
            str(temp_dir),
            str(pptx_path),
        ],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0 or not pdf_path.exists():
        detail = (result.stderr or "").strip()
        raise RuntimeError(
            f"PDF conversion failed: {detail}" if detail else "PDF conversion failed"
        )

    # Convert PDF to images
    print(f"Converting to images at {dpi} DPI...")
    result = subprocess.run(
        ["pdftoppm", "-jpeg", "-r", str(dpi), str(pdf_path), str(temp_dir / "slide")],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        detail = (result.stderr or "").strip()
        raise RuntimeError(
            f"Image conversion failed: {detail}" if detail else "Image conversion failed"
        )

    # Lexicographic order is used as page order (this presumably relies on
    # pdftoppm zero-padding the page number in the file name).
    visible_images = sorted(temp_dir.glob("slide-*.jpg"))

    # The pairing below assumes the PDF holds exactly one page per visible
    # slide; say so when that does not hold rather than truncating silently.
    expected_visible = total_slides - len(hidden_slides)
    if len(visible_images) != expected_visible:
        print(
            f"Warning: expected {expected_visible} rendered slide(s) "
            f"but found {len(visible_images)}"
        )

    # Get placeholder dimensions from first visible slide
    if visible_images:
        with Image.open(visible_images[0]) as img:
            placeholder_size = img.size
    else:
        placeholder_size = (1920, 1080)

    # Create full list with placeholders for hidden slides
    all_images = []
    visible_idx = 0
    for slide_num in range(1, total_slides + 1):
        if slide_num in hidden_slides:
            # Create placeholder image for hidden slide
            placeholder_path = temp_dir / f"hidden-{slide_num:03d}.jpg"
            placeholder_img = create_hidden_slide_placeholder(placeholder_size)
            placeholder_img.save(placeholder_path, "JPEG")
            all_images.append(placeholder_path)
        elif visible_idx < len(visible_images):
            # Use the next rendered page for this visible slide
            all_images.append(visible_images[visible_idx])
            visible_idx += 1

    return all_images
272
273
def create_grids(
    image_paths,
    cols,
    width,
    output_path,
    placeholder_regions=None,
    slide_dimensions=None,
):
    """Write one or more thumbnail grid files covering all slide images.

    Images are split into chunks of at most ``cols * (cols + 1)``; each chunk
    is rendered with ``create_grid`` and saved as a JPEG.

    Args:
        image_paths: Slide image paths in slide order.
        cols: Number of grid columns; must be at least 1.
        width: Thumbnail width in pixels.
        output_path: ``Path`` of the output file. Used as-is when everything
            fits in one grid; otherwise "-1", "-2", ... is inserted before the
            extension. Missing parent directories are created.
        placeholder_regions: Optional mapping of slide index to regions to
            outline, forwarded to ``create_grid``.
        slide_dimensions: Optional ``(width, height)`` of a slide in inches,
            forwarded to ``create_grid``.

    Returns:
        List of the written file names (as strings), in order. Empty when
        ``image_paths`` is empty.

    Raises:
        ValueError: If ``cols`` is less than 1.
    """
    if cols < 1:
        raise ValueError(f"cols must be at least 1, got {cols}")

    # Maximum images per grid is cols × (cols + 1) for better proportions
    max_images_per_grid = cols * (cols + 1)
    # Whether a numeric suffix is needed does not change between chunks.
    single_grid = len(image_paths) <= max_images_per_grid
    grid_files = []

    print(
        f"Creating grids with {cols} columns (max {max_images_per_grid} images per grid)"
    )

    # Split images into chunks
    for chunk_idx, start_idx in enumerate(
        range(0, len(image_paths), max_images_per_grid)
    ):
        # Slicing clamps at the end of the list, so the last chunk may be short.
        chunk_images = image_paths[start_idx : start_idx + max_images_per_grid]

        # start_idx doubles as the slide number of the chunk's first image.
        grid = create_grid(
            chunk_images, cols, width, start_idx, placeholder_regions, slide_dimensions
        )

        if single_grid:
            # Single grid - use base filename without suffix
            grid_filename = output_path
        else:
            # Multiple grids - insert index before extension with dash
            grid_filename = (
                output_path.parent
                / f"{output_path.stem}-{chunk_idx + 1}{output_path.suffix}"
            )

        # Save grid
        grid_filename.parent.mkdir(parents=True, exist_ok=True)
        grid.save(str(grid_filename), quality=JPEG_QUALITY)
        grid_files.append(str(grid_filename))

    return grid_files
319
320
def create_grid(
    image_paths,
    cols,
    width,
    start_slide_num=0,
    placeholder_regions=None,
    slide_dimensions=None,
):
    """Compose one grid image from slide images, each labelled with its number.

    Every cell holds a centered text label (``start_slide_num`` plus the
    image's position in ``image_paths``) above a thumbnail scaled to fit
    ``width``. Cell height follows the aspect ratio of the first image. When
    ``placeholder_regions`` has an entry for a slide number, its regions are
    outlined in red before the image is scaled down.

    Returns the composed RGB image.
    """
    font_size = int(width * FONT_SIZE_RATIO)
    label_padding = int(font_size * LABEL_PADDING_RATIO)

    # All cells share the proportions of the first image.
    with Image.open(image_paths[0]) as first:
        height = int(width * (first.height / first.width))

    # One cell = label padding + label + label padding + thumbnail.
    cell_h = height + font_size + label_padding * 2
    rows = (len(image_paths) + cols - 1) // cols
    canvas_size = (
        cols * width + (cols + 1) * GRID_PADDING,
        rows * cell_h + (rows + 1) * GRID_PADDING,
    )

    grid = Image.new("RGB", canvas_size, "white")
    draw = ImageDraw.Draw(grid)

    try:
        # Sized variant of Pillow's default font
        font = ImageFont.load_default(size=font_size)
    except Exception:
        # Fall back to the unsized default font when the size argument fails
        font = ImageFont.load_default()

    for offset, img_path in enumerate(image_paths):
        slide_no = start_slide_num + offset
        row, col = divmod(offset, cols)
        x = col * width + (col + 1) * GRID_PADDING
        y_base = row * cell_h + (row + 1) * GRID_PADDING

        # Label, horizontally centered over the cell
        label = f"{slide_no}"
        text_box = draw.textbbox((0, 0), label, font=font)
        text_w = text_box[2] - text_box[0]
        draw.text(
            (x + (width - text_w) // 2, y_base + label_padding),
            label,
            fill="black",
            font=font,
        )

        # Thumbnail area starts below the label and its padding
        y_thumbnail = y_base + font_size + label_padding * 2

        with Image.open(img_path) as img:
            if placeholder_regions and slide_no in placeholder_regions:
                img = _draw_region_outlines(
                    img, placeholder_regions[slide_no], slide_dimensions
                )

            # Shrink in place, then center inside the cell
            img.thumbnail((width, height), Image.Resampling.LANCZOS)
            w, h = img.size
            tx = x + (width - w) // 2
            ty = y_thumbnail + (height - h) // 2
            grid.paste(img, (tx, ty))

            if BORDER_WIDTH > 0:
                top_left = (tx - BORDER_WIDTH, ty - BORDER_WIDTH)
                bottom_right = (tx + w + BORDER_WIDTH - 1, ty + h + BORDER_WIDTH - 1)
                draw.rectangle(
                    [top_left, bottom_right],
                    outline="gray",
                    width=BORDER_WIDTH,
                )

    return grid


def _draw_region_outlines(img, regions, slide_dimensions):
    """Return an RGB version of ``img`` with every region outlined in red.

    Region coordinates are scaled from slide units to pixels using
    ``slide_dimensions`` or, when that is not given, a slide size estimated
    from the image size at CONVERSION_DPI.
    """
    orig_w, orig_h = img.size

    # The overlay is alpha-composited, which needs an RGBA base image.
    if img.mode != "RGBA":
        img = img.convert("RGBA")

    if slide_dimensions:
        slide_width_inches, slide_height_inches = slide_dimensions
    else:
        slide_width_inches = orig_w / CONVERSION_DPI
        slide_height_inches = orig_h / CONVERSION_DPI

    x_scale = orig_w / slide_width_inches
    y_scale = orig_h / slide_height_inches

    overlay = Image.new("RGBA", img.size, (255, 255, 255, 0))
    pen = ImageDraw.Draw(overlay)

    # Stroke scales with the shorter image side but never drops below 5 pixels.
    stroke_width = max(5, min(orig_w, orig_h) // 150)

    for region in regions:
        x0 = int(region["left"] * x_scale)
        y0 = int(region["top"] * y_scale)
        x1 = x0 + int(region["width"] * x_scale)
        y1 = y0 + int(region["height"] * y_scale)
        pen.rectangle(
            [(x0, y0), (x1, y1)],
            outline=(255, 0, 0, 255),  # Bright red, fully opaque
            width=stroke_width,
        )

    # Flatten back to RGB so the result can be pasted and saved as JPEG.
    return Image.alpha_composite(img, overlay).convert("RGB")
447
448
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()