main
 1#!/usr/bin/env python3
 2"""Unpack and format XML contents of Office files (.docx, .pptx, .xlsx)"""
 3
 4import random
 5import sys
 6import defusedxml.minidom
 7import zipfile
 8from pathlib import Path
 9
def main(argv=None) -> None:
    """Unpack an Office file and pretty-print the XML parts it contains.

    The archive is extracted into the output directory (created if missing),
    every ``*.xml`` and ``*.rels`` file below it is rewritten in indented,
    ASCII-encoded form, and for ``.docx`` inputs a random 8-hex-digit RSID is
    printed as a suggestion for the edit session.

    Args:
        argv: Two-element sequence ``[office_file, output_dir]``. When
            omitted, ``sys.argv[1:]`` is used.

    Raises:
        SystemExit: If exactly two arguments are not supplied; the exit
            message is the usage string.
    """
    # Get command line arguments
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 2:
        # Explicit check instead of ``assert``: asserts are removed under -O,
        # which would turn a usage error into an obscure unpacking failure.
        sys.exit("Usage: python unpack.py <office_file> <output_dir>")
    input_file, output_dir = args

    # Extract and format
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)
    # Context manager guarantees the archive handle is closed after extraction.
    with zipfile.ZipFile(input_file) as archive:
        archive.extractall(output_path)

    # Pretty print all XML files. The list is materialised before rewriting so
    # the directory walk is finished before any file is modified.
    xml_files = [*output_path.rglob("*.xml"), *output_path.rglob("*.rels")]
    for xml_file in xml_files:
        # Hand the parser raw bytes so it selects the encoding from the BOM or
        # XML declaration rather than assuming UTF-8.
        dom = defusedxml.minidom.parseString(xml_file.read_bytes())
        xml_file.write_bytes(dom.toprettyxml(indent="  ", encoding="ascii"))

    # For .docx files, suggest an RSID for tracked changes
    # (extension compared case-insensitively so "REPORT.DOCX" also qualifies).
    if input_file.lower().endswith(".docx"):
        suggested_rsid = "".join(random.choices("0123456789ABCDEF", k=8))
        print(f"Suggested RSID for edit session: {suggested_rsid}")


if __name__ == "__main__":
    main()