skills/skills/docx/scripts/document.py at main

   1#!/usr/bin/env python3
   2"""
   3Library for working with Word documents: comments, tracked changes, and editing.
   4
   5Usage:
   6    from skills.docx.scripts.document import Document
   7
   8    # Initialize
   9    doc = Document('workspace/unpacked')
  10    doc = Document('workspace/unpacked', author="John Doe", initials="JD")
  11
  12    # Find nodes
  13    node = doc["word/document.xml"].get_node(tag="w:del", attrs={"w:id": "1"})
  14    node = doc["word/document.xml"].get_node(tag="w:p", line_number=10)
  15
  16    # Add comments
  17    doc.add_comment(start=node, end=node, text="Comment text")
  18    doc.reply_to_comment(parent_comment_id=0, text="Reply text")
  19
  20    # Suggest tracked changes
  21    doc["word/document.xml"].suggest_deletion(node)  # Delete content
  22    doc["word/document.xml"].revert_insertion(ins_node)  # Reject insertion
  23    doc["word/document.xml"].revert_deletion(del_node)  # Reject deletion
  24
  25    # Save
  26    doc.save()
  27"""
  28
  29import html
  30import random
  31import shutil
  32import tempfile
  33from datetime import datetime, timezone
  34from pathlib import Path
  35
  36from defusedxml import minidom
  37from ooxml.scripts.pack import pack_document
  38from ooxml.scripts.validation.docx import DOCXSchemaValidator
  39from ooxml.scripts.validation.redlining import RedliningValidator
  40
  41from .utilities import XMLEditor
  42
  43# Path to template files
  44TEMPLATE_DIR = Path(__file__).parent / "templates"
  45
  46
  47class DocxXMLEditor(XMLEditor):
  48    """XMLEditor that automatically applies RSID, author, and date to new elements.
  49
  50    Automatically adds attributes to elements that support them when inserting new content:
  51    - w:rsidR, w:rsidRDefault, w:rsidP (for w:p and w:r elements)
  52    - w:author and w:date (for w:ins, w:del, w:comment elements)
  53    - w:id (for w:ins and w:del elements)
  54
  55    Attributes:
  56        dom (defusedxml.minidom.Document): The DOM document for direct manipulation
  57    """
  58
  59    def __init__(
  60        self, xml_path, rsid: str, author: str = "Claude", initials: str = "C"
  61    ):
  62        """Initialize with required RSID and optional author.
  63
  64        Args:
  65            xml_path: Path to XML file to edit
  66            rsid: RSID to automatically apply to new elements
  67            author: Author name for tracked changes and comments (default: "Claude")
  68            initials: Author initials (default: "C")
  69        """
  70        super().__init__(xml_path)
  71        self.rsid = rsid
  72        self.author = author
  73        self.initials = initials
  74
  75    def _get_next_change_id(self):
  76        """Get the next available change ID by checking all tracked change elements."""
  77        max_id = -1
  78        for tag in ("w:ins", "w:del"):
  79            elements = self.dom.getElementsByTagName(tag)
  80            for elem in elements:
  81                change_id = elem.getAttribute("w:id")
  82                if change_id:
  83                    try:
  84                        max_id = max(max_id, int(change_id))
  85                    except ValueError:
  86                        pass
  87        return max_id + 1
  88
  89    def _ensure_w16du_namespace(self):
  90        """Ensure w16du namespace is declared on the root element."""
  91        root = self.dom.documentElement
  92        if not root.hasAttribute("xmlns:w16du"):  # type: ignore
  93            root.setAttribute(  # type: ignore
  94                "xmlns:w16du",
  95                "http://schemas.microsoft.com/office/word/2023/wordml/word16du",
  96            )
  97
  98    def _ensure_w16cex_namespace(self):
  99        """Ensure w16cex namespace is declared on the root element."""
 100        root = self.dom.documentElement
 101        if not root.hasAttribute("xmlns:w16cex"):  # type: ignore
 102            root.setAttribute(  # type: ignore
 103                "xmlns:w16cex",
 104                "http://schemas.microsoft.com/office/word/2018/wordml/cex",
 105            )
 106
 107    def _ensure_w14_namespace(self):
 108        """Ensure w14 namespace is declared on the root element."""
 109        root = self.dom.documentElement
 110        if not root.hasAttribute("xmlns:w14"):  # type: ignore
 111            root.setAttribute(  # type: ignore
 112                "xmlns:w14",
 113                "http://schemas.microsoft.com/office/word/2010/wordml",
 114            )
 115
 116    def _inject_attributes_to_nodes(self, nodes):
 117        """Inject RSID, author, and date attributes into DOM nodes where applicable.
 118
 119        Adds attributes to elements that support them:
 120        - w:r: gets w:rsidR (or w:rsidDel if inside w:del)
 121        - w:p: gets w:rsidR, w:rsidRDefault, w:rsidP, w14:paraId, w14:textId
 122        - w:t: gets xml:space="preserve" if text has leading/trailing whitespace
 123        - w:ins, w:del: get w:id, w:author, w:date, w16du:dateUtc
 124        - w:comment: gets w:author, w:date, w:initials
 125        - w16cex:commentExtensible: gets w16cex:dateUtc
 126
 127        Args:
 128            nodes: List of DOM nodes to process
 129        """
 130        from datetime import datetime, timezone
 131
 132        timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
 133
 134        def is_inside_deletion(elem):
 135            """Check if element is inside a w:del element."""
 136            parent = elem.parentNode
 137            while parent:
 138                if parent.nodeType == parent.ELEMENT_NODE and parent.tagName == "w:del":
 139                    return True
 140                parent = parent.parentNode
 141            return False
 142
 143        def add_rsid_to_p(elem):
 144            if not elem.hasAttribute("w:rsidR"):
 145                elem.setAttribute("w:rsidR", self.rsid)
 146            if not elem.hasAttribute("w:rsidRDefault"):
 147                elem.setAttribute("w:rsidRDefault", self.rsid)
 148            if not elem.hasAttribute("w:rsidP"):
 149                elem.setAttribute("w:rsidP", self.rsid)
 150            # Add w14:paraId and w14:textId if not present
 151            if not elem.hasAttribute("w14:paraId"):
 152                self._ensure_w14_namespace()
 153                elem.setAttribute("w14:paraId", _generate_hex_id())
 154            if not elem.hasAttribute("w14:textId"):
 155                self._ensure_w14_namespace()
 156                elem.setAttribute("w14:textId", _generate_hex_id())
 157
 158        def add_rsid_to_r(elem):
 159            # Use w:rsidDel for <w:r> inside <w:del>, otherwise w:rsidR
 160            if is_inside_deletion(elem):
 161                if not elem.hasAttribute("w:rsidDel"):
 162                    elem.setAttribute("w:rsidDel", self.rsid)
 163            else:
 164                if not elem.hasAttribute("w:rsidR"):
 165                    elem.setAttribute("w:rsidR", self.rsid)
 166
 167        def add_tracked_change_attrs(elem):
 168            # Auto-assign w:id if not present
 169            if not elem.hasAttribute("w:id"):
 170                elem.setAttribute("w:id", str(self._get_next_change_id()))
 171            if not elem.hasAttribute("w:author"):
 172                elem.setAttribute("w:author", self.author)
 173            if not elem.hasAttribute("w:date"):
 174                elem.setAttribute("w:date", timestamp)
 175            # Add w16du:dateUtc for tracked changes (same as w:date since we generate UTC timestamps)
 176            if elem.tagName in ("w:ins", "w:del") and not elem.hasAttribute(
 177                "w16du:dateUtc"
 178            ):
 179                self._ensure_w16du_namespace()
 180                elem.setAttribute("w16du:dateUtc", timestamp)
 181
 182        def add_comment_attrs(elem):
 183            if not elem.hasAttribute("w:author"):
 184                elem.setAttribute("w:author", self.author)
 185            if not elem.hasAttribute("w:date"):
 186                elem.setAttribute("w:date", timestamp)
 187            if not elem.hasAttribute("w:initials"):
 188                elem.setAttribute("w:initials", self.initials)
 189
 190        def add_comment_extensible_date(elem):
 191            # Add w16cex:dateUtc for comment extensible elements
 192            if not elem.hasAttribute("w16cex:dateUtc"):
 193                self._ensure_w16cex_namespace()
 194                elem.setAttribute("w16cex:dateUtc", timestamp)
 195
 196        def add_xml_space_to_t(elem):
 197            # Add xml:space="preserve" to w:t if text has leading/trailing whitespace
 198            if (
 199                elem.firstChild
 200                and elem.firstChild.nodeType == elem.firstChild.TEXT_NODE
 201            ):
 202                text = elem.firstChild.data
 203                if text and (text[0].isspace() or text[-1].isspace()):
 204                    if not elem.hasAttribute("xml:space"):
 205                        elem.setAttribute("xml:space", "preserve")
 206
 207        for node in nodes:
 208            if node.nodeType != node.ELEMENT_NODE:
 209                continue
 210
 211            # Handle the node itself
 212            if node.tagName == "w:p":
 213                add_rsid_to_p(node)
 214            elif node.tagName == "w:r":
 215                add_rsid_to_r(node)
 216            elif node.tagName == "w:t":
 217                add_xml_space_to_t(node)
 218            elif node.tagName in ("w:ins", "w:del"):
 219                add_tracked_change_attrs(node)
 220            elif node.tagName == "w:comment":
 221                add_comment_attrs(node)
 222            elif node.tagName == "w16cex:commentExtensible":
 223                add_comment_extensible_date(node)
 224
 225            # Process descendants (getElementsByTagName doesn't return the element itself)
 226            for elem in node.getElementsByTagName("w:p"):
 227                add_rsid_to_p(elem)
 228            for elem in node.getElementsByTagName("w:r"):
 229                add_rsid_to_r(elem)
 230            for elem in node.getElementsByTagName("w:t"):
 231                add_xml_space_to_t(elem)
 232            for tag in ("w:ins", "w:del"):
 233                for elem in node.getElementsByTagName(tag):
 234                    add_tracked_change_attrs(elem)
 235            for elem in node.getElementsByTagName("w:comment"):
 236                add_comment_attrs(elem)
 237            for elem in node.getElementsByTagName("w16cex:commentExtensible"):
 238                add_comment_extensible_date(elem)
 239
 240    def replace_node(self, elem, new_content):
 241        """Replace node with automatic attribute injection."""
 242        nodes = super().replace_node(elem, new_content)
 243        self._inject_attributes_to_nodes(nodes)
 244        return nodes
 245
 246    def insert_after(self, elem, xml_content):
 247        """Insert after with automatic attribute injection."""
 248        nodes = super().insert_after(elem, xml_content)
 249        self._inject_attributes_to_nodes(nodes)
 250        return nodes
 251
 252    def insert_before(self, elem, xml_content):
 253        """Insert before with automatic attribute injection."""
 254        nodes = super().insert_before(elem, xml_content)
 255        self._inject_attributes_to_nodes(nodes)
 256        return nodes
 257
 258    def append_to(self, elem, xml_content):
 259        """Append to with automatic attribute injection."""
 260        nodes = super().append_to(elem, xml_content)
 261        self._inject_attributes_to_nodes(nodes)
 262        return nodes
 263
 264    def revert_insertion(self, elem):
 265        """Reject an insertion by wrapping its content in a deletion.
 266
 267        Wraps all runs inside w:ins in w:del, converting w:t to w:delText.
 268        Can process a single w:ins element or a container element with multiple w:ins.
 269
 270        Args:
 271            elem: Element to process (w:ins, w:p, w:body, etc.)
 272
 273        Returns:
 274            list: List containing the processed element(s)
 275
 276        Raises:
 277            ValueError: If the element contains no w:ins elements
 278
 279        Example:
 280            # Reject a single insertion
 281            ins = doc["word/document.xml"].get_node(tag="w:ins", attrs={"w:id": "5"})
 282            doc["word/document.xml"].revert_insertion(ins)
 283
 284            # Reject all insertions in a paragraph
 285            para = doc["word/document.xml"].get_node(tag="w:p", line_number=42)
 286            doc["word/document.xml"].revert_insertion(para)
 287        """
 288        # Collect insertions
 289        ins_elements = []
 290        if elem.tagName == "w:ins":
 291            ins_elements.append(elem)
 292        else:
 293            ins_elements.extend(elem.getElementsByTagName("w:ins"))
 294
 295        # Validate that there are insertions to reject
 296        if not ins_elements:
 297            raise ValueError(
 298                f"revert_insertion requires w:ins elements. "
 299                f"The provided element <{elem.tagName}> contains no insertions. "
 300            )
 301
 302        # Process all insertions - wrap all children in w:del
 303        for ins_elem in ins_elements:
 304            runs = list(ins_elem.getElementsByTagName("w:r"))
 305            if not runs:
 306                continue
 307
 308            # Create deletion wrapper
 309            del_wrapper = self.dom.createElement("w:del")
 310
 311            # Process each run
 312            for run in runs:
 313                # Convert w:t → w:delText and w:rsidR → w:rsidDel
 314                if run.hasAttribute("w:rsidR"):
 315                    run.setAttribute("w:rsidDel", run.getAttribute("w:rsidR"))
 316                    run.removeAttribute("w:rsidR")
 317                elif not run.hasAttribute("w:rsidDel"):
 318                    run.setAttribute("w:rsidDel", self.rsid)
 319
 320                for t_elem in list(run.getElementsByTagName("w:t")):
 321                    del_text = self.dom.createElement("w:delText")
 322                    # Copy ALL child nodes (not just firstChild) to handle entities
 323                    while t_elem.firstChild:
 324                        del_text.appendChild(t_elem.firstChild)
 325                    for i in range(t_elem.attributes.length):
 326                        attr = t_elem.attributes.item(i)
 327                        del_text.setAttribute(attr.name, attr.value)
 328                    t_elem.parentNode.replaceChild(del_text, t_elem)
 329
 330            # Move all children from ins to del wrapper
 331            while ins_elem.firstChild:
 332                del_wrapper.appendChild(ins_elem.firstChild)
 333
 334            # Add del wrapper back to ins
 335            ins_elem.appendChild(del_wrapper)
 336
 337            # Inject attributes to the deletion wrapper
 338            self._inject_attributes_to_nodes([del_wrapper])
 339
 340        return [elem]
 341
 342    def revert_deletion(self, elem):
 343        """Reject a deletion by re-inserting the deleted content.
 344
 345        Creates w:ins elements after each w:del, copying deleted content and
 346        converting w:delText back to w:t.
 347        Can process a single w:del element or a container element with multiple w:del.
 348
 349        Args:
 350            elem: Element to process (w:del, w:p, w:body, etc.)
 351
 352        Returns:
 353            list: If elem is w:del, returns [elem, new_ins]. Otherwise returns [elem].
 354
 355        Raises:
 356            ValueError: If the element contains no w:del elements
 357
 358        Example:
 359            # Reject a single deletion - returns [w:del, w:ins]
 360            del_elem = doc["word/document.xml"].get_node(tag="w:del", attrs={"w:id": "3"})
 361            nodes = doc["word/document.xml"].revert_deletion(del_elem)
 362
 363            # Reject all deletions in a paragraph - returns [para]
 364            para = doc["word/document.xml"].get_node(tag="w:p", line_number=42)
 365            nodes = doc["word/document.xml"].revert_deletion(para)
 366        """
 367        # Collect deletions FIRST - before we modify the DOM
 368        del_elements = []
 369        is_single_del = elem.tagName == "w:del"
 370
 371        if is_single_del:
 372            del_elements.append(elem)
 373        else:
 374            del_elements.extend(elem.getElementsByTagName("w:del"))
 375
 376        # Validate that there are deletions to reject
 377        if not del_elements:
 378            raise ValueError(
 379                f"revert_deletion requires w:del elements. "
 380                f"The provided element <{elem.tagName}> contains no deletions. "
 381            )
 382
 383        # Track created insertion (only relevant if elem is a single w:del)
 384        created_insertion = None
 385
 386        # Process all deletions - create insertions that copy the deleted content
 387        for del_elem in del_elements:
 388            # Clone the deleted runs and convert them to insertions
 389            runs = list(del_elem.getElementsByTagName("w:r"))
 390            if not runs:
 391                continue
 392
 393            # Create insertion wrapper
 394            ins_elem = self.dom.createElement("w:ins")
 395
 396            for run in runs:
 397                # Clone the run
 398                new_run = run.cloneNode(True)
 399
 400                # Convert w:delText → w:t
 401                for del_text in list(new_run.getElementsByTagName("w:delText")):
 402                    t_elem = self.dom.createElement("w:t")
 403                    # Copy ALL child nodes (not just firstChild) to handle entities
 404                    while del_text.firstChild:
 405                        t_elem.appendChild(del_text.firstChild)
 406                    for i in range(del_text.attributes.length):
 407                        attr = del_text.attributes.item(i)
 408                        t_elem.setAttribute(attr.name, attr.value)
 409                    del_text.parentNode.replaceChild(t_elem, del_text)
 410
 411                # Update run attributes: w:rsidDel → w:rsidR
 412                if new_run.hasAttribute("w:rsidDel"):
 413                    new_run.setAttribute("w:rsidR", new_run.getAttribute("w:rsidDel"))
 414                    new_run.removeAttribute("w:rsidDel")
 415                elif not new_run.hasAttribute("w:rsidR"):
 416                    new_run.setAttribute("w:rsidR", self.rsid)
 417
 418                ins_elem.appendChild(new_run)
 419
 420            # Insert the new insertion after the deletion
 421            nodes = self.insert_after(del_elem, ins_elem.toxml())
 422
 423            # If processing a single w:del, track the created insertion
 424            if is_single_del and nodes:
 425                created_insertion = nodes[0]
 426
 427        # Return based on input type
 428        if is_single_del and created_insertion:
 429            return [elem, created_insertion]
 430        else:
 431            return [elem]
 432
 433    @staticmethod
 434    def suggest_paragraph(xml_content: str) -> str:
 435        """Transform paragraph XML to add tracked change wrapping for insertion.
 436
 437        Wraps runs in <w:ins> and adds <w:ins/> to w:rPr in w:pPr for numbered lists.
 438
 439        Args:
 440            xml_content: XML string containing a <w:p> element
 441
 442        Returns:
 443            str: Transformed XML with tracked change wrapping
 444        """
 445        wrapper = f'<root xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">{xml_content}</root>'
 446        doc = minidom.parseString(wrapper)
 447        para = doc.getElementsByTagName("w:p")[0]
 448
 449        # Ensure w:pPr exists
 450        pPr_list = para.getElementsByTagName("w:pPr")
 451        if not pPr_list:
 452            pPr = doc.createElement("w:pPr")
 453            para.insertBefore(
 454                pPr, para.firstChild
 455            ) if para.firstChild else para.appendChild(pPr)
 456        else:
 457            pPr = pPr_list[0]
 458
 459        # Ensure w:rPr exists in w:pPr
 460        rPr_list = pPr.getElementsByTagName("w:rPr")
 461        if not rPr_list:
 462            rPr = doc.createElement("w:rPr")
 463            pPr.appendChild(rPr)
 464        else:
 465            rPr = rPr_list[0]
 466
 467        # Add <w:ins/> to w:rPr
 468        ins_marker = doc.createElement("w:ins")
 469        rPr.insertBefore(
 470            ins_marker, rPr.firstChild
 471        ) if rPr.firstChild else rPr.appendChild(ins_marker)
 472
 473        # Wrap all non-pPr children in <w:ins>
 474        ins_wrapper = doc.createElement("w:ins")
 475        for child in [c for c in para.childNodes if c.nodeName != "w:pPr"]:
 476            para.removeChild(child)
 477            ins_wrapper.appendChild(child)
 478        para.appendChild(ins_wrapper)
 479
 480        return para.toxml()
 481
 482    def suggest_deletion(self, elem):
 483        """Mark a w:r or w:p element as deleted with tracked changes (in-place DOM manipulation).
 484
 485        For w:r: wraps in <w:del>, converts <w:t> to <w:delText>, preserves w:rPr
 486        For w:p (regular): wraps content in <w:del>, converts <w:t> to <w:delText>
 487        For w:p (numbered list): adds <w:del/> to w:rPr in w:pPr, wraps content in <w:del>
 488
 489        Args:
 490            elem: A w:r or w:p DOM element without existing tracked changes
 491
 492        Returns:
 493            Element: The modified element
 494
 495        Raises:
 496            ValueError: If element has existing tracked changes or invalid structure
 497        """
 498        if elem.nodeName == "w:r":
 499            # Check for existing w:delText
 500            if elem.getElementsByTagName("w:delText"):
 501                raise ValueError("w:r element already contains w:delText")
 502
 503            # Convert w:t → w:delText
 504            for t_elem in list(elem.getElementsByTagName("w:t")):
 505                del_text = self.dom.createElement("w:delText")
 506                # Copy ALL child nodes (not just firstChild) to handle entities
 507                while t_elem.firstChild:
 508                    del_text.appendChild(t_elem.firstChild)
 509                # Preserve attributes like xml:space
 510                for i in range(t_elem.attributes.length):
 511                    attr = t_elem.attributes.item(i)
 512                    del_text.setAttribute(attr.name, attr.value)
 513                t_elem.parentNode.replaceChild(del_text, t_elem)
 514
 515            # Update run attributes: w:rsidR → w:rsidDel
 516            if elem.hasAttribute("w:rsidR"):
 517                elem.setAttribute("w:rsidDel", elem.getAttribute("w:rsidR"))
 518                elem.removeAttribute("w:rsidR")
 519            elif not elem.hasAttribute("w:rsidDel"):
 520                elem.setAttribute("w:rsidDel", self.rsid)
 521
 522            # Wrap in w:del
 523            del_wrapper = self.dom.createElement("w:del")
 524            parent = elem.parentNode
 525            parent.insertBefore(del_wrapper, elem)
 526            parent.removeChild(elem)
 527            del_wrapper.appendChild(elem)
 528
 529            # Inject attributes to the deletion wrapper
 530            self._inject_attributes_to_nodes([del_wrapper])
 531
 532            return del_wrapper
 533
 534        elif elem.nodeName == "w:p":
 535            # Check for existing tracked changes
 536            if elem.getElementsByTagName("w:ins") or elem.getElementsByTagName("w:del"):
 537                raise ValueError("w:p element already contains tracked changes")
 538
 539            # Check if it's a numbered list item
 540            pPr_list = elem.getElementsByTagName("w:pPr")
 541            is_numbered = pPr_list and pPr_list[0].getElementsByTagName("w:numPr")
 542
 543            if is_numbered:
 544                # Add <w:del/> to w:rPr in w:pPr
 545                pPr = pPr_list[0]
 546                rPr_list = pPr.getElementsByTagName("w:rPr")
 547
 548                if not rPr_list:
 549                    rPr = self.dom.createElement("w:rPr")
 550                    pPr.appendChild(rPr)
 551                else:
 552                    rPr = rPr_list[0]
 553
 554                # Add <w:del/> marker
 555                del_marker = self.dom.createElement("w:del")
 556                rPr.insertBefore(
 557                    del_marker, rPr.firstChild
 558                ) if rPr.firstChild else rPr.appendChild(del_marker)
 559
 560            # Convert w:t → w:delText in all runs
 561            for t_elem in list(elem.getElementsByTagName("w:t")):
 562                del_text = self.dom.createElement("w:delText")
 563                # Copy ALL child nodes (not just firstChild) to handle entities
 564                while t_elem.firstChild:
 565                    del_text.appendChild(t_elem.firstChild)
 566                # Preserve attributes like xml:space
 567                for i in range(t_elem.attributes.length):
 568                    attr = t_elem.attributes.item(i)
 569                    del_text.setAttribute(attr.name, attr.value)
 570                t_elem.parentNode.replaceChild(del_text, t_elem)
 571
 572            # Update run attributes: w:rsidR → w:rsidDel
 573            for run in elem.getElementsByTagName("w:r"):
 574                if run.hasAttribute("w:rsidR"):
 575                    run.setAttribute("w:rsidDel", run.getAttribute("w:rsidR"))
 576                    run.removeAttribute("w:rsidR")
 577                elif not run.hasAttribute("w:rsidDel"):
 578                    run.setAttribute("w:rsidDel", self.rsid)
 579
 580            # Wrap all non-pPr children in <w:del>
 581            del_wrapper = self.dom.createElement("w:del")
 582            for child in [c for c in elem.childNodes if c.nodeName != "w:pPr"]:
 583                elem.removeChild(child)
 584                del_wrapper.appendChild(child)
 585            elem.appendChild(del_wrapper)
 586
 587            # Inject attributes to the deletion wrapper
 588            self._inject_attributes_to_nodes([del_wrapper])
 589
 590            return elem
 591
 592        else:
 593            raise ValueError(f"Element must be w:r or w:p, got {elem.nodeName}")
 594
 595
 596def _generate_hex_id() -> str:
 597    """Generate random 8-character hex ID for para/durable IDs.
 598
 599    Values are constrained to be less than 0x7FFFFFFF per OOXML spec:
 600    - paraId must be < 0x80000000
 601    - durableId must be < 0x7FFFFFFF
 602    We use the stricter constraint (0x7FFFFFFF) for both.
 603    """
 604    return f"{random.randint(1, 0x7FFFFFFE):08X}"
 605
 606
 607def _generate_rsid() -> str:
 608    """Generate random 8-character hex RSID."""
 609    return "".join(random.choices("0123456789ABCDEF", k=8))
 610
 611
 612class Document:
 613    """Manages comments in unpacked Word documents."""
 614
 615    def __init__(
 616        self,
 617        unpacked_dir,
 618        rsid=None,
 619        track_revisions=False,
 620        author="Claude",
 621        initials="C",
 622    ):
 623        """
 624        Initialize with path to unpacked Word document directory.
 625        Automatically sets up comment infrastructure (people.xml, RSIDs).
 626
 627        Args:
 628            unpacked_dir: Path to unpacked DOCX directory (must contain word/ subdirectory)
 629            rsid: Optional RSID to use for all comment elements. If not provided, one will be generated.
 630            track_revisions: If True, enables track revisions in settings.xml (default: False)
 631            author: Default author name for comments (default: "Claude")
 632            initials: Default author initials for comments (default: "C")
 633        """
 634        self.original_path = Path(unpacked_dir)
 635
 636        if not self.original_path.exists() or not self.original_path.is_dir():
 637            raise ValueError(f"Directory not found: {unpacked_dir}")
 638
 639        # Create temporary directory with subdirectories for unpacked content and baseline
 640        self.temp_dir = tempfile.mkdtemp(prefix="docx_")
 641        self.unpacked_path = Path(self.temp_dir) / "unpacked"
 642        shutil.copytree(self.original_path, self.unpacked_path)
 643
 644        # Pack original directory into temporary .docx for validation baseline (outside unpacked dir)
 645        self.original_docx = Path(self.temp_dir) / "original.docx"
 646        pack_document(self.original_path, self.original_docx, validate=False)
 647
 648        self.word_path = self.unpacked_path / "word"
 649
 650        # Generate RSID if not provided
 651        self.rsid = rsid if rsid else _generate_rsid()
 652        print(f"Using RSID: {self.rsid}")
 653
 654        # Set default author and initials
 655        self.author = author
 656        self.initials = initials
 657
 658        # Cache for lazy-loaded editors
 659        self._editors = {}
 660
 661        # Comment file paths
 662        self.comments_path = self.word_path / "comments.xml"
 663        self.comments_extended_path = self.word_path / "commentsExtended.xml"
 664        self.comments_ids_path = self.word_path / "commentsIds.xml"
 665        self.comments_extensible_path = self.word_path / "commentsExtensible.xml"
 666
 667        # Load existing comments and determine next ID (before setup modifies files)
 668        self.existing_comments = self._load_existing_comments()
 669        self.next_comment_id = self._get_next_comment_id()
 670
 671        # Convenient access to document.xml editor (semi-private)
 672        self._document = self["word/document.xml"]
 673
 674        # Setup tracked changes infrastructure
 675        self._setup_tracking(track_revisions=track_revisions)
 676
 677        # Add author to people.xml
 678        self._add_author_to_people(author)
 679
 680    def __getitem__(self, xml_path: str) -> DocxXMLEditor:
 681        """
 682        Get or create a DocxXMLEditor for the specified XML file.
 683
 684        Enables lazy-loaded editors with bracket notation:
 685            node = doc["word/document.xml"].get_node(tag="w:p", line_number=42)
 686
 687        Args:
 688            xml_path: Relative path to XML file (e.g., "word/document.xml", "word/comments.xml")
 689
 690        Returns:
 691            DocxXMLEditor instance for the specified file
 692
 693        Raises:
 694            ValueError: If the file does not exist
 695
 696        Example:
 697            # Get node from document.xml
 698            node = doc["word/document.xml"].get_node(tag="w:del", attrs={"w:id": "1"})
 699
 700            # Get node from comments.xml
 701            comment = doc["word/comments.xml"].get_node(tag="w:comment", attrs={"w:id": "0"})
 702        """
 703        if xml_path not in self._editors:
 704            file_path = self.unpacked_path / xml_path
 705            if not file_path.exists():
 706                raise ValueError(f"XML file not found: {xml_path}")
 707            # Use DocxXMLEditor with RSID, author, and initials for all editors
 708            self._editors[xml_path] = DocxXMLEditor(
 709                file_path, rsid=self.rsid, author=self.author, initials=self.initials
 710            )
 711        return self._editors[xml_path]
 712
 713    def add_comment(self, start, end, text: str) -> int:
 714        """
 715        Add a comment spanning from one element to another.
 716
 717        Args:
 718            start: DOM element for the starting point
 719            end: DOM element for the ending point
 720            text: Comment content
 721
 722        Returns:
 723            The comment ID that was created
 724
 725        Example:
 726            start_node = cm.get_document_node(tag="w:del", id="1")
 727            end_node = cm.get_document_node(tag="w:ins", id="2")
 728            cm.add_comment(start=start_node, end=end_node, text="Explanation")
 729        """
 730        comment_id = self.next_comment_id
 731        para_id = _generate_hex_id()
 732        durable_id = _generate_hex_id()
 733        timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
 734
 735        # Add comment ranges to document.xml immediately
 736        self._document.insert_before(start, self._comment_range_start_xml(comment_id))
 737
 738        # If end node is a paragraph, append comment markup inside it
 739        # Otherwise insert after it (for run-level anchors)
 740        if end.tagName == "w:p":
 741            self._document.append_to(end, self._comment_range_end_xml(comment_id))
 742        else:
 743            self._document.insert_after(end, self._comment_range_end_xml(comment_id))
 744
 745        # Add to comments.xml immediately
 746        self._add_to_comments_xml(
 747            comment_id, para_id, text, self.author, self.initials, timestamp
 748        )
 749
 750        # Add to commentsExtended.xml immediately
 751        self._add_to_comments_extended_xml(para_id, parent_para_id=None)
 752
 753        # Add to commentsIds.xml immediately
 754        self._add_to_comments_ids_xml(para_id, durable_id)
 755
 756        # Add to commentsExtensible.xml immediately
 757        self._add_to_comments_extensible_xml(durable_id)
 758
 759        # Update existing_comments so replies work
 760        self.existing_comments[comment_id] = {"para_id": para_id}
 761
 762        self.next_comment_id += 1
 763        return comment_id
 764
 765    def reply_to_comment(
 766        self,
 767        parent_comment_id: int,
 768        text: str,
 769    ) -> int:
 770        """
 771        Add a reply to an existing comment.
 772
 773        Args:
 774            parent_comment_id: The w:id of the parent comment to reply to
 775            text: Reply text
 776
 777        Returns:
 778            The comment ID that was created for the reply
 779
 780        Example:
 781            cm.reply_to_comment(parent_comment_id=0, text="I agree with this change")
 782        """
 783        if parent_comment_id not in self.existing_comments:
 784            raise ValueError(f"Parent comment with id={parent_comment_id} not found")
 785
 786        parent_info = self.existing_comments[parent_comment_id]
 787        comment_id = self.next_comment_id
 788        para_id = _generate_hex_id()
 789        durable_id = _generate_hex_id()
 790        timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
 791
 792        # Add comment ranges to document.xml immediately
 793        parent_start_elem = self._document.get_node(
 794            tag="w:commentRangeStart", attrs={"w:id": str(parent_comment_id)}
 795        )
 796        parent_ref_elem = self._document.get_node(
 797            tag="w:commentReference", attrs={"w:id": str(parent_comment_id)}
 798        )
 799
 800        self._document.insert_after(
 801            parent_start_elem, self._comment_range_start_xml(comment_id)
 802        )
 803        parent_ref_run = parent_ref_elem.parentNode
 804        self._document.insert_after(
 805            parent_ref_run, f'<w:commentRangeEnd w:id="{comment_id}"/>'
 806        )
 807        self._document.insert_after(
 808            parent_ref_run, self._comment_ref_run_xml(comment_id)
 809        )
 810
 811        # Add to comments.xml immediately
 812        self._add_to_comments_xml(
 813            comment_id, para_id, text, self.author, self.initials, timestamp
 814        )
 815
 816        # Add to commentsExtended.xml immediately (with parent)
 817        self._add_to_comments_extended_xml(
 818            para_id, parent_para_id=parent_info["para_id"]
 819        )
 820
 821        # Add to commentsIds.xml immediately
 822        self._add_to_comments_ids_xml(para_id, durable_id)
 823
 824        # Add to commentsExtensible.xml immediately
 825        self._add_to_comments_extensible_xml(durable_id)
 826
 827        # Update existing_comments so replies work
 828        self.existing_comments[comment_id] = {"para_id": para_id}
 829
 830        self.next_comment_id += 1
 831        return comment_id
 832
 833    def __del__(self):
 834        """Clean up temporary directory on deletion."""
 835        if hasattr(self, "temp_dir") and Path(self.temp_dir).exists():
 836            shutil.rmtree(self.temp_dir)
 837
 838    def validate(self) -> None:
 839        """
 840        Validate the document against XSD schema and redlining rules.
 841
 842        Raises:
 843            ValueError: If validation fails.
 844        """
 845        # Create validators with current state
 846        schema_validator = DOCXSchemaValidator(
 847            self.unpacked_path, self.original_docx, verbose=False
 848        )
 849        redlining_validator = RedliningValidator(
 850            self.unpacked_path, self.original_docx, verbose=False
 851        )
 852
 853        # Run validations
 854        if not schema_validator.validate():
 855            raise ValueError("Schema validation failed")
 856        if not redlining_validator.validate():
 857            raise ValueError("Redlining validation failed")
 858
 859    def save(self, destination=None, validate=True) -> None:
 860        """
 861        Save all modified XML files to disk and copy to destination directory.
 862
 863        This persists all changes made via add_comment() and reply_to_comment().
 864
 865        Args:
 866            destination: Optional path to save to. If None, saves back to original directory.
 867            validate: If True, validates document before saving (default: True).
 868        """
 869        # Only ensure comment relationships and content types if comment files exist
 870        if self.comments_path.exists():
 871            self._ensure_comment_relationships()
 872            self._ensure_comment_content_types()
 873
 874        # Save all modified XML files in temp directory
 875        for editor in self._editors.values():
 876            editor.save()
 877
 878        # Validate by default
 879        if validate:
 880            self.validate()
 881
 882        # Copy contents from temp directory to destination (or original directory)
 883        target_path = Path(destination) if destination else self.original_path
 884        shutil.copytree(self.unpacked_path, target_path, dirs_exist_ok=True)
 885
 886    # ==================== Private: Initialization ====================
 887
 888    def _get_next_comment_id(self):
 889        """Get the next available comment ID."""
 890        if not self.comments_path.exists():
 891            return 0
 892
 893        editor = self["word/comments.xml"]
 894        max_id = -1
 895        for comment_elem in editor.dom.getElementsByTagName("w:comment"):
 896            comment_id = comment_elem.getAttribute("w:id")
 897            if comment_id:
 898                try:
 899                    max_id = max(max_id, int(comment_id))
 900                except ValueError:
 901                    pass
 902        return max_id + 1
 903
 904    def _load_existing_comments(self):
 905        """Load existing comments from files to enable replies."""
 906        if not self.comments_path.exists():
 907            return {}
 908
 909        editor = self["word/comments.xml"]
 910        existing = {}
 911
 912        for comment_elem in editor.dom.getElementsByTagName("w:comment"):
 913            comment_id = comment_elem.getAttribute("w:id")
 914            if not comment_id:
 915                continue
 916
 917            # Find para_id from the w:p element within the comment
 918            para_id = None
 919            for p_elem in comment_elem.getElementsByTagName("w:p"):
 920                para_id = p_elem.getAttribute("w14:paraId")
 921                if para_id:
 922                    break
 923
 924            if not para_id:
 925                continue
 926
 927            existing[int(comment_id)] = {"para_id": para_id}
 928
 929        return existing
 930
 931    # ==================== Private: Setup Methods ====================
 932
 933    def _setup_tracking(self, track_revisions=False):
 934        """Set up comment infrastructure in unpacked directory.
 935
 936        Args:
 937            track_revisions: If True, enables track revisions in settings.xml
 938        """
 939        # Create or update word/people.xml
 940        people_file = self.word_path / "people.xml"
 941        self._update_people_xml(people_file)
 942
 943        # Update XML files
 944        self._add_content_type_for_people(self.unpacked_path / "[Content_Types].xml")
 945        self._add_relationship_for_people(
 946            self.word_path / "_rels" / "document.xml.rels"
 947        )
 948
 949        # Always add RSID to settings.xml, optionally enable trackRevisions
 950        self._update_settings(
 951            self.word_path / "settings.xml", track_revisions=track_revisions
 952        )
 953
 954    def _update_people_xml(self, path):
 955        """Create people.xml if it doesn't exist."""
 956        if not path.exists():
 957            # Copy from template
 958            shutil.copy(TEMPLATE_DIR / "people.xml", path)
 959
 960    def _add_content_type_for_people(self, path):
 961        """Add people.xml content type to [Content_Types].xml if not already present."""
 962        editor = self["[Content_Types].xml"]
 963
 964        if self._has_override(editor, "/word/people.xml"):
 965            return
 966
 967        # Add Override element
 968        root = editor.dom.documentElement
 969        override_xml = '<Override PartName="/word/people.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.people+xml"/>'
 970        editor.append_to(root, override_xml)
 971
 972    def _add_relationship_for_people(self, path):
 973        """Add people.xml relationship to document.xml.rels if not already present."""
 974        editor = self["word/_rels/document.xml.rels"]
 975
 976        if self._has_relationship(editor, "people.xml"):
 977            return
 978
 979        root = editor.dom.documentElement
 980        root_tag = root.tagName  # type: ignore
 981        prefix = root_tag.split(":")[0] + ":" if ":" in root_tag else ""
 982        next_rid = editor.get_next_rid()
 983
 984        # Create the relationship entry
 985        rel_xml = f'<{prefix}Relationship Id="{next_rid}" Type="http://schemas.microsoft.com/office/2011/relationships/people" Target="people.xml"/>'
 986        editor.append_to(root, rel_xml)
 987
 988    def _update_settings(self, path, track_revisions=False):
 989        """Add RSID and optionally enable track revisions in settings.xml.
 990
 991        Args:
 992            path: Path to settings.xml
 993            track_revisions: If True, adds trackRevisions element
 994
 995        Places elements per OOXML schema order:
 996        - trackRevisions: early (before defaultTabStop)
 997        - rsids: late (after compat)
 998        """
 999        editor = self["word/settings.xml"]
1000        root = editor.get_node(tag="w:settings")
1001        prefix = root.tagName.split(":")[0] if ":" in root.tagName else "w"
1002
1003        # Conditionally add trackRevisions if requested
1004        if track_revisions:
1005            track_revisions_exists = any(
1006                elem.tagName == f"{prefix}:trackRevisions"
1007                for elem in editor.dom.getElementsByTagName(f"{prefix}:trackRevisions")
1008            )
1009
1010            if not track_revisions_exists:
1011                track_rev_xml = f"<{prefix}:trackRevisions/>"
1012                # Try to insert before documentProtection, defaultTabStop, or at start
1013                inserted = False
1014                for tag in [f"{prefix}:documentProtection", f"{prefix}:defaultTabStop"]:
1015                    elements = editor.dom.getElementsByTagName(tag)
1016                    if elements:
1017                        editor.insert_before(elements[0], track_rev_xml)
1018                        inserted = True
1019                        break
1020                if not inserted:
1021                    # Insert as first child of settings
1022                    if root.firstChild:
1023                        editor.insert_before(root.firstChild, track_rev_xml)
1024                    else:
1025                        editor.append_to(root, track_rev_xml)
1026
1027        # Always check if rsids section exists
1028        rsids_elements = editor.dom.getElementsByTagName(f"{prefix}:rsids")
1029
1030        if not rsids_elements:
1031            # Add new rsids section
1032            rsids_xml = f'''<{prefix}:rsids>
1033  <{prefix}:rsidRoot {prefix}:val="{self.rsid}"/>
1034  <{prefix}:rsid {prefix}:val="{self.rsid}"/>
1035</{prefix}:rsids>'''
1036
1037            # Try to insert after compat, before clrSchemeMapping, or before closing tag
1038            inserted = False
1039            compat_elements = editor.dom.getElementsByTagName(f"{prefix}:compat")
1040            if compat_elements:
1041                editor.insert_after(compat_elements[0], rsids_xml)
1042                inserted = True
1043
1044            if not inserted:
1045                clr_elements = editor.dom.getElementsByTagName(
1046                    f"{prefix}:clrSchemeMapping"
1047                )
1048                if clr_elements:
1049                    editor.insert_before(clr_elements[0], rsids_xml)
1050                    inserted = True
1051
1052            if not inserted:
1053                editor.append_to(root, rsids_xml)
1054        else:
1055            # Check if this rsid already exists
1056            rsids_elem = rsids_elements[0]
1057            rsid_exists = any(
1058                elem.getAttribute(f"{prefix}:val") == self.rsid
1059                for elem in rsids_elem.getElementsByTagName(f"{prefix}:rsid")
1060            )
1061
1062            if not rsid_exists:
1063                rsid_xml = f'<{prefix}:rsid {prefix}:val="{self.rsid}"/>'
1064                editor.append_to(rsids_elem, rsid_xml)
1065
1066    # ==================== Private: XML File Creation ====================
1067
1068    def _add_to_comments_xml(
1069        self, comment_id, para_id, text, author, initials, timestamp
1070    ):
1071        """Add a single comment to comments.xml."""
1072        if not self.comments_path.exists():
1073            shutil.copy(TEMPLATE_DIR / "comments.xml", self.comments_path)
1074
1075        editor = self["word/comments.xml"]
1076        root = editor.get_node(tag="w:comments")
1077
1078        escaped_text = (
1079            text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
1080        )
1081        # Note: w:rsidR, w:rsidRDefault, w:rsidP on w:p, w:rsidR on w:r,
1082        # and w:author, w:date, w:initials on w:comment are automatically added by DocxXMLEditor
1083        comment_xml = f'''<w:comment w:id="{comment_id}">
1084  <w:p w14:paraId="{para_id}" w14:textId="77777777">
1085    <w:r><w:rPr><w:rStyle w:val="CommentReference"/></w:rPr><w:annotationRef/></w:r>
1086    <w:r><w:rPr><w:color w:val="000000"/><w:sz w:val="20"/><w:szCs w:val="20"/></w:rPr><w:t>{escaped_text}</w:t></w:r>
1087  </w:p>
1088</w:comment>'''
1089        editor.append_to(root, comment_xml)
1090
1091    def _add_to_comments_extended_xml(self, para_id, parent_para_id):
1092        """Add a single comment to commentsExtended.xml."""
1093        if not self.comments_extended_path.exists():
1094            shutil.copy(
1095                TEMPLATE_DIR / "commentsExtended.xml", self.comments_extended_path
1096            )
1097
1098        editor = self["word/commentsExtended.xml"]
1099        root = editor.get_node(tag="w15:commentsEx")
1100
1101        if parent_para_id:
1102            xml = f'<w15:commentEx w15:paraId="{para_id}" w15:paraIdParent="{parent_para_id}" w15:done="0"/>'
1103        else:
1104            xml = f'<w15:commentEx w15:paraId="{para_id}" w15:done="0"/>'
1105        editor.append_to(root, xml)
1106
1107    def _add_to_comments_ids_xml(self, para_id, durable_id):
1108        """Add a single comment to commentsIds.xml."""
1109        if not self.comments_ids_path.exists():
1110            shutil.copy(TEMPLATE_DIR / "commentsIds.xml", self.comments_ids_path)
1111
1112        editor = self["word/commentsIds.xml"]
1113        root = editor.get_node(tag="w16cid:commentsIds")
1114
1115        xml = f'<w16cid:commentId w16cid:paraId="{para_id}" w16cid:durableId="{durable_id}"/>'
1116        editor.append_to(root, xml)
1117
1118    def _add_to_comments_extensible_xml(self, durable_id):
1119        """Add a single comment to commentsExtensible.xml."""
1120        if not self.comments_extensible_path.exists():
1121            shutil.copy(
1122                TEMPLATE_DIR / "commentsExtensible.xml", self.comments_extensible_path
1123            )
1124
1125        editor = self["word/commentsExtensible.xml"]
1126        root = editor.get_node(tag="w16cex:commentsExtensible")
1127
1128        xml = f'<w16cex:commentExtensible w16cex:durableId="{durable_id}"/>'
1129        editor.append_to(root, xml)
1130
1131    # ==================== Private: XML Fragments ====================
1132
1133    def _comment_range_start_xml(self, comment_id):
1134        """Generate XML for comment range start."""
1135        return f'<w:commentRangeStart w:id="{comment_id}"/>'
1136
1137    def _comment_range_end_xml(self, comment_id):
1138        """Generate XML for comment range end with reference run.
1139
1140        Note: w:rsidR is automatically added by DocxXMLEditor.
1141        """
1142        return f'''<w:commentRangeEnd w:id="{comment_id}"/>
1143<w:r>
1144  <w:rPr><w:rStyle w:val="CommentReference"/></w:rPr>
1145  <w:commentReference w:id="{comment_id}"/>
1146</w:r>'''
1147
1148    def _comment_ref_run_xml(self, comment_id):
1149        """Generate XML for comment reference run.
1150
1151        Note: w:rsidR is automatically added by DocxXMLEditor.
1152        """
1153        return f'''<w:r>
1154  <w:rPr><w:rStyle w:val="CommentReference"/></w:rPr>
1155  <w:commentReference w:id="{comment_id}"/>
1156</w:r>'''
1157
1158    # ==================== Private: Metadata Updates ====================
1159
1160    def _has_relationship(self, editor, target):
1161        """Check if a relationship with given target exists."""
1162        for rel_elem in editor.dom.getElementsByTagName("Relationship"):
1163            if rel_elem.getAttribute("Target") == target:
1164                return True
1165        return False
1166
1167    def _has_override(self, editor, part_name):
1168        """Check if an override with given part name exists."""
1169        for override_elem in editor.dom.getElementsByTagName("Override"):
1170            if override_elem.getAttribute("PartName") == part_name:
1171                return True
1172        return False
1173
1174    def _has_author(self, editor, author):
1175        """Check if an author already exists in people.xml."""
1176        for person_elem in editor.dom.getElementsByTagName("w15:person"):
1177            if person_elem.getAttribute("w15:author") == author:
1178                return True
1179        return False
1180
1181    def _add_author_to_people(self, author):
1182        """Add author to people.xml (called during initialization)."""
1183        people_path = self.word_path / "people.xml"
1184
1185        # people.xml should already exist from _setup_tracking
1186        if not people_path.exists():
1187            raise ValueError("people.xml should exist after _setup_tracking")
1188
1189        editor = self["word/people.xml"]
1190        root = editor.get_node(tag="w15:people")
1191
1192        # Check if author already exists
1193        if self._has_author(editor, author):
1194            return
1195
1196        # Add author with proper XML escaping to prevent injection
1197        escaped_author = html.escape(author, quote=True)
1198        person_xml = f'''<w15:person w15:author="{escaped_author}">
1199  <w15:presenceInfo w15:providerId="None" w15:userId="{escaped_author}"/>
1200</w15:person>'''
1201        editor.append_to(root, person_xml)
1202
1203    def _ensure_comment_relationships(self):
1204        """Ensure word/_rels/document.xml.rels has comment relationships."""
1205        editor = self["word/_rels/document.xml.rels"]
1206
1207        if self._has_relationship(editor, "comments.xml"):
1208            return
1209
1210        root = editor.dom.documentElement
1211        root_tag = root.tagName  # type: ignore
1212        prefix = root_tag.split(":")[0] + ":" if ":" in root_tag else ""
1213        next_rid_num = int(editor.get_next_rid()[3:])
1214
1215        # Add relationship elements
1216        rels = [
1217            (
1218                next_rid_num,
1219                "http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments",
1220                "comments.xml",
1221            ),
1222            (
1223                next_rid_num + 1,
1224                "http://schemas.microsoft.com/office/2011/relationships/commentsExtended",
1225                "commentsExtended.xml",
1226            ),
1227            (
1228                next_rid_num + 2,
1229                "http://schemas.microsoft.com/office/2016/09/relationships/commentsIds",
1230                "commentsIds.xml",
1231            ),
1232            (
1233                next_rid_num + 3,
1234                "http://schemas.microsoft.com/office/2018/08/relationships/commentsExtensible",
1235                "commentsExtensible.xml",
1236            ),
1237        ]
1238
1239        for rel_id, rel_type, target in rels:
1240            rel_xml = f'<{prefix}Relationship Id="rId{rel_id}" Type="{rel_type}" Target="{target}"/>'
1241            editor.append_to(root, rel_xml)
1242
1243    def _ensure_comment_content_types(self):
1244        """Ensure [Content_Types].xml has comment content types."""
1245        editor = self["[Content_Types].xml"]
1246
1247        if self._has_override(editor, "/word/comments.xml"):
1248            return
1249
1250        root = editor.dom.documentElement
1251
1252        # Add Override elements
1253        overrides = [
1254            (
1255                "/word/comments.xml",
1256                "application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml",
1257            ),
1258            (
1259                "/word/commentsExtended.xml",
1260                "application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtended+xml",
1261            ),
1262            (
1263                "/word/commentsIds.xml",
1264                "application/vnd.openxmlformats-officedocument.wordprocessingml.commentsIds+xml",
1265            ),
1266            (
1267                "/word/commentsExtensible.xml",
1268                "application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtensible+xml",
1269            ),
1270        ]
1271
1272        for part_name, content_type in overrides:
1273            override_xml = (
1274                f'<Override PartName="{part_name}" ContentType="{content_type}"/>'
1275            )
1276            editor.append_to(root, override_xml)