diff --git a/scripts/gen-hl-skeleton-template/main.go b/scripts/gen-hl-skeleton-template/main.go index ade987c..868e93e 100644 --- a/scripts/gen-hl-skeleton-template/main.go +++ b/scripts/gen-hl-skeleton-template/main.go @@ -1,4 +1,6 @@ -// Merge-safe styled firm-skeleton generator (t-paliad-275 → t-paliad-364 P3a). +// Merge-safe firm-skeleton generator, rebuilt FROM the HLC Patents Style +// .dotm (t-paliad-367, supersedes the t-paliad-364 synthesised-Rubrum +// approach). // // Produces the firm-formatted, MERGE-SAFE Schriftsatz skeleton paliad's // submission generator picks up via the merge-path fallback chain @@ -9,54 +11,79 @@ // // Both tiers are GUARDED by docx.HasMergePlaceholders: a template only // wins the merge path if word/document.xml carries real {{key}} -// placeholders. The firm-skeleton's body had been repurposed into an -// anchors-only Composer base ({{#section:KEY}} markers; t-paliad-313 -// Slice B), so the guard rejected it and every generated submission fell -// back to the in-process docx.BuildFallbackSkeleton — a plain, generic -// (Heading1/Normal) Rubrum (kepler diagnosis t-paliad-363 §P3a). This -// generator restores a merge-safe firm-styled body so the guard accepts -// it again and the resolver auto-prefers it (no handler change). +// placeholders, so the body this tool emits MUST carry them. // -// HOW: it does NOT rebuild the package from the macro-bearing .dotm. -// Instead it takes an already-clean .docx CARRIER (the deployed -// firm-skeleton) and replaces ONLY word/document.xml with a clean, -// caption-driven Rubrum, preserving every other part byte-for-byte — -// the firm styles.xml, theme, numbering, fontTable, the letterhead -// header[12]/footer[12] + logo media, customXml, settings. The carrier's -// own namespaces and (which wires the letterhead -// header/footer references) are reused verbatim, so the output keeps the -// firm letterhead on every page. 
+// WHY THE REBUILD (m mandate 2026-06-01): the earlier generator +// SYNTHESISED a flat, paragraph-based Rubrum in Go — it borrowed the +// firm's *style names* but re-authored the letterhead layout itself. m's +// complaint ("I dont want that formatting in code! it should use our word +// files and styles") was exactly that. The HLC Patents Style .dotm already +// contains the real, firm-authored letterhead + Rubrum in Word: a header +// table (logo via header2 + sender/recipient address blocks), a +// case-information table, and a recitals table — all in the firm's own +// HLCpat- styles. This generator LIFTS that authored layout verbatim and +// only swaps the firm's example text for paliad's merge placeholders. No +// letterhead/Rubrum formatting is authored in code. // -// The Rubrum body MIRRORS docx.BuildFallbackSkeleton (the in-process -// merge fallback) — same layout, same {{key}} / {{caption.*}} placeholder -// surface — but applies the firm's named paragraph styles instead of the -// generic Heading2/Normal: party lines → Table-Recitals-Party / -// PartyDetails / PartyRoles, the versus connector → Sequencers, section -// heads → Heading-H2, the signature block → Signature, -// everything else → Body-B0. +// PIPELINE (single command, -in points at the .dotm): // -// The caption wording (heading / designations / versus / subject) comes -// from the SHARED parametric {{caption.*}} keys (t-paliad-358 A-S2), in -// their bare (draft-language-resolved) form, so the same file renders DE -// or EN caption wording per draft. Only the static scaffold labels -// ("Aktenzeichen:", "wegen", …) and the today/our-side aliases are -// language-baked — hence one file per language. +// 1. docx.ConvertDotmToDocx — strips the macro parts (vbaProject.bin + +// rels, vbaData.xml, customizations.xml), demotes the template content +// type to plain document, and rewrites the manifests. Idempotent on a +// .docx, so a pre-converted carrier also works. 
Every other part — +// styles.xml (HLCpat-), theme, numbering, settings, fontTable, the +// header1/2 + footer1/2 letterhead and its logo media, glossary, +// customXml — passes through bit-for-bit. +// +// 2. word/footer1.xml — the firm-NAME structured-document-tag content +// ("Hogan Lovells") is placeholderised to {{firm.name}} (A-S3 +// firm-agnostic branding). The footer2 legal-entity boilerplate +// (registered office, members' liability, office list) is LEFT AS-IS: +// A-S3 keeps a firm's legal facts intact; only the display NAME flexes. +// +// 3. word/document.xml — the authored <w:body> is kept verbatim from the +// header table through the end of the recitals table (the firm's three +// letterhead tables), then TRUNCATED: the Table of Contents, the formal +// checklist demo and the entire "Template Info And Manual" style-guide +// section that follow in the .dotm are document-internal documentation, +// not part of a submission, so they are dropped. The carrier's own +// <w:document> open tag and <w:sectPr> are reused verbatim, so the +// header/footer references (rId16=header1, rId17=footer1, +// rId18=header2 first-page, rId19=footer2 first-page), the A4 geometry +// and the titlePg first-page setup keep the firm letterhead on every +// page. +// +// Inside the kept region a structure-aware paragraph walk swaps the firm's +// example text for merge placeholders (see exampleRules). The walk is +// occurrence-ordered, not a blind string replace: Word fragments text +// across runs (rsid tracking) and the recitals carry two identical +// "Representative Details" / "represented by" lines, so each rule is +// consumed in document order. A transformed paragraph keeps its <w:pPr> +// (paragraph style + numbering suppression + indents) verbatim and carries +// the placeholder in a single run; the named HLCpat- style supplies the +// formatting, so no run properties are authored here. // // Style-prefix drift: the firm style IDs are auto-detected from the -// carrier's word/styles.xml.
The originally-deployed firm-skeleton uses -// the `HLpat-` prefix; the upstream `HLC Patents Style.dotm` was rebuilt -// during the HL→HLC rebrand and now emits `HLCpat-`. Detecting the prefix -// from the carrier keeps this generator correct against either source and -// across that migration. (Reconciling the prefix across all consumers is -// a separate follow-up — flagged in t-paliad-364.) +// carrier's word/styles.xml — the current HLC Patents Style .dotm emits +// `HLCpat-`, the originally-deployed firm-skeleton used `HLpat-`. Detecting +// the prefix keeps this generator correct against either source. (Rebuilding +// from the current .dotm reconciles the drift flagged in t-paliad-366 item +// 1: carrier + Rubrum body are then both HLCpat-.) +// +// DATA GAP (t-paliad-367, flagged to m): paliad has no structured HL office +// postal addresses (internal/offices carries labels only) and no court_id +// FK on projects (project.court is free text), so the SENDER address block +// (HL Düsseldorf, real firm identity) and the RECIPIENT court address lines +// are left as the .dotm's authored editable text; only the recipient court +// NAME is wired to {{project.court}}. When office-address data + a court_id +// linkage land, the corresponding rules below switch from "keep" to a +// placeholder. // // Run (one file per language): // -// go run ./scripts/gen-hl-skeleton-template -in carrier.docx -lang de -out _firm-skeleton.docx -// go run ./scripts/gen-hl-skeleton-template -in carrier.docx -lang en -out _skeleton.en.docx +// go run ./scripts/gen-hl-skeleton-template -in HLC-Patents-Style.dotm -lang de -out _firm-skeleton.docx +// go run ./scripts/gen-hl-skeleton-template -in HLC-Patents-Style.dotm -lang en -out _skeleton.en.docx // -// where carrier.docx is the deployed firm-skeleton fetched from -// HL/mWorkRepo:6 - material/Templates/Word/Paliad/HLC/_firm-skeleton.docx. // Output is byte-stable across runs for a given (input, lang). 
package main @@ -70,16 +97,18 @@ import ( "regexp" "strings" "time" + + "mgit.msbls.de/m/paliad/pkg/docforge/docx" ) func main() { - in := flag.String("in", "", "path to the clean .docx carrier (deployed firm-skeleton) — required") + in := flag.String("in", "", "path to the firm style .dotm (or pre-converted .docx) — required") out := flag.String("out", "_firm-skeleton.docx", "output .docx path") lang := flag.String("lang", "de", "draft language for the static scaffold labels: de | en") flag.Parse() if *in == "" { - fmt.Fprintln(os.Stderr, "gen-hl-skeleton-template: -in is required (path to the clean .docx firm-skeleton carrier)") + fmt.Fprintln(os.Stderr, "gen-hl-skeleton-template: -in is required (path to the HLC Patents Style .dotm)") os.Exit(2) } l := strings.ToLower(strings.TrimSpace(*lang)) @@ -88,42 +117,46 @@ func main() { os.Exit(2) } - srcBytes, err := os.ReadFile(*in) + raw, err := os.ReadFile(*in) if err != nil { - fmt.Fprintln(os.Stderr, "gen-hl-skeleton-template: read carrier:", err) + fmt.Fprintln(os.Stderr, "gen-hl-skeleton-template: read input:", err) os.Exit(1) } - docx, err := buildDocx(srcBytes, l) + // Step 1: macro-strip the .dotm into a clean .docx carrier (idempotent + // on an already-clean .docx). + carrier, err := docx.ConvertDotmToDocx(raw) + if err != nil { + fmt.Fprintln(os.Stderr, "gen-hl-skeleton-template: convert .dotm→.docx:", err) + os.Exit(1) + } + + result, err := buildSkeleton(carrier, l) if err != nil { fmt.Fprintln(os.Stderr, "gen-hl-skeleton-template:", err) os.Exit(1) } - if err := os.WriteFile(*out, docx, 0o644); err != nil { + if err := os.WriteFile(*out, result, 0o644); err != nil { fmt.Fprintln(os.Stderr, "gen-hl-skeleton-template: write:", err) os.Exit(1) } - fmt.Printf("wrote %s (%d bytes, lang=%s)\n", *out, len(docx), l) + fmt.Printf("wrote %s (%d bytes, lang=%s)\n", *out, len(result), l) } // fixedTime pins every zip entry's mtime so successive runs over the same -// (carrier, lang) produce byte-stable output. 
Useful for diffing the -// generated file in review. +// (carrier, lang) produce byte-stable output. var fixedTime = time.Date(2026, 6, 1, 0, 0, 0, 0, time.UTC) -// buildDocx copies every part of the carrier byte-for-byte except -// word/document.xml, which is replaced with the merge-safe firm-styled -// Rubrum for the requested language. The carrier's own open -// tag and are reused so the letterhead header/footer wiring is -// preserved exactly. -func buildDocx(src []byte, lang string) ([]byte, error) { - zr, err := zip.NewReader(bytes.NewReader(src), int64(len(src))) +// buildSkeleton copies every part of the carrier byte-for-byte except +// word/document.xml (rebuilt: authored letterhead/Rubrum tables + +// placeholders, manual truncated) and word/footer1.xml (firm name → +// {{firm.name}}). +func buildSkeleton(carrier []byte, lang string) ([]byte, error) { + zr, err := zip.NewReader(bytes.NewReader(carrier), int64(len(carrier))) if err != nil { return nil, fmt.Errorf("open carrier zip: %w", err) } - // Read the two parts we need to inspect: styles.xml (prefix detection) - // and document.xml (open tag + sectPr reuse). 
var stylesXML, docXML string for _, f := range zr.File { switch f.Name { @@ -152,15 +185,10 @@ func buildDocx(src []byte, lang string) ([]byte, error) { if err != nil { return nil, err } - openTag, err := documentOpenTag(docXML) + newDoc, err := rebuildDocument(docXML, lang, prefix) if err != nil { return nil, err } - sectPr, err := extractSectPr(docXML) - if err != nil { - return nil, err - } - newDoc := buildDocumentXML(lang, prefix, openTag, sectPr) var buf bytes.Buffer zw := zip.NewWriter(&buf) @@ -169,8 +197,11 @@ func buildDocx(src []byte, lang string) ([]byte, error) { if err != nil { return nil, fmt.Errorf("read %s: %w", f.Name, err) } - if f.Name == "word/document.xml" { + switch f.Name { + case "word/document.xml": body = []byte(newDoc) + case "word/footer1.xml": + body = []byte(placeholderiseFooterFirmName(string(body))) } w, err := zw.CreateHeader(&zip.FileHeader{ Name: f.Name, @@ -200,10 +231,9 @@ func readZipEntry(f *zip.File) ([]byte, error) { } // detectStylePrefix returns the firm style-id prefix the carrier defines — -// "HLCpat-" (current HLC Patents Style.dotm) or "HLpat-" (the originally +// "HLCpat-" (current HLC Patents Style .dotm) or "HLpat-" (the originally // deployed firm-skeleton) — keyed off the Recitals-Party style every firm -// Rubrum needs. Erroring out here is deliberate: a carrier missing the -// Recitals styles would silently produce an unstyled document. +// Rubrum needs. func detectStylePrefix(stylesXML string) (string, error) { for _, p := range []string{"HLCpat-", "HLpat-"} { if strings.Contains(stylesXML, `w:styleId="`+p+`Table-Recitals-Party"`) { @@ -213,189 +243,287 @@ func detectStylePrefix(stylesXML string) (string, error) { return "", fmt.Errorf("carrier styles.xml carries neither HLCpat-Table-Recitals-Party nor HLpat-Table-Recitals-Party — not a firm-styled skeleton") } +// placeholderiseFooterFirmName swaps the firm-NAME structured-document-tag +// content in footer1 ("Hogan Lovells") for {{firm.name}} (A-S3). 
The run is +// the only bare Hogan Lovells node in footer1; the footer2 legal +// boilerplate is a different part and is left untouched. +func placeholderiseFooterFirmName(footerXML string) string { + return strings.Replace(footerXML, + `Hogan Lovells`, + `{{firm.name}}`, 1) +} + var ( - docOpenRegex = regexp.MustCompile(`(?s)]*>`) - sectPrRegex = regexp.MustCompile(`(?s)`) + docOpenRegex = regexp.MustCompile(`(?s)]*>`) + sectPrRegex = regexp.MustCompile(`(?s)`) + tblCloseRegex = regexp.MustCompile(``) + paragraphRe = regexp.MustCompile(`(?s)`) + pPrEndRe = regexp.MustCompile(`(?s)^.*?`) + pOpenRe = regexp.MustCompile(`^]*>`) + pStyleRe = regexp.MustCompile(`w:pStyle w:val="([^"]*)"`) + wtRe = regexp.MustCompile(`(?s)]*>.*?`) + tagRe = regexp.MustCompile(`<[^>]*>`) ) -// documentOpenTag returns the carrier's open tag verbatim so -// the rebuilt body keeps the exact namespace declarations the sectPr (r:id -// refs) and styles rely on. -func documentOpenTag(docXML string) (string, error) { - m := docOpenRegex.FindString(docXML) - if m == "" { +// rebuildDocument keeps the carrier's open tag and +// verbatim, lifts the authored body from through the end of the +// recitals table (the third top-level table), runs the placeholder walk over +// it, appends a minimal body + signature region, and re-attaches the sectPr. +func rebuildDocument(docXML, lang, prefix string) (string, error) { + openTag := docOpenRegex.FindString(docXML) + if openTag == "" { return "", fmt.Errorf("carrier document.xml has no open tag") } - return m, nil -} - -// extractSectPr returns the carrier's block verbatim. -// It wires the letterhead header/footer references (rId16=header1, -// rId17=footer1, rId18=header2 first-page, rId19=footer2 first-page) and the -// A4 page geometry; reusing it keeps the firm letterhead on every page. 
-func extractSectPr(docXML string) (string, error) { - m := sectPrRegex.FindString(docXML) - if m == "" { + sectPr := sectPrRegex.FindString(docXML) + if sectPr == "" { return "", fmt.Errorf("carrier document.xml has no — cannot preserve letterhead wiring") } - return m, nil -} -// firmLabels holds the language-dependent static scaffold text. Dynamic -// values stay as {{key}} placeholders regardless of language; the caption -// pieces use the BARE {{caption.*}} keys (draft-language-resolved) so the -// procedural wording flips DE/EN per draft even though the scaffold labels -// are baked. Mirrors docx.fallbackLabelsFor so the firm-styled and -// in-process fallbacks read identically. -type firmLabels struct { - editor string - dateKey string - caseNo string - representedBy string - others string - wegen string - subjectLabel string - patent string - proceeding string - ourSideKey string - bodyHint string - closing string -} - -func labelsFor(lang string) firmLabels { - if lang == "en" { - return firmLabels{ - editor: "Attorney:", - dateKey: "{{today.long_en}}", - caseNo: "Case no.:", - representedBy: "represented by", - others: "Further parties:", - wegen: "re", - subjectLabel: "Subject", - patent: "Patent in suit:", - proceeding: "Proceeding:", - ourSideKey: "{{project.our_side_en}}", - bodyHint: "[Body of the submission goes here. This is a basic firm-styled skeleton — fill in according to the submission type.]", - closing: "Closing", - } + bodyOpen := strings.Index(docXML, "") + if bodyOpen < 0 { + return "", fmt.Errorf("carrier document.xml has no ") } - return firmLabels{ - editor: "Bearbeiter:", - dateKey: "{{today.long_de}}", - caseNo: "Aktenzeichen:", - representedBy: "vertreten durch", - others: "Weitere Beteiligte:", - wegen: "wegen", - subjectLabel: "Betreff", - patent: "Streitpatent:", - proceeding: "Verfahrensart:", - ourSideKey: "{{project.our_side_de}}", - bodyHint: "[Hier folgt der Schriftsatztext. 
Diese Skelett-Vorlage trägt keine vorgefertigte Struktur — bitte gemäß Schriftsatz-Typ ergänzen.]", - closing: "Schlussformel", + bodyStart := bodyOpen + len("") + + // The .dotm's authored letterhead is three top-level tables in order: + // (1) header table (logo + sender/recipient addresses), (2) case-info + // table, (3) recitals table. Everything after the recitals table close + // (TOC, checklist demo, "Template Info And Manual") is documentation and + // is dropped. + closes := tblCloseRegex.FindAllStringIndex(docXML, -1) + if len(closes) < 3 { + return "", fmt.Errorf("carrier document.xml has %d tables, expected ≥3 (header, case-info, recitals)", len(closes)) + } + cut := closes[2][1] // end offset of the third + if cut <= bodyStart { + return "", fmt.Errorf("recitals table ends before body starts — unexpected document shape") } -} -// buildDocumentXML emits the merge-safe firm-styled Rubrum body. Layout -// mirrors docx.buildFallbackDocumentXML (author/date → court/case/proceeding -// → Rubrum heading → claimant block → versus → defendant block → others → -// wegen-subject → patent → body placeholder → closing/signature) so the two -// merge fallbacks stay structurally identical; only the paragraph styles -// differ (firm HLpat/HLCpat styles here vs generic Heading2/Normal there). -func buildDocumentXML(lang, prefix, openTag, sectPr string) string { - l := labelsFor(lang) - - body0 := prefix + "Body-B0" - heading := prefix + "Heading-H2" - party := prefix + "Table-Recitals-Party" - partyDetails := prefix + "Table-Recitals-PartyDetails" - partyRoles := prefix + "Table-Recitals-PartyRoles" - sequencer := prefix + "Table-Recitals-Sequencers" - signature := prefix + "Signature" + keep := docXML[bodyStart:cut] + transformed := transformBody(keep, lang) var b strings.Builder b.WriteString(``) b.WriteString(openTag) b.WriteString(``) - - // Author / date block. 
The firm identity + logo live in the letterhead - // header/footer (preserved via the carrier's sectPr), so they are not - // repeated in the body. - para(&b, body0, l.editor+" {{user.display_name}}") - para(&b, body0, "{{user.email}} · {{user.office}}") - para(&b, body0, l.dateKey) - - // Court + case number + proceeding. - para(&b, body0, "{{project.court}}") - para(&b, body0, l.caseNo+" {{project.case_number}}") - para(&b, body0, l.proceeding+" {{project.proceeding.name}}") - - // Rubrum heading — parametric caption wording, no outline number. - headingNoNum(&b, heading, "{{caption.heading}}") - - // Claimant block (Recitals-Party auto-numbers it "1."). - para(&b, party, "{{parties.claimant.name}}") - para(&b, partyDetails, l.representedBy+" {{parties.claimant.representative}}") - para(&b, partyRoles, "— {{caption.claimant_designation}} —") - - // Versus connector. - para(&b, sequencer, "{{caption.versus}}") - - // Defendant block (Recitals-Party auto-numbers it "2."). - para(&b, party, "{{parties.defendant.name}}") - para(&b, partyDetails, l.representedBy+" {{parties.defendant.representative}}") - para(&b, partyRoles, "— {{caption.defendant_designation}} —") - - // Further parties + subject. - para(&b, partyDetails, l.others+" {{parties.other.name}}") - para(&b, body0, l.wegen+" {{caption.subject}}") - - // Patent in suit. - headingNoNum(&b, heading, l.subjectLabel) - para(&b, body0, l.patent+" {{project.patent_number}}") - para(&b, body0, "{{project.title}} ("+l.ourSideKey+")") - - // Body placeholder for the actual submission text. - para(&b, body0, "") - para(&b, body0, l.bodyHint) - para(&b, body0, "") - - // Closing / signature. - headingNoNum(&b, heading, l.closing) - para(&b, body0, l.dateKey) - para(&b, signature, "{{user.display_name}}") - para(&b, signature, "{{firm.signature_block}}") - - // sectPr — reused verbatim from the carrier (letterhead wiring + A4 - // geometry). 
+ b.WriteString(transformed) + b.WriteString(buildClosing(lang, prefix)) b.WriteString(sectPr) b.WriteString(``) + return b.String(), nil +} + +// action is what a rule does to a matched paragraph. +type action int + +const ( + actPlaceholder action = iota // replace run text with a fixed string (placeholder and/or label) + actClear // empty the paragraph (keep its styled paragraph mark) +) + +// rule maps one authored example paragraph (matched by style + normalised +// text, consumed in document order) to its replacement. langText picks the +// replacement by draft language: a value may be a pure {{placeholder}} +// (language-agnostic), a translated structural connector, or a mix. +type rule struct { + style string + text string // normalised example text to match + de string + en string + act action + used bool +} + +// exampleRules is the document-ordered mapping from the .dotm's authored +// example text to paliad merge placeholders. Order matters: duplicate +// (style,text) pairs — the two "Representative Details" and "represented by" +// lines — are consumed claimant-first, then defendant. Paragraphs with no +// matching rule are kept verbatim (the firm's authored layout, scaffolding, +// sender/recipient address blocks, and section labels). +// +// nbsp + doubled spaces in the source are normalised away before matching; +// curly apostrophes are folded to straight. +var exampleRules = []rule{ + // — Recipient: court name. Address lines (Denisstraße 3 / 80335 München) + // are kept as authored editable defaults pending a court_id linkage. — + {style: "", text: "Unified Patent Court", de: "{{project.court}}", en: "{{project.court}}", act: actPlaceholder}, + {style: "", text: "Munich Local Division", de: "", en: "", act: actClear}, + + // — Sender contact (the colleague handling the matter). 
— + {style: "Address", text: "[Name]", de: "{{user.display_name}}", en: "{{user.display_name}}", act: actPlaceholder}, + {style: "Address", text: "[name.lastname]@hoganlovells.com", de: "{{user.email}}", en: "{{user.email}}", act: actPlaceholder}, + // [Position] and "T [phone number" are kept as authored editable text + // (paliad has no title/phone fields). + + // — Date. — + {style: "", text: "14 April 2026", de: "{{today.long_de}}", en: "{{today.long_en}}", act: actPlaceholder}, + + // — Case-information table. — + {style: "HLCpat-Body-B0", text: "EP 1 234 567", de: "{{project.patent_number}}", en: "{{project.patent_number}}", act: actPlaceholder}, + {style: "HLCpat-Body-B0", text: "UPC_CFI_", de: "{{project.case_number}}", en: "{{project.case_number}}", act: actPlaceholder}, + + // — Submission title / subtitle. — + {style: "HLCpat-Table-Header-Title", text: "Submission Title", de: "{{procedural_event.name}}", en: "{{procedural_event.name}}", act: actPlaceholder}, + {style: "HLCpat-Table-Header-Subtitle", text: "Submission Subtitle", de: "", en: "", act: actClear}, + + // — Recitals: structural connectors + parties. — + {style: "HLCpat-Body-B0", text: "In the matter of", de: "In Sachen", en: "In the matter of", act: actPlaceholder}, + + // Claimant (Proactive Party). 
+ {style: "HLCpat-Table-Recitals-Party", text: "Proactive Party", de: "{{parties.claimant.name}}", en: "{{parties.claimant.name}}", act: actPlaceholder}, + {style: "HLCpat-Table-Recitals-PartyDetails", text: "Proactive Party's Details", de: "", en: "", act: actClear}, + {style: "HLCpat-Table-Recitals-PartyRoles", text: "- Proactive Party's Roles -", de: "– {{caption.claimant_designation}} –", en: "– {{caption.claimant_designation}} –", act: actPlaceholder}, + {style: "HLCpat-Body-B0", text: "represented by", de: "vertreten durch", en: "represented by", act: actPlaceholder}, + {style: "HLCpat-Table-Recitals-PartyDetails", text: "Representative Details (Name, Address etc)", de: "{{parties.claimant.representative}}", en: "{{parties.claimant.representative}}", act: actPlaceholder}, + + // Versus connector. + {style: "HLCpat-Table-Recitals-Sequencers", text: "against", de: "{{caption.versus}}", en: "{{caption.versus}}", act: actPlaceholder}, + + // Defendant 1 (Reactive Party 1). + {style: "HLCpat-Table-Recitals-Party", text: "Reactive Party 1", de: "{{parties.defendant.name}}", en: "{{parties.defendant.name}}", act: actPlaceholder}, + {style: "HLCpat-Table-Recitals-PartyDetails", text: "Reactive Party 1's Details", de: "", en: "", act: actClear}, + {style: "HLCpat-Table-Recitals-PartyRoles", text: "- Reactive Party 1's Roles -", de: "– {{caption.defendant_designation}} –", en: "– {{caption.defendant_designation}} –", act: actPlaceholder}, + + // Defendant 2 (Reactive Party 2) → the "further party" slot. 
+ {style: "HLCpat-Table-Recitals-Party", text: "Reactive Party 2", de: "{{parties.other.name}}", en: "{{parties.other.name}}", act: actPlaceholder}, + {style: "HLCpat-Table-Recitals-PartyDetails", text: "Reactive Party 2's Details", de: "", en: "", act: actClear}, + {style: "HLCpat-Table-Recitals-PartyRoles", text: "- Reactive Party 2's Roles -", de: "– {{caption.defendant_designation}} –", en: "– {{caption.defendant_designation}} –", act: actPlaceholder}, + {style: "HLCpat-Body-B0", text: "represented by", de: "vertreten durch", en: "represented by", act: actPlaceholder}, + {style: "HLCpat-Table-Recitals-PartyDetails", text: "Representative Details (Name, Address etc)", de: "{{parties.defendant.representative}}", en: "{{parties.defendant.representative}}", act: actPlaceholder}, + + // Service address: keep the firm hub, translate the label. + {style: "HLCpat-Table-Recitals-PartyDetails", text: "Electronic address for service: upc-hub@hoganlovells.com", de: "Zustellungsanschrift: upc-hub@hoganlovells.com", en: "Electronic address for service: upc-hub@hoganlovells.com", act: actPlaceholder}, + + // Subject ("wegen" / "relating to ..."). + {style: "HLCpat-Table-Recitals-Sequencers", text: "relating to alleged patent infringement", de: "{{caption.subject}}", en: "{{caption.subject}}", act: actPlaceholder}, +} + +// transformBody walks every in the kept region in document order, +// applying the first unconsumed exampleRule whose (style, normalised text) +// matches. Paragraphs with no matching rule — and all the table/row/cell +// scaffolding between paragraphs — pass through verbatim. 
+func transformBody(region, lang string) string { + rules := make([]rule, len(exampleRules)) + copy(rules, exampleRules) + + var b strings.Builder + last := 0 + for _, loc := range paragraphRe.FindAllStringIndex(region, -1) { + b.WriteString(region[last:loc[0]]) // scaffolding before this paragraph + para := region[loc[0]:loc[1]] + b.WriteString(applyRules(para, lang, rules)) + last = loc[1] + } + b.WriteString(region[last:]) return b.String() } -// para writes one paragraph with the given paragraph style. The full line -// (static label + any {{key}} placeholders) goes in a single run/text node; -// the merge renderer's pass-1 substitutes each placeholder inside the node -// in place (format-preserving), so no per-placeholder run splitting is -// needed here. -func para(b *strings.Builder, style, text string) { - b.WriteString(``) - b.WriteString(xmlEscape(text)) - b.WriteString(``) +func applyRules(para, lang string, rules []rule) string { + style := "" + if m := pStyleRe.FindStringSubmatch(para); m != nil { + style = m[1] + } + norm := normalise(flattenText(para)) + for i := range rules { + r := &rules[i] + if r.used || r.style != style || r.text != norm { + continue + } + r.used = true + repl := r.en + if lang == "de" { + repl = r.de + } + if r.act == actClear || repl == "" { + return emptyParagraph(para) + } + return replaceParagraphText(para, repl) + } + return para // no rule — keep authored layout verbatim } -// headingNoNum writes a heading paragraph that suppresses the heading -// style's auto-numbering (the firm Heading-H1/H2 styles carry a numbered -// outline list; a Rubrum caption/section title must not render "1.1."). A -// paragraph-level numId=0 override removes the paragraph from any list while -// keeping the heading's font/spacing. -func headingNoNum(b *strings.Builder, style, text string) { +// flattenText concatenates every run text in a paragraph. 
+func flattenText(para string) string { + var sb strings.Builder + for _, t := range wtRe.FindAllString(para, -1) { + sb.WriteString(tagRe.ReplaceAllString(t, "")) + } + return sb.String() +} + +// normalise collapses nbsp + whitespace runs to a single space, trims, and +// folds curly apostrophes/dashes so the source's run-fragmented, prettified +// text matches the rule keys. +func normalise(s string) string { + s = strings.ReplaceAll(s, " ", " ") + s = strings.ReplaceAll(s, "’", "'") + s = strings.ReplaceAll(s, "‘", "'") + s = strings.ReplaceAll(s, "–", "-") + s = strings.ReplaceAll(s, "—", "-") + s = strings.Join(strings.Fields(s), " ") + return strings.TrimSpace(s) +} + +// replaceParagraphText keeps the paragraph's open tag + (style, +// numbering suppression, indents) verbatim and replaces the whole run +// sequence with a single run carrying text. The named HLCpat- paragraph +// style supplies the formatting, so no run properties are authored. +func replaceParagraphText(para, text string) string { + head := paragraphHead(para) + return head + `` + xmlEscape(text) + `` +} + +// emptyParagraph strips a paragraph down to its styled mark (open tag + +// pPr), so a cleared field keeps its place + style but renders blank. +func emptyParagraph(para string) string { + return paragraphHead(para) + `` +} + +// paragraphHead returns the paragraph up to and including its (or +// just the open tag when the paragraph has no pPr). +func paragraphHead(para string) string { + if m := pPrEndRe.FindString(para); m != "" { + return m + } + if m := pOpenRe.FindString(para); m != "" { + return m + } + return `` +} + +// buildClosing appends a minimal authoring area below the recitals — an +// empty body line, a one-line hint, and a signature block — using the firm's +// own HLCpat- styles (style references only, no formatting). The actual +// submission text is written here by the lawyer / filled by per-code +// templates; the firm-skeleton is the merge-path fallback. 
+func buildClosing(lang, prefix string) string { + body0 := prefix + "Body-B0" + sig := prefix + "Signature" + + hint := "[Hier folgt der Schriftsatztext. Diese Skelett-Vorlage trägt den HLC-Briefkopf und das Rubrum — bitte gemäß Schriftsatz-Typ ergänzen.]" + if lang == "en" { + hint = "[The body of the submission goes here. This skeleton carries the HLC letterhead and Rubrum — complete it according to the submission type.]" + } + + var b strings.Builder + styledPara(&b, body0, "") + styledPara(&b, body0, hint) + styledPara(&b, body0, "") + styledPara(&b, sig, "{{user.display_name}}") + styledPara(&b, sig, "{{firm.signature_block}}") + return b.String() +} + +func styledPara(b *strings.Builder, style, text string) { b.WriteString(``) - b.WriteString(xmlEscape(text)) - b.WriteString(``) + b.WriteString(`"/>`) + if text != "" { + b.WriteString(``) + b.WriteString(xmlEscape(text)) + b.WriteString(``) + } + b.WriteString(``) } func xmlEscape(s string) string {