Files
paliad/pkg/docforge/docx/markdown.go
mAi 8763ab013c feat(docforge): slice 8 — neutral model + Markdown importer + Exporter iface (t-paliad-349)
The final slice: land the format-neutral document model with REAL consumers
and unify the Markdown parser — no duplication, byte-identical output.

Neutral model (pkg/docforge/model.go): Document / Block / InlineSpan.
BlockKind values are the stylemap keys. A hyperlink is a span with Link set
+ Children (the label's spans), preserving link boundaries so adjacent
same-URL links stay distinct — byte-exact with the pre-model walker.

Markdown importer (pkg/docforge/markdown): Import(md) → Document. The SINGLE
Markdown parser for docforge — block split, marker detection, inline
bold/italic/link tokenisation, {{placeholder}} pass-through (the b78a984
fix). Relocated out of the docx walker.

docx renderer (pkg/docforge/docx/markdown.go): now RENDERS a Document →
OOXML (RenderDocumentToOOXML); RenderMarkdownToOOXML[WithStyles] = render(
markdown.Import(md)). The shipped submission walker routes through the model,
so there is one parser, not two. The comprehensive byte-exact render tests
(RenderMarkdownToOOXML_*) all PASS unchanged = output identical.

Exporter interface (pkg/docforge/exporter.go, PRD §4 B4): Exporter{Format,
MIMEType, RenderBody(Document)} with the .docx impl (pkg/docforge/docx/
exporter.go). The seam a future PDF/HTML exporter slots into.

Tests: parser tests relocated to the markdown pkg (parseSpans/detectBlockMarker)
+ new importer Document tests + exporter conformance test.

Verification: go build/vet clean; gofmt clean; full NO-DB test suite GREEN
(authoritative — proves no regression); docforge byte-exact render oracle
PASS; composer live test renders through the rewired walker (PASS); bun build
+ bun test 274/274. The shared-DB live run fails ~85 tests across unrelated
services from a harness pq-42P08 $1-type seeding quirk + a stale
deadline_rules test — systemic/environmental (the no-DB run is clean), not
this change.

docforge train complete: 8 slices, the engine extracted + cleaned + a working
author→generate→export loop on uploaded templates, plus the neutral model +
importer + exporter seam for future formats/consumers.

m/paliad#157
2026-05-29 18:10:16 +02:00

189 lines
6.5 KiB
Go

package docx
// Markdown → OOXML rendering for Composer section content (t-paliad-313
// Slice B/D; restructured in t-paliad-349 slice 8).
//
// Parsing now lives in pkg/docforge/markdown, which produces the neutral
// docforge.Document. This file renders that Document into OOXML paragraph
// elements (<w:p>…</w:p>) ready to splice into a .docx body. There is one
// Markdown parser for docforge; this is the .docx exporter for its model.
//
// Output uses the base's stylemap entry for each block kind on the
// <w:pStyle>, so styling matches the base's typography (HLpat-Body-B0 on
// the HLC base, Normal on the neutral base, etc.). Placeholders ({{key}})
// ride through as literal run text and are substituted by the placeholder
// pass after assembly.
import (
"strconv"
"strings"
"mgit.msbls.de/m/paliad/pkg/docforge"
"mgit.msbls.de/m/paliad/pkg/docforge/markdown"
)
// HyperlinkAllocator hands the renderer a `rId` for each external URL it
// encounters in `[label](url)` inline links. The composer's post-pass uses
// these allocations to mutate `word/_rels/document.xml.rels` so the emitted
// `<w:hyperlink r:id="…">` elements resolve. Pass nil to drop links to
// plain text (the label survives, the URL doesn't render). t-paliad-316.
//
// The renderer invokes the allocator once per link span, in document
// order, and does not deduplicate URLs itself. Returning "" for a URL has
// the same effect as a nil allocator for that one link: its label is
// rendered as plain runs and no <w:hyperlink> element is emitted.
type HyperlinkAllocator func(url string) string
// RenderMarkdownToOOXML is the single-style convenience entry point kept
// for Slice B callers. It builds a stylemap that only defines the
// "paragraph" style (set to paragraphStyle) and passes no hyperlink
// allocator, then delegates to RenderMarkdownToOOXMLWithStyles.
func RenderMarkdownToOOXML(md, paragraphStyle string) string {
	singleStyle := map[string]string{
		"paragraph": paragraphStyle,
	}
	return RenderMarkdownToOOXMLWithStyles(md, singleStyle, nil)
}
// RenderMarkdownToOOXMLWithStyles imports md into a docforge.Document via
// the markdown package and renders that Document to OOXML paragraphs.
//
// stylemap maps each block kind (paragraph, heading_1/2/3, list_bullet,
// list_numbered, blockquote) to a Word paragraph style; kinds without an
// entry fall back to the "paragraph" style. links is forwarded unchanged
// to the renderer and may be nil.
func RenderMarkdownToOOXMLWithStyles(md string, stylemap map[string]string, links HyperlinkAllocator) string {
	doc := markdown.Import(md)
	return RenderDocumentToOOXML(doc, stylemap, links)
}
// RenderDocumentToOOXML turns a neutral Document into a concatenation of
// OOXML paragraphs, one per block, in block order. It is the .docx end of
// the docforge importer→model→exporter pipeline, so a Document from any
// importer renders the same way.
//
// Each block's paragraph style is looked up in stylemap by its kind; a
// missing or empty entry falls back to stylemap["paragraph"]. links is
// handed through to the inline renderer and may be nil.
func RenderDocumentToOOXML(doc docforge.Document, stylemap map[string]string, links HyperlinkAllocator) string {
	fallbackStyle := stylemap["paragraph"]

	var out strings.Builder
	// ordinal is the position of the current block inside its run of
	// consecutive numbered-list blocks. Any other block kind ends the run,
	// so "1. A\n2. B\n\n1. C" comes out as 1./2./1.
	ordinal := 0
	for _, block := range doc.Blocks {
		switch block.Kind {
		case docforge.KindListNumbered:
			ordinal++
		default:
			ordinal = 0
		}

		pStyle := stylemap[string(block.Kind)]
		if pStyle == "" {
			pStyle = fallbackStyle
		}

		out.WriteString(renderBlock(block, pStyle, links, ordinal))
	}
	return out.String()
}
// renderBlock produces exactly one <w:p> element for blk.
//
// A non-empty paragraphStyle is written as the paragraph's <w:pStyle>. A
// block without spans becomes an intentionally empty paragraph holding a
// single empty run, with no list prefix. Otherwise bullet blocks start
// with a literal "• " run and numbered blocks with an "N. " run, where N
// is numberedOrdinal clamped to at least 1; the prefix keeps the list
// structure visible even when the base defines no list style. The block's
// spans follow in order.
func renderBlock(blk docforge.Block, paragraphStyle string, links HyperlinkAllocator, numberedOrdinal int) string {
	const (
		runOpen  = `<w:r><w:t xml:space="preserve">`
		runClose = `</w:t></w:r>`
		paraEnd  = `</w:p>`
	)

	var out strings.Builder
	out.WriteString(`<w:p>`)
	if paragraphStyle != "" {
		out.WriteString(`<w:pPr><w:pStyle w:val="` + xmlAttrEscape(paragraphStyle) + `"/></w:pPr>`)
	}

	// Empty block: emit one empty run and stop, skipping any list prefix.
	if len(blk.Spans) == 0 {
		out.WriteString(runOpen + runClose + paraEnd)
		return out.String()
	}

	switch blk.Kind {
	case docforge.KindListBullet:
		out.WriteString(runOpen + "• " + runClose)
	case docforge.KindListNumbered:
		n := numberedOrdinal
		if n < 1 {
			n = 1
		}
		out.WriteString(runOpen + strconv.Itoa(n) + ". " + runClose)
	}

	for _, inline := range blk.Spans {
		out.WriteString(renderInlineSpan(inline, links))
	}
	out.WriteString(paraEnd)
	return out.String()
}
// renderInlineSpan renders a single inline span.
//
// A span without a Link is rendered as one ordinary run. A link span is
// rendered from its Children: when links is non-nil and returns a
// non-empty rId for the URL, the children are wrapped in
// <w:hyperlink r:id="…"> and carry the hyperlink run style; otherwise the
// children are rendered as plain runs and the URL is dropped.
func renderInlineSpan(span docforge.InlineSpan, links HyperlinkAllocator) string {
	if span.Link == "" {
		return renderRun(span)
	}

	rid := ""
	if links != nil {
		rid = links(span.Link)
	}

	var out strings.Builder
	if rid == "" {
		// No allocator, or it declined this URL: keep only the label.
		for _, label := range span.Children {
			out.WriteString(renderRun(label))
		}
		return out.String()
	}

	out.WriteString(`<w:hyperlink r:id="` + xmlAttrEscape(rid) + `">`)
	for _, label := range span.Children {
		out.WriteString(renderRunWithLinkStyle(label))
	}
	out.WriteString(`</w:hyperlink>`)
	return out.String()
}
// renderRunWithLinkStyle renders one child run of a hyperlink. The run
// always references the "Hyperlink" character style (Word's built-in link
// look) and additionally carries <w:b/> and/or <w:i/> when the span is
// bold or italic. The span text is XML-escaped.
func renderRunWithLinkStyle(span docforge.InlineSpan) string {
	props := `<w:rStyle w:val="Hyperlink"/>`
	if span.Bold {
		props += `<w:b/>`
	}
	if span.Italic {
		props += `<w:i/>`
	}
	return `<w:r><w:rPr>` + props + `</w:rPr><w:t xml:space="preserve">` + xmlTextEscape(span.Text) + `</w:t></w:r>`
}
// renderRun renders one plain span as a <w:r> element. A <w:rPr> block is
// emitted only when the span is bold and/or italic (bold first); the span
// text is XML-escaped and written with xml:space="preserve".
func renderRun(span docforge.InlineSpan) string {
	var props string
	switch {
	case span.Bold && span.Italic:
		props = `<w:rPr><w:b/><w:i/></w:rPr>`
	case span.Bold:
		props = `<w:rPr><w:b/></w:rPr>`
	case span.Italic:
		props = `<w:rPr><w:i/></w:rPr>`
	}
	return `<w:r>` + props + `<w:t xml:space="preserve">` + xmlTextEscape(span.Text) + `</w:t></w:r>`
}
// xmlTextEscape prepares s for use as <w:t> element content.
//
// The markup-significant characters &, < and > are replaced by their
// entities. Quotes and apostrophes are legal in element text and are left
// alone, as are TAB, LF and CR.
//
// Code points that XML 1.0 does not allow in a document at all (the C0
// control characters other than TAB/LF/CR, U+FFFE and U+FFFF) are dropped:
// they cannot be represented even as character references, and a single
// one would make the part not well-formed. Bytes that are not valid UTF-8
// are replaced by U+FFFD for the same reason. Output for well-formed
// input is byte-identical to plain entity replacement.
func xmlTextEscape(s string) string {
	var b strings.Builder
	b.Grow(len(s))
	// Ranging over a string decodes UTF-8 and yields U+FFFD for each
	// invalid byte, so everything written below is valid UTF-8.
	for _, r := range s {
		switch {
		case r == '&':
			b.WriteString("&amp;")
		case r == '<':
			b.WriteString("&lt;")
		case r == '>':
			b.WriteString("&gt;")
		case r == '\t', r == '\n', r == '\r',
			r >= 0x20 && r <= 0xD7FF,
			r >= 0xE000 && r <= 0xFFFD,
			r >= 0x10000 && r <= 0x10FFFF:
			// The XML 1.0 "Char" production: safe to emit verbatim.
			b.WriteRune(r)
		}
		// Anything else is not a legal XML character and is skipped.
	}
	return b.String()
}
// xmlAttrEscape prepares s for use inside a double-quoted attribute value
// (e.g. <w:pStyle w:val="…"/> or <w:hyperlink r:id="…">).
//
// &, <, > and the double quote are replaced by their entities; the
// apostrophe needs no escaping because the attribute is double-quoted.
//
// Code points that XML 1.0 does not allow in a document (the C0 control
// characters other than TAB/LF/CR, U+FFFE and U+FFFF) are dropped, and
// bytes that are not valid UTF-8 are replaced by U+FFFD, so a stray
// character in a style name or relationship id cannot make the part
// not well-formed. Output for well-formed input is byte-identical to plain
// entity replacement.
func xmlAttrEscape(s string) string {
	var b strings.Builder
	b.Grow(len(s))
	// Ranging over a string decodes UTF-8 and yields U+FFFD for each
	// invalid byte, so everything written below is valid UTF-8.
	for _, r := range s {
		switch {
		case r == '&':
			b.WriteString("&amp;")
		case r == '<':
			b.WriteString("&lt;")
		case r == '>':
			b.WriteString("&gt;")
		case r == '"':
			b.WriteString("&quot;")
		case r == '\t', r == '\n', r == '\r',
			r >= 0x20 && r <= 0xD7FF,
			r >= 0xE000 && r <= 0xFFFD,
			r >= 0x10000 && r <= 0x10FFFF:
			// The XML 1.0 "Char" production: safe to emit verbatim.
			b.WriteRune(r)
		}
		// Anything else is not a legal XML character and is skipped.
	}
	return b.String()
}