The final slice: land the format-neutral document model with REAL consumers
and unify the Markdown parser — no duplication, byte-identical output.
Neutral model (pkg/docforge/model.go): Document / Block / InlineSpan.
BlockKind values are the stylemap keys. A hyperlink is a span with Link set
+ Children (the label's spans), preserving link boundaries so adjacent
same-URL links stay distinct — byte-exact with the pre-model walker.
Markdown importer (pkg/docforge/markdown): Import(md) → Document. The SINGLE
Markdown parser for docforge — block split, marker detection, inline
bold/italic/link tokenisation, {{placeholder}} pass-through (the b78a984
fix). Relocated out of the docx walker.
docx renderer (pkg/docforge/docx/markdown.go): now RENDERS a Document →
OOXML (RenderDocumentToOOXML); RenderMarkdownToOOXML[WithStyles] = render(
markdown.Import(md)). The shipped submission walker routes through the model,
so there is one parser, not two. The comprehensive byte-exact render tests
(RenderMarkdownToOOXML_*) all PASS unchanged = output identical.
Exporter interface (pkg/docforge/exporter.go, PRD §4 B4): Exporter{Format,
MIMEType, RenderBody(Document)} with the .docx impl (pkg/docforge/docx/
exporter.go). The seam a future PDF/HTML exporter slots into.
Tests: parser tests relocated to the markdown pkg (parseSpans/detectBlockMarker)
+ new importer Document tests + exporter conformance test.
Verification: go build/vet clean; gofmt clean; full NO-DB test suite GREEN
(authoritative — proves no regression); docforge byte-exact render oracle
PASS; composer live test renders through the rewired walker (PASS); bun build
+ bun test 274/274. The shared-DB live run fails ~85 tests across unrelated
services because of a harness seeding quirk (pq 42P08, $1 parameter type) and
a stale deadline_rules test — both are systemic/environmental (the no-DB run
is clean) and are not caused by this change.
docforge train complete: 8 slices, the engine extracted + cleaned + a working
author→generate→export loop on uploaded templates, plus the neutral model +
importer + exporter seam for future formats/consumers.
m/paliad#157
189 lines
6.5 KiB
Go
189 lines
6.5 KiB
Go
package docx
|
|
|
|
// Markdown → OOXML rendering for Composer section content (t-paliad-313
|
|
// Slice B/D; restructured in t-paliad-349 slice 8).
|
|
//
|
|
// Parsing now lives in pkg/docforge/markdown, which produces the neutral
|
|
// docforge.Document. This file renders that Document into OOXML paragraph
|
|
// elements (<w:p>…</w:p>) ready to splice into a .docx body. There is one
|
|
// Markdown parser for docforge; this is the .docx exporter for its model.
|
|
//
|
|
// Output uses the base's stylemap entry for each block kind on the
|
|
// <w:pStyle>, so styling matches the base's typography (HLpat-Body-B0 on
|
|
// the HLC base, Normal on the neutral base, etc.). Placeholders ({{key}})
|
|
// ride through as literal run text and are substituted by the placeholder
|
|
// pass after assembly.
|
|
|
|
import (
|
|
"strconv"
|
|
"strings"
|
|
|
|
"mgit.msbls.de/m/paliad/pkg/docforge"
|
|
"mgit.msbls.de/m/paliad/pkg/docforge/markdown"
|
|
)
|
|
|
|
// HyperlinkAllocator maps the external URL of a `[label](url)` inline link
// to the relationship id (`rId`) the renderer writes into the emitted
// `<w:hyperlink r:id="…">` element. The composer's post-pass relies on the
// same allocations when it rewrites `word/_rels/document.xml.rels`, which is
// what makes the emitted hyperlinks resolve.
//
// A nil allocator, or one that returns "", degrades the link to plain text:
// the label is still rendered, the URL is dropped. t-paliad-316.
type HyperlinkAllocator func(url string) string
|
|
|
|
// RenderMarkdownToOOXML renders Markdown into OOXML paragraphs with a
|
|
// single paragraph style. Slice B back-compat wrapper.
|
|
func RenderMarkdownToOOXML(md, paragraphStyle string) string {
|
|
return RenderMarkdownToOOXMLWithStyles(md, map[string]string{"paragraph": paragraphStyle}, nil)
|
|
}
|
|
|
|
// RenderMarkdownToOOXMLWithStyles parses Markdown into a docforge.Document
|
|
// and renders it to OOXML. stylemap maps each block kind (paragraph,
|
|
// heading_1/2/3, list_bullet, list_numbered, blockquote) to a Word
|
|
// paragraph style; missing entries fall back to the "paragraph" style.
|
|
func RenderMarkdownToOOXMLWithStyles(md string, stylemap map[string]string, links HyperlinkAllocator) string {
|
|
return RenderDocumentToOOXML(markdown.Import(md), stylemap, links)
|
|
}
|
|
|
|
// RenderDocumentToOOXML renders a neutral Document to OOXML paragraphs —
|
|
// the .docx side of the docforge importer→model→exporter pipeline. Any
|
|
// Document (Markdown today, a foreign-doc importer later) renders the same
|
|
// way.
|
|
func RenderDocumentToOOXML(doc docforge.Document, stylemap map[string]string, links HyperlinkAllocator) string {
|
|
defaultStyle := stylemap["paragraph"]
|
|
// Numbered-list counter resets on every non-numbered block so
|
|
// "1. A\n2. B\n\n1. C" renders 1./2./1. — the input determined the
|
|
// ordinal, the renderer just emits it.
|
|
numbered := 0
|
|
var b strings.Builder
|
|
for _, blk := range doc.Blocks {
|
|
style := stylemap[string(blk.Kind)]
|
|
if style == "" {
|
|
style = defaultStyle
|
|
}
|
|
if blk.Kind == docforge.KindListNumbered {
|
|
numbered++
|
|
} else {
|
|
numbered = 0
|
|
}
|
|
b.WriteString(renderBlock(blk, style, links, numbered))
|
|
}
|
|
return b.String()
|
|
}
|
|
|
|
// renderBlock emits one <w:p> for a block. List blocks get a visible
|
|
// "• " / "N. " prefix run (the base stylemap handles indentation if it
|
|
// defines a list style; the prefix at least surfaces the structure).
|
|
func renderBlock(blk docforge.Block, paragraphStyle string, links HyperlinkAllocator, numberedOrdinal int) string {
|
|
var b strings.Builder
|
|
b.WriteString(`<w:p>`)
|
|
if paragraphStyle != "" {
|
|
b.WriteString(`<w:pPr><w:pStyle w:val="`)
|
|
b.WriteString(xmlAttrEscape(paragraphStyle))
|
|
b.WriteString(`"/></w:pPr>`)
|
|
}
|
|
// An empty block is an intentional empty paragraph: one empty run.
|
|
if len(blk.Spans) == 0 {
|
|
b.WriteString(`<w:r><w:t xml:space="preserve"></w:t></w:r></w:p>`)
|
|
return b.String()
|
|
}
|
|
switch blk.Kind {
|
|
case docforge.KindListBullet:
|
|
b.WriteString(`<w:r><w:t xml:space="preserve">• </w:t></w:r>`)
|
|
case docforge.KindListNumbered:
|
|
ordinal := numberedOrdinal
|
|
if ordinal <= 0 {
|
|
ordinal = 1
|
|
}
|
|
b.WriteString(`<w:r><w:t xml:space="preserve">`)
|
|
b.WriteString(strconv.Itoa(ordinal))
|
|
b.WriteString(`. </w:t></w:r>`)
|
|
}
|
|
for _, span := range blk.Spans {
|
|
b.WriteString(renderInlineSpan(span, links))
|
|
}
|
|
b.WriteString(`</w:p>`)
|
|
return b.String()
|
|
}
|
|
|
|
// renderInlineSpan emits one span. A hyperlink span (Link != "") becomes a
|
|
// <w:hyperlink r:id="…"> wrapping its children when an allocator yields a
|
|
// rId; otherwise the label children render as plain runs (URL dropped).
|
|
func renderInlineSpan(span docforge.InlineSpan, links HyperlinkAllocator) string {
|
|
if span.Link != "" {
|
|
if links != nil {
|
|
if rid := links(span.Link); rid != "" {
|
|
var hb strings.Builder
|
|
hb.WriteString(`<w:hyperlink r:id="`)
|
|
hb.WriteString(xmlAttrEscape(rid))
|
|
hb.WriteString(`">`)
|
|
for _, child := range span.Children {
|
|
hb.WriteString(renderRunWithLinkStyle(child))
|
|
}
|
|
hb.WriteString(`</w:hyperlink>`)
|
|
return hb.String()
|
|
}
|
|
}
|
|
// No allocator / no rId — render the label as plain runs.
|
|
var fb strings.Builder
|
|
for _, child := range span.Children {
|
|
fb.WriteString(renderRun(child))
|
|
}
|
|
return fb.String()
|
|
}
|
|
return renderRun(span)
|
|
}
|
|
|
|
// renderRunWithLinkStyle emits a hyperlink child run with Word's built-in
|
|
// "Hyperlink" character style (colour + underline), plus B/I.
|
|
func renderRunWithLinkStyle(span docforge.InlineSpan) string {
|
|
var b strings.Builder
|
|
b.WriteString(`<w:r><w:rPr><w:rStyle w:val="Hyperlink"/>`)
|
|
if span.Bold {
|
|
b.WriteString(`<w:b/>`)
|
|
}
|
|
if span.Italic {
|
|
b.WriteString(`<w:i/>`)
|
|
}
|
|
b.WriteString(`</w:rPr><w:t xml:space="preserve">`)
|
|
b.WriteString(xmlTextEscape(span.Text))
|
|
b.WriteString(`</w:t></w:r>`)
|
|
return b.String()
|
|
}
|
|
|
|
// renderRun emits one <w:r> for a plain (text/bold/italic) span.
|
|
func renderRun(span docforge.InlineSpan) string {
|
|
var b strings.Builder
|
|
b.WriteString(`<w:r>`)
|
|
if span.Bold || span.Italic {
|
|
b.WriteString(`<w:rPr>`)
|
|
if span.Bold {
|
|
b.WriteString(`<w:b/>`)
|
|
}
|
|
if span.Italic {
|
|
b.WriteString(`<w:i/>`)
|
|
}
|
|
b.WriteString(`</w:rPr>`)
|
|
}
|
|
b.WriteString(`<w:t xml:space="preserve">`)
|
|
b.WriteString(xmlTextEscape(span.Text))
|
|
b.WriteString(`</w:t></w:r>`)
|
|
return b.String()
|
|
}
|
|
|
|
// xmlTextReplacer rewrites the characters that are markup-significant in
// XML character data into their predefined entity references. It works in a
// single pass, so the "&" of an entity it has just produced is never
// escaped a second time.
var xmlTextReplacer = strings.NewReplacer(
	"&", "&amp;",
	"<", "&lt;",
	">", "&gt;",
)

// xmlTextEscape returns s made safe for use as <w:t> element content:
// "&", "<" and ">" are replaced by "&amp;", "&lt;" and "&gt;". Quotes and
// apostrophes are legal in element text and are left untouched, as is every
// other character (so {{placeholder}} tokens pass through verbatim).
func xmlTextEscape(s string) string {
	return xmlTextReplacer.Replace(s)
}
|
|
|
|
// xmlAttrReplacer rewrites the characters that must not appear raw inside a
// double-quoted XML attribute value into their predefined entity
// references. It works in a single pass, so the "&" of an entity it has
// just produced is never escaped a second time.
var xmlAttrReplacer = strings.NewReplacer(
	"&", "&amp;",
	"<", "&lt;",
	">", "&gt;",
	`"`, "&quot;",
)

// xmlAttrEscape returns s made safe for use inside a double-quoted
// attribute value (e.g. <w:pStyle w:val="…"/>): "&", "<", ">" and `"` are
// replaced by "&amp;", "&lt;", "&gt;" and "&quot;". Apostrophes are left
// as-is because the attributes written by this file are always delimited
// with double quotes.
func xmlAttrEscape(s string) string {
	return xmlAttrReplacer.Replace(s)
}
|