Merge: submission-md placeholder underscores preserved
Some checks failed
Paliad CI gate / build (push) Has been cancelled
Paliad CI gate / test-go (push) Has been cancelled
Paliad CI gate / deploy (push) Has been cancelled

This commit is contained in:
mAi
2026-05-28 00:01:30 +02:00
2 changed files with 101 additions and 0 deletions

View File

@@ -405,6 +405,23 @@ func parseInlineSpans(text string) []inlineSpan {
i := 0
n := len(text)
for i < n {
// Preserve {{...}} placeholders verbatim. Underscores and
// other Markdown-significant chars inside a placeholder key
// (e.g. {{project.case_number}}) must not be interpreted as
// bold/italic delimiters — otherwise the key gets stripped of
// its underscores and the v1 placeholder pass looks up the
// wrong key, surfacing [KEIN WERT: project.casenumber] in the
// preview.
if i+1 < n && text[i] == '{' && text[i+1] == '{' {
rel := strings.Index(text[i+2:], "}}")
if rel >= 0 {
end := i + 2 + rel + 2
cur.WriteString(text[i:end])
i = end
continue
}
// Unmatched {{ — fall through to plain character handling.
}
// Bold delimiters first (longer match wins over italic).
if i+1 < n && (text[i:i+2] == "**" || text[i:i+2] == "__") {
flush()

View File

@@ -86,6 +86,90 @@ func TestRenderMarkdownToOOXML_PlaceholdersPassThrough(t *testing.T) {
}
}
// TestRenderMarkdownToOOXML_PlaceholderUnderscoresPreserved is a regression
// test for placeholder keys that contain underscores (project.case_number,
// user.display_name, project.patent_number_upc). The italic/bold inline
// scanner used to consume those underscores, so the OOXML stored
// {{project.casenumber}} and the preview surfaced
// [KEIN WERT: project.casenumber] instead of the real value.
func TestRenderMarkdownToOOXML_PlaceholderUnderscoresPreserved(t *testing.T) {
	inputs := []string{
		"{{project.case_number}}",
		"{{user.display_name}}",
		"{{project.patent_number_upc}}",
		"prefix {{project.case_number}} suffix",
		"two: {{a.b_c}} and {{d.e_f}}",
		"mixed: _italic_ then {{project.case_number}} then __bold__",
	}
	for _, src := range inputs {
		rendered := RenderMarkdownToOOXML(src, "Normal")
		// Each placeholder found in the input has to survive rendering
		// character for character; braces and underscores are not touched
		// by XML escaping, so a plain substring check is enough.
		for _, placeholder := range extractPlaceholders(src) {
			if strings.Contains(rendered, placeholder) {
				continue
			}
			t.Errorf("input %q: placeholder %q lost; got %q", src, placeholder, rendered)
		}
	}
}
// TestParseInlineSpans_PlaceholderWithUnderscoresIsLiteral guards the inline
// scanner directly: a lone {{project.case_number}} must come back as exactly
// one span that is neither italic nor bold and whose text is the complete,
// unmodified placeholder.
func TestParseInlineSpans_PlaceholderWithUnderscoresIsLiteral(t *testing.T) {
	const placeholder = "{{project.case_number}}"

	got := parseInlineSpans(placeholder)
	if n := len(got); n != 1 {
		t.Fatalf("expected 1 span; got %d (%+v)", n, got)
	}

	only := got[0]
	if only.Italic || only.Bold {
		t.Errorf("placeholder must not be italic/bold; got %+v", only)
	}
	if only.Text != placeholder {
		t.Errorf("placeholder text corrupted: got %q", only.Text)
	}
}
// TestParseInlineSpans_ItalicAroundPlaceholder checks that italic delimiters
// outside a placeholder keep working while the placeholder between them stays
// literal: the parsed spans must include an italic "before", a non-italic,
// non-bold span containing {{x.y_z}}, and an italic "after".
func TestParseInlineSpans_ItalicAroundPlaceholder(t *testing.T) {
	spans := parseInlineSpans("_before_ {{x.y_z}} _after_")

	var italicBefore, placeholderLiteral, italicAfter bool
	for _, span := range spans {
		// The three conditions are mutually exclusive (distinct text for the
		// italic cases, Italic false for the placeholder case), so a single
		// switch records at most one observation per span.
		switch {
		case span.Italic && span.Text == "before":
			italicBefore = true
		case span.Italic && span.Text == "after":
			italicAfter = true
		case !span.Italic && !span.Bold && strings.Contains(span.Text, "{{x.y_z}}"):
			placeholderLiteral = true
		}
	}

	if !(italicBefore && placeholderLiteral && italicAfter) {
		t.Errorf("expected italic/placeholder/italic structure; got %+v", spans)
	}
}
// extractPlaceholders returns every "{{...}}" token found in s, delimiters
// included, in order of appearance. Scanning stops at the first "{{" that has
// no "}}" after it; the result is nil when nothing matches. It is a small
// helper for the placeholder regression test.
func extractPlaceholders(s string) []string {
	const (
		opener = "{{"
		closer = "}}"
	)

	var found []string
	rest := s
	for {
		open := strings.Index(rest, opener)
		if open < 0 {
			break
		}
		// Look for the closer only after the opener so "{{" itself is
		// never mistaken for part of the terminator.
		rel := strings.Index(rest[open+len(opener):], closer)
		if rel < 0 {
			break
		}
		stop := open + len(opener) + rel + len(closer)
		found = append(found, rest[open:stop])
		rest = rest[stop:]
	}
	return found
}
func TestRenderMarkdownToOOXML_XMLEscape(t *testing.T) {
out := RenderMarkdownToOOXML("a & b < c > d", "")
if strings.Contains(out, " & ") {