Files
onepager/tools/test-anti-ai-lint.sh
mAi fdac496a6f mAi: #10 - Anti-AI-Text-Lint im Build
tools/anti-ai-lint.py: Python-Linter (stdlib + yq) prueft jede
build/<domain>/index.html gegen die Blacklist in
tools/anti-ai-blacklist.yaml. HTML wird via html.parser auf sichtbaren
Text reduziert (Skripte/Styles werden ignoriert), dann werden Vokabel-
Substrings (DE+EN, case-insensitive) und Regex-Patterns gematcht.
Severity warn = Build geht durch, fail = Build bricht ab.

Whitelist-Mechanismen:
- HTML-Kommentar im Markup: <!-- anti-ai-allow: term1, term2 -->
- Per-Site in site.yaml: anti_ai_allow: [term1, term2]

Integration in build.sh als Schritt 4/4, mit --skip-lint fuer
Notfaelle. Dockerfile installiert python3 zusaetzlich; nur im
Builder-Stage, kein Effekt aufs Caddy-Image.

Tests via tools/test-anti-ai-lint.sh: synthetische AI-Fixture wird
korrekt geflaggt, Whitelists unterdruecken Hits, fail-Severity
triggert exit 1, neutraler Text exit 0.

Initial-Lauf auf 59 bestehenden Sites: 2 warn (killusion.de
"revolutionaer" in ironischem Kontext, kilofant.de "robust"),
0 fail. Cleanup ist Folge-Issue.

README + docs/geo-seo-guideline.md aktualisiert mit der konkreten
Tool-Position.
2026-04-30 02:50:50 +02:00

108 lines
3.4 KiB
Bash
Executable File

#!/bin/bash
# Regression test for tools/anti-ai-lint.py.
#
# Writes a synthetic, AI-sounding HTML page into a scratch build tree so the
# checks further down can assert that the linter flags it and that whitelist
# comments suppress the hits.
set -euo pipefail

# The linter is looked up next to this script.
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
LINT="$SCRIPT_DIR/anti-ai-lint.py"

# Scratch directory for all fixtures; removed again on every exit path.
tmp=$(mktemp -d)
cleanup() {
  rm -rf "$tmp"
}
trap cleanup EXIT

# Fixture layout is build/<domain>/index.html. The quoted heredoc delimiter
# keeps the page literal (no shell expansion). Its <style> and <script>
# elements also contain "leverage"; the page itself notes those must be ignored.
fixture_dir="$tmp/build/synthetic-ai.test"
mkdir -p "$fixture_dir"
cat <<'HTML' > "$fixture_dir/index.html"
<!DOCTYPE html>
<html lang="en">
<head>
<title>Synthetic AI sample</title>
<style>.foo { color: red; } /* leverage in CSS comment must be ignored */</style>
<script>const x = "leverage"; // in JS, must be ignored</script>
</head>
<body>
<h1>In today's evolving landscape</h1>
<p>This is a comprehensive, robust, holistic solution that lets us leverage emerging trends.</p>
<p>We delve into the intricate tapestry of AI to navigate this pivotal moment.</p>
<h2>Challenges and Future Prospects</h2>
<ul>
<li>Effizienz: hoch — Skalierbarkeit: gut — Sicherheit: solide</li>
</ul>
</body>
</html>
HTML
# expect_finding <json> <name>
#
# Assert that the linter's JSON report contains exactly one finding whose
# "name" equals <name>, counted across every entry of report["sites"].
# Arguments:
#   $1 - JSON report text (stdout of the linter run with --json)
#   $2 - finding name to look for
# Outputs:   a one-line reason on stderr when the assertion fails
# Returns:   0 on exactly one match, 1 otherwise (including an unreadable report)
expect_finding() {
  local report=$1 name=$2
  python3 -c '
import json, sys

target = sys.argv[2]
try:
    data = json.loads(sys.argv[1])
    hits = [f for site in data["sites"] for f in site["findings"] if f["name"] == target]
except (ValueError, KeyError, TypeError) as exc:
    # An empty or malformed report (for example after a linter crash) gets a
    # one-line diagnosis instead of a Python traceback.
    print(f"cannot read linter report while checking {target!r}: {exc!r}", file=sys.stderr)
    sys.exit(1)
if len(hits) != 1:
    print(f"expected exactly 1 finding for {target!r}, got {len(hits)}", file=sys.stderr)
    sys.exit(1)
' "$report" "$name"
}
# expect_no_finding <json> <name>
#
# Assert that the linter's JSON report contains no finding whose "name"
# equals <name>, looking at every entry of report["sites"].
# Arguments:
#   $1 - JSON report text (stdout of the linter run with --json)
#   $2 - finding name that must be absent
# Outputs:   a one-line reason on stderr when the assertion fails
# Returns:   0 when absent, 1 when present or when the report is unreadable
expect_no_finding() {
  local report=$1 name=$2
  python3 -c '
import json, sys

target = sys.argv[2]
try:
    data = json.loads(sys.argv[1])
    hits = [f for site in data["sites"] for f in site["findings"] if f["name"] == target]
except (ValueError, KeyError, TypeError) as exc:
    # An empty or malformed report (for example after a linter crash) gets a
    # one-line diagnosis instead of a Python traceback.
    print(f"cannot read linter report while checking {target!r}: {exc!r}", file=sys.stderr)
    sys.exit(1)
if hits:
    print(f"unexpected finding for {target!r}: {hits}", file=sys.stderr)
    sys.exit(1)
' "$report" "$name"
}
# Linter stderr from the --json runs is captured here instead of being thrown
# away, so a failing expectation can show what the linter actually printed.
# The file sits next to, not inside, the build tree that gets linted.
lint_err="$tmp/lint.stderr"

# Print the captured linter stderr (if there is any) and abort the test run.
fail_with_lint_log() {
  if [[ -s "$lint_err" ]]; then
    cat "$lint_err" >&2
  fi
  exit 1
}

echo "[1] expecting FAIL on synthetic AI fixture..."
# rc is preset so that `|| rc=$?` records a non-zero status without tripping
# `set -e`.
rc=0
report=$(python3 "$LINT" --json "$tmp/build" 2>"$lint_err") || rc=$?
if [[ "$rc" -ne 1 ]]; then
  echo "FAIL: expected exit 1, got $rc" >&2
  printf '%s\n' "$report" >&2
  fail_with_lint_log
fi
for term in "in today's evolving landscape" "Challenges and Future Prospects" \
  "leverage" "comprehensive" "delve" "em-dash-3-bullet"; do
  expect_finding "$report" "$term" || fail_with_lint_log
done
echo " OK"

echo "[2] expecting whitelist comment to suppress hits..."
fixture="$tmp/build/synthetic-ai.test/index.html"
allow_comment=' <!-- anti-ai-allow: leverage, comprehensive, delve, em-dash-3-bullet -->'
# Insert the allow-comment after line 4 of the fixture. awk plus mv is used
# instead of `sed -i '4a\ ...'`, which only works with GNU sed.
awk -v extra="$allow_comment" '{ print } NR == 4 { print extra }' "$fixture" \
  > "$tmp/index.html.new"
mv "$tmp/index.html.new" "$fixture"
# The exit status is deliberately ignored here; step [3] asserts it for this
# same fixture state.
report=$(python3 "$LINT" --json "$tmp/build" 2>"$lint_err") || true
for term in "leverage" "comprehensive" "delve" "em-dash-3-bullet"; do
  expect_no_finding "$report" "$term" || fail_with_lint_log
done
# fail-level "in today's evolving landscape" should still be reported
expect_finding "$report" "in today's evolving landscape" || fail_with_lint_log
echo " OK"

echo "[3] expecting fail-level hit still triggers exit 1..."
rc=0
out=$(python3 "$LINT" "$tmp/build" 2>&1) || rc=$?
if [[ "$rc" -ne 1 ]]; then
  echo "FAIL: expected exit 1, got $rc" >&2
  printf '%s\n' "$out" >&2
  exit 1
fi
echo " OK"

echo "[4] expecting clean exit on neutral fixture..."
# Swap the AI fixture for a neutral page so the build tree holds only clean text.
rm -rf "$tmp/build/synthetic-ai.test"
mkdir -p "$tmp/build/clean.test"
printf '%s\n' '<!DOCTYPE html><html lang="de"><body><p>Ein einfacher Satz ohne KI-Vokabular.</p></body></html>' \
  > "$tmp/build/clean.test/index.html"
rc=0
out=$(python3 "$LINT" "$tmp/build" 2>&1) || rc=$?
if [[ "$rc" -ne 0 ]]; then
  echo "FAIL: clean fixture should exit 0, got $rc" >&2
  printf '%s\n' "$out" >&2
  exit 1
fi
echo " OK"
echo
echo "all tests passed"