Merge branch 'mai/artemis/issue-10-anti-ai-lint': Anti-AI-Lint im Build (#10)
This commit is contained in:
@@ -1,6 +1,6 @@
|
|||||||
FROM alpine:3.21 AS builder
|
FROM alpine:3.21 AS builder
|
||||||
|
|
||||||
RUN apk add --no-cache bash yq coreutils findutils
|
RUN apk add --no-cache bash yq coreutils findutils python3
|
||||||
|
|
||||||
WORKDIR /src
|
WORKDIR /src
|
||||||
COPY . .
|
COPY . .
|
||||||
|
|||||||
26
README.md
26
README.md
@@ -31,10 +31,32 @@ build/ # Generated output (gitignored)
|
|||||||
### Build
|
### Build
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
./build.sh
|
./build.sh # build + anti-AI text lint
|
||||||
|
./build.sh --skip-lint # build only (emergencies)
|
||||||
```
|
```
|
||||||
|
|
||||||
Requires `yq` for YAML parsing. Outputs to `build/` directory.
|
Requires `yq` for YAML parsing and `python3` for the lint step (if `python3` is not found, the build prints a notice and skips the lint). Outputs to `build/`.
|
||||||
|
|
||||||
|
### Anti-AI text lint
|
||||||
|
|
||||||
|
Every build runs `tools/anti-ai-lint.py` against `build/<domain>/index.html`,
|
||||||
|
flagging text fingerprints typical of LLM-generated content (vocab and structure
|
||||||
|
patterns from `tools/anti-ai-blacklist.yaml`). Severity `warn` prints a message;
|
||||||
|
`fail` aborts the build.
|
||||||
|
|
||||||
|
Whitelist a hit:
|
||||||
|
|
||||||
|
- HTML comment in the affected page:
|
||||||
|
`<!-- anti-ai-allow: revolutionär, em-dash-3-bullet -->`
|
||||||
|
- Per-site override in `site.yaml`:
|
||||||
|
```yaml
|
||||||
|
anti_ai_allow:
|
||||||
|
- revolutionär
|
||||||
|
- em-dash-3-bullet
|
||||||
|
```
|
||||||
|
|
||||||
|
The blacklist source is `docs/geo-seo-guideline.md` §3.6. Test the linter with
|
||||||
|
`tools/test-anti-ai-lint.sh`.
|
||||||
|
|
||||||
### Deploy
|
### Deploy
|
||||||
|
|
||||||
|
|||||||
25
build.sh
25
build.sh
@@ -5,6 +5,19 @@ set -euo pipefail
|
|||||||
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
|
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
|
||||||
BUILD_DIR="$SCRIPT_DIR/build"
|
BUILD_DIR="$SCRIPT_DIR/build"
|
||||||
|
|
||||||
|
skip_lint=0
|
||||||
|
for arg in "$@"; do
|
||||||
|
case "$arg" in
|
||||||
|
--skip-lint) skip_lint=1 ;;
|
||||||
|
-h|--help)
|
||||||
|
echo "Usage: $0 [--skip-lint]"
|
||||||
|
echo " --skip-lint Skip the anti-AI text lint step (emergencies only)."
|
||||||
|
exit 0
|
||||||
|
;;
|
||||||
|
*) echo "Unknown argument: $arg" >&2; exit 2 ;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
|
||||||
echo "=== Onepager Build ==="
|
echo "=== Onepager Build ==="
|
||||||
|
|
||||||
# Clean build directory
|
# Clean build directory
|
||||||
@@ -50,6 +63,14 @@ echo "[3/3] Copying shared assets..."
|
|||||||
cp -r "$SCRIPT_DIR/shared" "$BUILD_DIR/shared"
|
cp -r "$SCRIPT_DIR/shared" "$BUILD_DIR/shared"
|
||||||
echo " -> shared/ copied"
|
echo " -> shared/ copied"
|
||||||
|
|
||||||
# 4. Report
|
# 4. Anti-AI text lint
|
||||||
echo "[4/4] Build complete"
|
if [ "$skip_lint" -eq 1 ]; then
|
||||||
|
echo "[4/4] Anti-AI lint skipped (--skip-lint)"
|
||||||
|
elif ! command -v python3 >/dev/null 2>&1; then
|
||||||
|
echo "[4/4] python3 not found — skipping anti-AI lint"
|
||||||
|
else
|
||||||
|
echo "[4/4] Anti-AI text lint..."
|
||||||
|
python3 "$SCRIPT_DIR/tools/anti-ai-lint.py" "$BUILD_DIR"
|
||||||
|
fi
|
||||||
|
|
||||||
echo "=== Build complete: $count sites ==="
|
echo "=== Build complete: $count sites ==="
|
||||||
|
|||||||
@@ -248,7 +248,7 @@ Mehrere dieser Wörter im selben Absatz sind das stärkste Tell.
|
|||||||
|
|
||||||
**Praktische Umsetzung:**
|
**Praktische Umsetzung:**
|
||||||
|
|
||||||
- Lint-Skript im Build (`scripts/anti-ai-lint.sh`) das Vokabel-Blacklist über alle `index.html` und `*.md` läuft, mit Schwellenwert (z. B. mehr als 3 Marker pro 500 Wörter → Warnung).
|
- Lint-Skript im Build: für onepager implementiert in `tools/anti-ai-lint.py` mit Vokabel-Blacklist in `tools/anti-ai-blacklist.yaml`. Läuft am Ende von `build.sh` über alle `build/<domain>/index.html`. Severity `warn` (Build geht durch) vs. `fail` (Build bricht ab). Whitelist via `<!-- anti-ai-allow: term -->`-Kommentar oder `anti_ai_allow:`-Liste in `site.yaml`. Notfall-Override: `build.sh --skip-lint`.
|
||||||
- Bei AI-generierten Drafts: bewusst gegen die Blacklist redigieren.
|
- Bei AI-generierten Drafts: bewusst gegen die Blacklist redigieren.
|
||||||
- Vor Veröffentlichung laut lesen. Wenn es klingt wie ein Pressemitteilungs-Bot, ist es einer.
|
- Vor Veröffentlichung laut lesen. Wenn es klingt wie ein Pressemitteilungs-Bot, ist es einer.
|
||||||
|
|
||||||
|
|||||||
97
tools/anti-ai-blacklist.yaml
Normal file
97
tools/anti-ai-blacklist.yaml
Normal file
@@ -0,0 +1,97 @@
|
|||||||
|
# Anti-AI lint rules: textual fingerprints typical of LLM-generated content.
#
# Severity:
#   warn — build proceeds, message printed
#   fail — build aborts (exit 1) unless build.sh --skip-lint
#
# Whitelisting matches:
#   In an HTML file:       <!-- anti-ai-allow: term -->
#                          <!-- anti-ai-allow: term1, term2 -->
#   Per site (site.yaml):  anti_ai_allow:
#                            - leverage
#                            - em-dash-3-bullet
#
# Vocab matches are case-insensitive substring matches against the visible
# text of the rendered HTML (script/style/comments stripped). Pattern matches
# are regex (Python re), case-insensitive by default, against the same.
#
# Source: docs/geo-seo-guideline.md §3.6 (Wikipedia AI-content signals).

vocab:
  de:
    warn:
      - "nahtlos"
      - "robust"
      - "umfassend"
      - "ganzheitlich"
      - "fungiert als"
      - "dient als Brücke"
      - "Symbiose"
      - "im Bereich der"
      - "in der heutigen schnelllebigen"
      - "ein Meilenstein"
      - "ein Beweis für"
      - "hat Spuren hinterlassen"
      - "Es ist wichtig zu erwähnen"
      - "Es ist wichtig zu beachten"
      - "bahnbrechend"
      - "revolutionär"
    fail:
      - "in der sich entwickelnden Landschaft"
      - "Herausforderungen und Zukunftsaussichten"
      - "Herausforderungen und Perspektiven"

  en:
    warn:
      - "delve"
      - "tapestry"
      - "testament"
      - "intricate"
      - "garnered"
      - "bolstered"
      - "enduring"
      - "robust"
      - "comprehensive"
      - "meticulous"
      - "interplay"
      - "pivotal"
      - "underscore"
      - "moreover"
      - "furthermore"
      - "additionally"
      - "crucial"
      - "showcasing"
      - "highlighting"
      - "leverage"
      - "streamline"
      - "holistic"
      - "seamless"
      - "unleash"
      - "ecosystem"
      - "in the realm of"
      - "dive into"
      - "It's important to note that"
      - "It is important to note that"
      - "In this article, we'll"
    fail:
      - "in today's evolving landscape"
      - "in the ever-evolving landscape"
      - "Challenges and Future Prospects"

patterns:
  - name: em-dash-3-bullet
    description: |
      Three "Word: text — Word: text — Word: …" segments in one block.
      Classic AI bullet pattern.
    regex: '(\w[\w\s]{0,30}:\s+[^—\n]{2,80}—\s*){2,}\w[\w\s]{0,30}:'
    severity: warn

  - name: not-only-but-also
    description: '"not only X, but also Y" / "nicht nur X, sondern auch Y" filler.'
    # Commas are allowed between the two halves: German requires one before
    # "sondern", and English usually has one before "but also". Only sentence
    # punctuation (. ;) and newlines end the search window.
    regex: '\b(?:not only|nicht nur)\b[^.;\n]{1,80}\b(?:but also|sondern auch)\b'
    severity: warn

  - name: as-an-ai
    description: Leftover AI self-disclosure.
    # German side: optional article "ein"/"eine" followed by whitespace, so
    # "als KI", "als eine KI" and "als ein Sprachmodell" all match.
    regex: '\b(?:as an? (?:AI|language model)|als (?:eine?\s+)?(?:KI|Sprachmodell))\b'
    severity: fail
|
||||||
294
tools/anti-ai-lint.py
Executable file
294
tools/anti-ai-lint.py
Executable file
@@ -0,0 +1,294 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""anti-ai-lint — flag AI-text fingerprints in built sites.
|
||||||
|
|
||||||
|
Reads tools/anti-ai-blacklist.yaml, walks build/<domain>/index.html, prints
|
||||||
|
findings. Exits 1 if any finding has severity=fail, else 0.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
tools/anti-ai-lint.py [--blacklist PATH] [--sources sites/] [--quiet]
|
||||||
|
[--json] [BUILD_DIR]
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
from html.parser import HTMLParser
|
||||||
|
|
||||||
|
|
||||||
|
def _ansi(code: str) -> str:
    """Return ``code`` if coloured output is appropriate, otherwise ``""``.

    Colour is emitted only when stdout is a terminal and the ``NO_COLOR``
    environment variable is unset or empty, so piped output, log files and
    users who opted out of colour get plain text.

    Args:
        code: an ANSI escape sequence.

    Returns:
        ``code`` unchanged, or the empty string when colour is disabled.
    """
    if os.environ.get("NO_COLOR"):
        return ""
    return code if sys.stdout.isatty() else ""


# Escape sequences used by the text report. They are resolved once, when the
# module is loaded, so they reflect the stdout/NO_COLOR state at start-up.
RED = _ansi("\033[31m")
YELLOW = _ansi("\033[33m")
GREEN = _ansi("\033[32m")
DIM = _ansi("\033[2m")
BOLD = _ansi("\033[1m")
RESET = _ansi("\033[0m")
|
||||||
|
|
||||||
|
|
||||||
|
class TextExtractor(HTMLParser):
    """Collect visible text fragments and ``anti-ai-allow`` directives from HTML.

    Attributes:
        skip_depth: how many SKIP_TAGS elements are currently open; text is
            recorded only while this is 0.
        fragments: ``(line, text)`` tuples, one per non-blank text node that
            was seen outside SKIP_TAGS elements.
        allows: allow tokens read from ``<!-- anti-ai-allow: ... -->``
            comments, stored both verbatim and lowercased.
        html_lang: lowercased primary subtag of the first ``<html lang>``
            attribute (``"de-DE"`` -> ``"de"``), or None if none was seen.
    """

    SKIP_TAGS = {"script", "style", "noscript", "template"}

    def __init__(self) -> None:
        super().__init__(convert_charrefs=True)
        self.skip_depth = 0
        self.fragments: list[tuple[int, str]] = []
        self.allows: set[str] = set()
        self.html_lang: str | None = None

    def handle_starttag(self, tag: str, attrs) -> None:
        """Record the page language and track entry into skipped elements."""
        if tag == "html" and self.html_lang is None:
            for k, v in attrs:
                if k == "lang" and v:
                    self.html_lang = v.lower().split("-")[0]
                    break
        if tag in self.SKIP_TAGS:
            self.skip_depth += 1

    def handle_startendtag(self, tag: str, attrs) -> None:
        """Ignore self-closing tags.

        Overriding the default (which would call handle_starttag and
        handle_endtag) means a self-closing tag never changes skip_depth.
        """
        pass

    def handle_endtag(self, tag: str) -> None:
        """Leave a skipped element; never lets the depth go negative."""
        if tag in self.SKIP_TAGS and self.skip_depth > 0:
            self.skip_depth -= 1

    def handle_data(self, data: str) -> None:
        """Store non-blank text seen outside skipped elements with its line."""
        if self.skip_depth == 0 and data.strip():
            line, _ = self.getpos()
            self.fragments.append((line, data))

    def handle_comment(self, data: str) -> None:
        """Parse an ``anti-ai-allow: a, b c, d`` directive from a comment.

        Each comma-separated entry is stored as one token with its inner
        whitespace collapsed, so multi-word blacklist terms such as
        "in the realm of" can be whitelisted. For compatibility with
        space-separated directives, every single whitespace-delimited word is
        stored as well. All tokens are added verbatim and lowercased.

        Only the text up to the end of the directive's line is read, because
        ``.`` in the pattern does not match a newline.
        """
        m = re.search(r"anti-ai-allow\s*:\s*(.+)", data, re.IGNORECASE)
        if not m:
            return
        directive = m.group(1)

        # Whole comma-separated entries: this is what makes phrases work.
        for entry in directive.split(","):
            phrase = " ".join(entry.split())
            if phrase:
                self.allows.add(phrase)
                self.allows.add(phrase.lower())

        # Individual words, so "anti-ai-allow: leverage delve" keeps working.
        for token in re.split(r"[,\s]+", directive):
            if token:
                self.allows.add(token)
                self.allows.add(token.lower())
|
||||||
|
|
||||||
|
|
||||||
|
def load_blacklist(path: str) -> dict:
    """Load the YAML blacklist by converting it to JSON with ``yq``.

    Using ``yq`` keeps the linter free of third-party Python packages; the
    JSON it prints is parsed with the standard library.

    Args:
        path: location of the YAML blacklist file.

    Returns:
        The parsed rules as a dict. An empty or comment-only file gives ``{}``.

    Raises:
        SystemExit: with an error message when ``yq`` is missing, fails, emits
            output that is not JSON, or the top level is not a mapping.
    """
    try:
        out = subprocess.check_output(
            ["yq", "-o=json", path],
            stderr=subprocess.PIPE,
            text=True,
        )
    except FileNotFoundError:
        sys.exit("ERROR: yq not found in PATH (required to parse YAML blacklist)")
    except subprocess.CalledProcessError as e:
        sys.exit(f"ERROR: yq failed to parse {path}: {(e.stderr or '').strip()}")

    # A file without any YAML content may produce no output at all.
    if not out.strip():
        return {}
    try:
        data = json.loads(out)
    except json.JSONDecodeError as e:
        sys.exit(f"ERROR: yq produced invalid JSON for {path}: {e}")

    # "null" is what an empty document converts to; treat it as "no rules".
    if data is None:
        return {}
    # Callers use .get() on the result, so anything but a mapping is a
    # configuration error that is better reported here than as a traceback.
    if not isinstance(data, dict):
        sys.exit(f"ERROR: blacklist {path} must be a YAML mapping at top level")
    return data
|
||||||
|
|
||||||
|
|
||||||
|
def site_allow_yaml(site_yaml: str) -> list[str]:
    """Read the optional ``anti_ai_allow`` list from a site's ``site.yaml``.

    This is best effort: the linter keeps running when the list cannot be
    read, but the reason is printed to stderr so that a broken ``site.yaml``
    does not silently turn whitelisted terms back into findings.

    Args:
        site_yaml: path to the site's ``site.yaml``; it may not exist.

    Returns:
        The allow tokens, stripped, with blank entries dropped. An empty list
        when the file is missing, has no ``anti_ai_allow`` key, or ``yq``
        could not be run or failed.
    """
    if not os.path.isfile(site_yaml):
        return []
    try:
        out = subprocess.check_output(
            ["yq", "-r", "(.anti_ai_allow // []) | .[]", site_yaml],
            stderr=subprocess.PIPE,
            text=True,
        )
    except (OSError, subprocess.CalledProcessError) as e:
        # OSError covers a missing or non-executable yq binary.
        detail = (getattr(e, "stderr", None) or str(e)).strip()
        print(
            f"WARNING: could not read anti_ai_allow from {site_yaml}: {detail}",
            file=sys.stderr,
        )
        return []
    return [line.strip() for line in out.splitlines() if line.strip()]
|
||||||
|
|
||||||
|
|
||||||
|
def lint_file(html_path: str, blacklist: dict, extra_allows: list[str]) -> list[dict]:
    """Lint one rendered HTML page against the blacklist.

    Args:
        html_path: page to read; decoded as UTF-8 with undecodable bytes
            replaced.
        blacklist: parsed rules with optional ``"vocab"`` and ``"patterns"``
            keys.
        extra_allows: additional allow tokens (for example from site.yaml),
            honoured verbatim and lowercased on top of the page's own
            ``anti-ai-allow`` comments.

    Returns:
        A list of finding dicts with the keys ``kind``, ``name``,
        ``severity``, ``line`` and ``snippet``; vocab findings also carry
        ``lang``. Each vocab term and each pattern name is reported at most
        once per file, at its first matching text fragment. Problems with the
        page or the rules themselves are returned as ``warn`` findings of
        kind ``parse`` or ``config``.
    """
    with open(html_path, "r", encoding="utf-8", errors="replace") as f:
        raw = f.read()

    parser = TextExtractor()
    try:
        parser.feed(raw)
        parser.close()
    except Exception as e:
        # Malformed HTML — record a single warning and skip.
        return [{
            "kind": "parse",
            "name": "html-parse-error",
            "severity": "warn",
            "line": 0,
            "snippet": str(e)[:120],
        }]

    allow_set = set(parser.allows)
    for tok in extra_allows:
        allow_set.add(tok)
        allow_set.add(tok.lower())

    def is_allowed(name: str) -> bool:
        return name in allow_set or name.lower() in allow_set

    findings: list[dict] = []
    seen: set[tuple[str, str]] = set()  # (kind, name) — one report per file

    # Both language buckets are checked for every page, whatever its lang
    # attribute says. Only text nodes are scanned.
    # NOTE(review): attribute values (e.g. data-en) are not part of
    # parser.fragments, so translated text kept in attributes is not linted.
    vocab = blacklist.get("vocab") or {}
    for lang in ("de", "en"):
        bucket = vocab.get(lang) or {}
        for severity in ("warn", "fail"):
            for term in bucket.get(severity) or []:
                term_lc = term.lower()
                key = ("vocab", term_lc)
                if key in seen or is_allowed(term):
                    continue
                for line_no, frag in parser.fragments:
                    if term_lc in frag.lower():
                        findings.append({
                            "kind": "vocab",
                            "lang": lang,
                            "name": term,
                            "severity": severity,
                            "line": line_no,
                            "snippet": frag.strip()[:120],
                        })
                        seen.add(key)
                        break

    # Patterns
    for pat in blacklist.get("patterns") or []:
        if not isinstance(pat, dict):
            findings.append({
                "kind": "config",
                "name": str(pat)[:40],
                "severity": "warn",
                "line": 0,
                "snippet": "pattern entry is not a mapping",
            })
            continue

        regex = pat.get("regex")
        name = pat.get("name") or (regex or "")[:40]
        key = ("pattern", name)
        if key in seen or is_allowed(name):
            continue

        # A rule without a regex is a configuration mistake; report it
        # instead of failing the whole run with a KeyError.
        if not regex:
            findings.append({
                "kind": "config",
                "name": name or "<unnamed pattern>",
                "severity": "warn",
                "line": 0,
                "snippet": "pattern has no regex",
            })
            continue

        flags = re.MULTILINE
        if not pat.get("case_sensitive"):
            flags |= re.IGNORECASE
        try:
            rx = re.compile(regex, flags)
        except re.error as e:
            findings.append({
                "kind": "config",
                "name": name,
                "severity": "warn",
                "line": 0,
                "snippet": f"invalid regex: {e}",
            })
            continue

        # Severity is compared against the exact strings "warn"/"fail" by the
        # caller, so normalise case here ("FAIL" must still fail the build).
        severity = str(pat.get("severity") or "warn").lower()
        for line_no, frag in parser.fragments:
            m = rx.search(frag)
            if m:
                findings.append({
                    "kind": "pattern",
                    "name": name,
                    "severity": severity,
                    "line": line_no,
                    "snippet": (frag.strip()[:120] or m.group(0)[:120]),
                })
                seen.add(key)
                break

    return findings
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
    """Command-line entry point: lint every ``<build_dir>/<site>/index.html``.

    Findings are printed per site followed by a one-line summary, or emitted
    as a single JSON document when ``--json`` is given.

    Returns:
        2 if the build directory or the blacklist file is missing, 1 if at
        least one finding has severity ``fail``, otherwise 0.
    """
    tools_dir = os.path.dirname(os.path.abspath(__file__))
    repo_root = os.path.dirname(tools_dir)

    cli = argparse.ArgumentParser(description="Flag AI-text fingerprints in built sites.")
    cli.add_argument("build_dir", nargs="?", default=os.path.join(repo_root, "build"))
    cli.add_argument("--blacklist", default=os.path.join(tools_dir, "anti-ai-blacklist.yaml"))
    cli.add_argument("--sources", default=os.path.join(repo_root, "sites"),
                     help="sites/ root (for per-site site.yaml allow lists)")
    cli.add_argument("--quiet", action="store_true",
                     help="Suppress warnings; only show fails.")
    cli.add_argument("--json", action="store_true", help="Emit JSON report.")
    opts = cli.parse_args()

    # Both inputs must exist before any linting starts; checked in order.
    required = (
        (os.path.isdir, opts.build_dir, "build dir"),
        (os.path.isfile, opts.blacklist, "blacklist"),
    )
    for exists, path, label in required:
        if not exists(path):
            print(f"ERROR: {label} not found: {path}", file=sys.stderr)
            return 2

    rules = load_blacklist(opts.blacklist)

    warn_count = 0
    fail_count = 0
    flagged_sites = 0
    scanned_sites = 0
    report_sites: list[dict] = []

    for site in sorted(os.listdir(opts.build_dir)):
        page = os.path.join(opts.build_dir, site, "index.html")
        # Entries without an index.html are not sites.
        if not os.path.isfile(page):
            continue
        scanned_sites += 1

        allow = site_allow_yaml(os.path.join(opts.sources, site, "site.yaml"))
        results = lint_file(page, rules, allow)
        site_fails = [r for r in results if r["severity"] == "fail"]

        if results:
            flagged_sites += 1
        warn_count += sum(1 for r in results if r["severity"] == "warn")
        fail_count += len(site_fails)

        if opts.json:
            # Every scanned site appears in the JSON report, flagged or not.
            report_sites.append({"site": site, "findings": results})
            continue

        shown = site_fails if opts.quiet else results
        if not shown:
            continue
        print(f"{BOLD}{site}{RESET}")
        for r in shown:
            color, tag = (RED, "FAIL") if r["severity"] == "fail" else (YELLOW, "warn")
            lang = f" ({r['lang']})" if "lang" in r else ""
            print(
                f" {color}{tag}{RESET} {r['kind']}{lang}: "
                f"{BOLD}{r['name']}{RESET} "
                f"{DIM}line {r['line']}: {r['snippet']}{RESET}"
            )

    if opts.json:
        document = {
            "summary": {
                "sites_total": scanned_sites,
                "sites_with_findings": flagged_sites,
                "warn": warn_count,
                "fail": fail_count,
            },
            "sites": report_sites,
        }
        json.dump(document, sys.stdout, indent=2, ensure_ascii=False)
        print()
    else:
        if fail_count > 0:
            tag, color = "FAIL", RED
        elif warn_count > 0:
            tag, color = "WARN", YELLOW
        else:
            tag, color = "OK", GREEN
        print(
            f"\n{color}anti-ai-lint: {tag}{RESET} — "
            f"{flagged_sites}/{scanned_sites} sites flagged "
            f"({fail_count} fail, {warn_count} warn)"
        )

    return 1 if fail_count > 0 else 0


if __name__ == "__main__":
    sys.exit(main())
|
||||||
107
tools/test-anti-ai-lint.sh
Executable file
107
tools/test-anti-ai-lint.sh
Executable file
@@ -0,0 +1,107 @@
|
|||||||
|
#!/bin/bash
# Self-test for tools/anti-ai-lint.py.
# Writes a synthetic AI-flavoured page into a throwaway directory so that the
# checks further down can assert that the linter flags it and that whitelist
# comments silence the hit.
set -euo pipefail

SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
LINT="$SCRIPT_DIR/anti-ai-lint.py"

# Scratch area; removed again on every exit path.
tmp=$(mktemp -d)
cleanup() {
  rm -rf "$tmp"
}
trap cleanup EXIT

fixture_dir="$tmp/build/synthetic-ai.test"
mkdir -p "$fixture_dir"

# The fixture mixes fail-level phrases, warn-level vocabulary, an em-dash
# bullet line, and blacklisted words inside <style>/<script> that the linter
# must not report. <title> has to stay on line 4: the whitelist check inserts
# its comment after that line.
cat > "$fixture_dir/index.html" <<'HTML'
<!DOCTYPE html>
<html lang="en">
<head>
<title>Synthetic AI sample</title>
<style>.foo { color: red; } /* leverage in CSS comment must be ignored */</style>
<script>const x = "leverage"; // in JS, must be ignored</script>
</head>
<body>
<h1>In today's evolving landscape</h1>
<p>This is a comprehensive, robust, holistic solution that lets us leverage emerging trends.</p>
<p>We delve into the intricate tapestry of AI to navigate this pivotal moment.</p>
<h2>Challenges and Future Prospects</h2>
<ul>
<li>Effizienz: hoch — Skalierbarkeit: gut — Sicherheit: solide</li>
</ul>
</body>
</html>
HTML
|
||||||
|
|
||||||
|
#######################################
# Shared check behind expect_finding / expect_no_finding.
# The report is handed to Python on stdin instead of argv, so a large JSON
# report cannot hit the kernel's per-argument size limit.
# Arguments: $1 - mode: "one" (exactly one hit) or "none" (no hit)
#            $2 - JSON report as printed by anti-ai-lint.py --json
#            $3 - finding name to look for
# Outputs:   explanation on stderr when the expectation is not met
# Returns:   0 if the expectation holds, non-zero otherwise
#######################################
_check_finding() {
  local mode=$1 report=$2 target=$3
  python3 -c '
import json, sys

mode, target = sys.argv[1], sys.argv[2]
data = json.load(sys.stdin)
hits = [f for site in data["sites"] for f in site["findings"] if f["name"] == target]
if mode == "one" and len(hits) != 1:
    print(f"expected exactly 1 finding for {target!r}, got {len(hits)}", file=sys.stderr)
    sys.exit(1)
if mode == "none" and hits:
    print(f"unexpected finding for {target!r}: {hits}", file=sys.stderr)
    sys.exit(1)
' "$mode" "$target" <<<"$report"
}

# expect_finding <json> <name>
# Succeeds only if the report contains exactly one finding called <name>.
expect_finding() {
  _check_finding one "$1" "$2"
}

# expect_no_finding <json> <name>
# Succeeds only if the report contains no finding called <name>.
expect_no_finding() {
  _check_finding none "$1" "$2"
}
|
||||||
|
|
||||||
|
# [1] The synthetic fixture contains fail-level phrases, so the linter must
# exit 1 and report each expected term exactly once.
echo "[1] expecting FAIL on synthetic AI fixture..."
# The linter's stderr stays visible: a missing yq or a broken blacklist is
# then reported directly instead of showing up as an unexplained exit code.
rc=0
report=$(python3 "$LINT" --json "$tmp/build") || rc=$?
if [ "$rc" -ne 1 ]; then
  echo "FAIL: expected exit 1, got $rc" >&2
  echo "$report" >&2
  exit 1
fi
for term in "in today's evolving landscape" "Challenges and Future Prospects" \
  "leverage" "comprehensive" "delve" "em-dash-3-bullet"; do
  expect_finding "$report" "$term" || exit 1
done
echo " OK"

# [2] A whitelist comment must silence exactly the listed names.
echo "[2] expecting whitelist comment to suppress hits..."
fixture="$tmp/build/synthetic-ai.test/index.html"
allow_comment=' <!-- anti-ai-allow: leverage, comprehensive, delve, em-dash-3-bullet -->'
# Insert the comment after line 4 (<title>). awk plus mv is used because
# in-place editing and one-line append text are sed extensions that not
# every sed implementation accepts.
awk -v comment="$allow_comment" '{ print } NR == 4 { print comment }' \
  "$fixture" > "$fixture.new"
mv "$fixture.new" "$fixture"
# The fixture still contains fail-level phrases, so a non-zero exit is
# expected here; only the report content matters for this step.
report=$(python3 "$LINT" --json "$tmp/build") || true
for term in "leverage" "comprehensive" "delve" "em-dash-3-bullet"; do
  expect_no_finding "$report" "$term" || exit 1
done
# fail-level "in today's evolving landscape" should still be reported
expect_finding "$report" "in today's evolving landscape" || exit 1
echo " OK"

# [3] Whitelisting warn-level terms must not mask the fail-level hits.
echo "[3] expecting fail-level hit still triggers exit 1..."
rc=0
python3 "$LINT" "$tmp/build" >/dev/null 2>&1 || rc=$?
if [ "$rc" -ne 1 ]; then
  echo "FAIL: expected exit 1, got $rc" >&2
  exit 1
fi
echo " OK"

# [4] A page without blacklisted text must pass.
echo "[4] expecting clean exit on neutral fixture..."
rm -rf "$tmp/build/synthetic-ai.test"
mkdir -p "$tmp/build/clean.test"
echo '<!DOCTYPE html><html lang="de"><body><p>Ein einfacher Satz ohne KI-Vokabular.</p></body></html>' \
  > "$tmp/build/clean.test/index.html"
rc=0
out=$(python3 "$LINT" "$tmp/build" 2>&1) || rc=$?
if [ "$rc" -ne 0 ]; then
  echo "FAIL: clean fixture should exit 0, got $rc" >&2
  echo "$out" >&2
  exit 1
fi
echo " OK"

echo
echo "all tests passed"
|
||||||
Reference in New Issue
Block a user