Fix Paperless upload: handle async task UUID response

Paperless-ngx post_document returns a task UUID string, not a document
ID directly. The code assumed it was a dict and called .get() on a
string, causing AttributeError. Now polls the task status endpoint to
retrieve the actual integer document ID.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
SysAdmin Agent
2026-03-11 21:25:55 +00:00
parent 83cf2798b1
commit 65e025d8c4

View File

@@ -28,6 +28,8 @@ import mimetypes
from datetime import datetime, timezone as dt_timezone from datetime import datetime, timezone as dt_timezone
from email.header import decode_header, make_header from email.header import decode_header, make_header
import time
import requests import requests
from celery import shared_task from celery import shared_task
from django.conf import settings from django.conf import settings
@@ -84,6 +86,56 @@ def _get_email_body(msg) -> str:
return "\n".join(body_parts).strip() return "\n".join(body_parts).strip()
def _poll_paperless_task(api_url: str, headers: dict, task_id: str, filename: str, max_wait: int = 120) -> int | None:
"""
Pollt den Paperless-ngx Task-Status bis das Dokument verarbeitet wurde.
Gibt die Dokument-ID zurück, oder None bei Fehler/Timeout.
"""
task_url = f"{api_url.rstrip('/')}/api/tasks/?task_id={task_id}"
waited = 0
interval = 2
while waited < max_wait:
try:
resp = requests.get(task_url, headers=headers, timeout=30)
resp.raise_for_status()
tasks = resp.json()
if tasks:
task = tasks[0] if isinstance(tasks, list) else tasks
status = task.get("status", "")
if status == "SUCCESS":
related = task.get("related_document")
if related:
# related_document can be a URL like "/api/documents/42/"
# or just an ID
if isinstance(related, str):
parts = related.rstrip("/").split("/")
try:
return int(parts[-1])
except (ValueError, IndexError):
logger.warning("Konnte Dokument-ID nicht aus '%s' extrahieren.", related)
return None
return int(related)
# Some versions use result_id or document_id
for key in ("result_id", "document_id", "id"):
val = task.get(key)
if val is not None:
try:
return int(val)
except (ValueError, TypeError):
pass
logger.warning("Task %s erfolgreich aber keine Dokument-ID gefunden: %s", task_id, task)
return None
elif status == "FAILURE":
logger.error("Paperless-Task %s fehlgeschlagen für '%s': %s", task_id, filename, task.get("result"))
return None
except requests.RequestException as exc:
logger.warning("Fehler beim Abfragen von Paperless-Task %s: %s", task_id, exc)
time.sleep(interval)
waited += interval
logger.error("Timeout beim Warten auf Paperless-Task %s für '%s' (%ds).", task_id, filename, max_wait)
return None
def _upload_to_paperless(content: bytes, filename: str, destinataer=None, betreff: str = "") -> int | None: def _upload_to_paperless(content: bytes, filename: str, destinataer=None, betreff: str = "") -> int | None:
""" """
Lädt einen Anhang in Paperless-NGX hoch. Lädt einen Anhang in Paperless-NGX hoch.
@@ -140,9 +192,18 @@ def _upload_to_paperless(content: bytes, filename: str, destinataer=None, betref
timeout=300, # 5 Minuten für große Anhänge timeout=300, # 5 Minuten für große Anhänge
) )
response.raise_for_status() response.raise_for_status()
# Paperless gibt die neue Dokument-ID zurück (als Integer oder UUID-String)
result = response.json() result = response.json()
doc_id = result if isinstance(result, int) else result.get("id")
# Paperless-ngx post_document returns a task UUID string.
# We need to poll the task status to get the actual document ID.
if isinstance(result, str):
task_id = result
doc_id = _poll_paperless_task(api_url, headers, task_id, safe_filename)
elif isinstance(result, int):
doc_id = result
else:
doc_id = result.get("id")
logger.info("Anhang '%s' erfolgreich in Paperless hochgeladen (ID: %s).", safe_filename, doc_id) logger.info("Anhang '%s' erfolgreich in Paperless hochgeladen (ID: %s).", safe_filename, doc_id)
return doc_id return doc_id
except requests.RequestException as exc: except requests.RequestException as exc: