Fix Paperless upload: handle async task UUID response
Paperless-ngx post_document returns a task UUID string, not a document ID directly. The code assumed it was a dict and called .get() on a string, causing AttributeError. Now polls the task status endpoint to retrieve the actual integer document ID. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -28,6 +28,8 @@ import mimetypes
|
||||
from datetime import datetime, timezone as dt_timezone
|
||||
from email.header import decode_header, make_header
|
||||
|
||||
import time
|
||||
|
||||
import requests
|
||||
from celery import shared_task
|
||||
from django.conf import settings
|
||||
@@ -84,6 +86,56 @@ def _get_email_body(msg) -> str:
|
||||
return "\n".join(body_parts).strip()
|
||||
|
||||
|
||||
def _poll_paperless_task(api_url: str, headers: dict, task_id: str, filename: str, max_wait: int = 120) -> int | None:
|
||||
"""
|
||||
Pollt den Paperless-ngx Task-Status bis das Dokument verarbeitet wurde.
|
||||
Gibt die Dokument-ID zurück, oder None bei Fehler/Timeout.
|
||||
"""
|
||||
task_url = f"{api_url.rstrip('/')}/api/tasks/?task_id={task_id}"
|
||||
waited = 0
|
||||
interval = 2
|
||||
while waited < max_wait:
|
||||
try:
|
||||
resp = requests.get(task_url, headers=headers, timeout=30)
|
||||
resp.raise_for_status()
|
||||
tasks = resp.json()
|
||||
if tasks:
|
||||
task = tasks[0] if isinstance(tasks, list) else tasks
|
||||
status = task.get("status", "")
|
||||
if status == "SUCCESS":
|
||||
related = task.get("related_document")
|
||||
if related:
|
||||
# related_document can be a URL like "/api/documents/42/"
|
||||
# or just an ID
|
||||
if isinstance(related, str):
|
||||
parts = related.rstrip("/").split("/")
|
||||
try:
|
||||
return int(parts[-1])
|
||||
except (ValueError, IndexError):
|
||||
logger.warning("Konnte Dokument-ID nicht aus '%s' extrahieren.", related)
|
||||
return None
|
||||
return int(related)
|
||||
# Some versions use result_id or document_id
|
||||
for key in ("result_id", "document_id", "id"):
|
||||
val = task.get(key)
|
||||
if val is not None:
|
||||
try:
|
||||
return int(val)
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
logger.warning("Task %s erfolgreich aber keine Dokument-ID gefunden: %s", task_id, task)
|
||||
return None
|
||||
elif status == "FAILURE":
|
||||
logger.error("Paperless-Task %s fehlgeschlagen für '%s': %s", task_id, filename, task.get("result"))
|
||||
return None
|
||||
except requests.RequestException as exc:
|
||||
logger.warning("Fehler beim Abfragen von Paperless-Task %s: %s", task_id, exc)
|
||||
time.sleep(interval)
|
||||
waited += interval
|
||||
logger.error("Timeout beim Warten auf Paperless-Task %s für '%s' (%ds).", task_id, filename, max_wait)
|
||||
return None
|
||||
|
||||
|
||||
def _upload_to_paperless(content: bytes, filename: str, destinataer=None, betreff: str = "") -> int | None:
|
||||
"""
|
||||
Lädt einen Anhang in Paperless-NGX hoch.
|
||||
@@ -140,9 +192,18 @@ def _upload_to_paperless(content: bytes, filename: str, destinataer=None, betref
|
||||
timeout=300, # 5 Minuten für große Anhänge
|
||||
)
|
||||
response.raise_for_status()
|
||||
# Paperless gibt die neue Dokument-ID zurück (als Integer oder UUID-String)
|
||||
result = response.json()
|
||||
doc_id = result if isinstance(result, int) else result.get("id")
|
||||
|
||||
# Paperless-ngx post_document returns a task UUID string.
|
||||
# We need to poll the task status to get the actual document ID.
|
||||
if isinstance(result, str):
|
||||
task_id = result
|
||||
doc_id = _poll_paperless_task(api_url, headers, task_id, safe_filename)
|
||||
elif isinstance(result, int):
|
||||
doc_id = result
|
||||
else:
|
||||
doc_id = result.get("id")
|
||||
|
||||
logger.info("Anhang '%s' erfolgreich in Paperless hochgeladen (ID: %s).", safe_filename, doc_id)
|
||||
return doc_id
|
||||
except requests.RequestException as exc:
|
||||
|
||||
Reference in New Issue
Block a user