Fix Paperless upload: handle async task UUID response
Paperless-ngx post_document returns a task UUID string, not a document ID directly. The code assumed it was a dict and called .get() on a string, causing AttributeError. Now polls the task status endpoint to retrieve the actual integer document ID. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -28,6 +28,8 @@ import mimetypes
|
|||||||
from datetime import datetime, timezone as dt_timezone
|
from datetime import datetime, timezone as dt_timezone
|
||||||
from email.header import decode_header, make_header
|
from email.header import decode_header, make_header
|
||||||
|
|
||||||
|
import time
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from celery import shared_task
|
from celery import shared_task
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
@@ -84,6 +86,56 @@ def _get_email_body(msg) -> str:
|
|||||||
return "\n".join(body_parts).strip()
|
return "\n".join(body_parts).strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _poll_paperless_task(api_url: str, headers: dict, task_id: str, filename: str, max_wait: int = 120) -> int | None:
|
||||||
|
"""
|
||||||
|
Pollt den Paperless-ngx Task-Status bis das Dokument verarbeitet wurde.
|
||||||
|
Gibt die Dokument-ID zurück, oder None bei Fehler/Timeout.
|
||||||
|
"""
|
||||||
|
task_url = f"{api_url.rstrip('/')}/api/tasks/?task_id={task_id}"
|
||||||
|
waited = 0
|
||||||
|
interval = 2
|
||||||
|
while waited < max_wait:
|
||||||
|
try:
|
||||||
|
resp = requests.get(task_url, headers=headers, timeout=30)
|
||||||
|
resp.raise_for_status()
|
||||||
|
tasks = resp.json()
|
||||||
|
if tasks:
|
||||||
|
task = tasks[0] if isinstance(tasks, list) else tasks
|
||||||
|
status = task.get("status", "")
|
||||||
|
if status == "SUCCESS":
|
||||||
|
related = task.get("related_document")
|
||||||
|
if related:
|
||||||
|
# related_document can be a URL like "/api/documents/42/"
|
||||||
|
# or just an ID
|
||||||
|
if isinstance(related, str):
|
||||||
|
parts = related.rstrip("/").split("/")
|
||||||
|
try:
|
||||||
|
return int(parts[-1])
|
||||||
|
except (ValueError, IndexError):
|
||||||
|
logger.warning("Konnte Dokument-ID nicht aus '%s' extrahieren.", related)
|
||||||
|
return None
|
||||||
|
return int(related)
|
||||||
|
# Some versions use result_id or document_id
|
||||||
|
for key in ("result_id", "document_id", "id"):
|
||||||
|
val = task.get(key)
|
||||||
|
if val is not None:
|
||||||
|
try:
|
||||||
|
return int(val)
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
pass
|
||||||
|
logger.warning("Task %s erfolgreich aber keine Dokument-ID gefunden: %s", task_id, task)
|
||||||
|
return None
|
||||||
|
elif status == "FAILURE":
|
||||||
|
logger.error("Paperless-Task %s fehlgeschlagen für '%s': %s", task_id, filename, task.get("result"))
|
||||||
|
return None
|
||||||
|
except requests.RequestException as exc:
|
||||||
|
logger.warning("Fehler beim Abfragen von Paperless-Task %s: %s", task_id, exc)
|
||||||
|
time.sleep(interval)
|
||||||
|
waited += interval
|
||||||
|
logger.error("Timeout beim Warten auf Paperless-Task %s für '%s' (%ds).", task_id, filename, max_wait)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def _upload_to_paperless(content: bytes, filename: str, destinataer=None, betreff: str = "") -> int | None:
|
def _upload_to_paperless(content: bytes, filename: str, destinataer=None, betreff: str = "") -> int | None:
|
||||||
"""
|
"""
|
||||||
Lädt einen Anhang in Paperless-NGX hoch.
|
Lädt einen Anhang in Paperless-NGX hoch.
|
||||||
@@ -140,9 +192,18 @@ def _upload_to_paperless(content: bytes, filename: str, destinataer=None, betref
|
|||||||
timeout=300, # 5 Minuten für große Anhänge
|
timeout=300, # 5 Minuten für große Anhänge
|
||||||
)
|
)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
# Paperless gibt die neue Dokument-ID zurück (als Integer oder UUID-String)
|
|
||||||
result = response.json()
|
result = response.json()
|
||||||
doc_id = result if isinstance(result, int) else result.get("id")
|
|
||||||
|
# Paperless-ngx post_document returns a task UUID string.
|
||||||
|
# We need to poll the task status to get the actual document ID.
|
||||||
|
if isinstance(result, str):
|
||||||
|
task_id = result
|
||||||
|
doc_id = _poll_paperless_task(api_url, headers, task_id, safe_filename)
|
||||||
|
elif isinstance(result, int):
|
||||||
|
doc_id = result
|
||||||
|
else:
|
||||||
|
doc_id = result.get("id")
|
||||||
|
|
||||||
logger.info("Anhang '%s' erfolgreich in Paperless hochgeladen (ID: %s).", safe_filename, doc_id)
|
logger.info("Anhang '%s' erfolgreich in Paperless hochgeladen (ID: %s).", safe_filename, doc_id)
|
||||||
return doc_id
|
return doc_id
|
||||||
except requests.RequestException as exc:
|
except requests.RequestException as exc:
|
||||||
|
|||||||
Reference in New Issue
Block a user