- Created custom Dockerfile extending official paperless-ngx image - Added tesseract-ocr-deu package for German language OCR - Set PAPERLESS_OCR_LANGUAGE=deu+eng environment variable - Updated CI/CD pipeline to build and push custom paperless image - Modified deployment script to pull paperless image from GHCR - Tested locally: German (deu) language pack now available alongside English
22 lines
666 B
Docker
22 lines
666 B
Docker
# Custom Paperless-ngx with German OCR support
|
|
FROM ghcr.io/paperless-ngx/paperless-ngx:latest
|
|
|
|
# Switch to root to install packages
|
|
USER root
|
|
|
|
# Update package list and install German OCR language data
|
|
# The correct package name is tesseract-ocr-deu (not tesseract-data-deu)
|
|
RUN apt-get update && \
|
|
apt-get install -y \
|
|
tesseract-ocr-deu \
|
|
&& apt-get clean && \
|
|
rm -rf /var/lib/apt/lists/*
|
|
|
|
# Verify German language pack is installed
|
|
RUN tesseract --list-langs | grep deu || echo "German language pack not found"
|
|
|
|
# Switch back to paperless user
|
|
USER paperless
|
|
|
|
# Set the OCR language to include German and English
|
|
ENV PAPERLESS_OCR_LANGUAGE=deu+eng |