feat: add comprehensive GitHub workflow and development tools

2025-09-06 18:31:54 +02:00
commit ab23d7187e
10224 changed files with 2075210 additions and 0 deletions
--- a/app/.venv/Lib/site-packages/weasyprint/pdf/init.py
+++ b/app/.venv/Lib/site-packages/weasyprint/pdf/init.py
@@ -0,0 +1,302 @@
+"""PDF generation management."""
+
+import pydyf
+
+from .. import VERSION, Attachment
+from ..html import W3C_DATE_RE
+from ..logger import LOGGER, PROGRESS_LOGGER
+from ..matrix import Matrix
+from . import pdfa, pdfua
+from .fonts import build_fonts_dictionary
+from .stream import Stream
+
+from .anchors import (  # isort:skip
+    add_annotations, add_inputs, add_links, add_outlines, resolve_links,
+    write_pdf_attachment)
+
+VARIANTS = {
+    name: data for variants in (pdfa.VARIANTS, pdfua.VARIANTS)
+    for (name, data) in variants.items()}
+
+
+def _w3c_date_to_pdf(string, attr_name):
+    """Tranform W3C date to PDF format."""
+    if string is None:
+        return None
+    match = W3C_DATE_RE.match(string)
+    if match is None:
+        LOGGER.warning(f'Invalid {attr_name} date: {string!r}')
+        return None
+    groups = match.groupdict()
+    pdf_date = ''
+    found = groups['hour']
+    for key in ('second', 'minute', 'hour', 'day', 'month', 'year'):
+        if groups[key]:
+            found = True
+            pdf_date = groups[key] + pdf_date
+        elif found:
+            pdf_date = f'{(key in ("day", "month")):02d}{pdf_date}'
+    if groups['hour']:
+        assert groups['minute']
+        if groups['tz_hour']:
+            assert groups['tz_hour'].startswith(('+', '-'))
+            assert groups['tz_minute']
+            tz_hour = int(groups['tz_hour'])
+            tz_minute = int(groups['tz_minute'])
+            pdf_date += f"{tz_hour:+03d}'{tz_minute:02d}"
+        else:
+            pdf_date += 'Z'
+    return f'D:{pdf_date}'
+
+
+def _reference_resources(pdf, resources, images, fonts):
+    if 'Font' in resources:
+        assert resources['Font'] is None
+        resources['Font'] = fonts
+    _use_references(pdf, resources, images)
+    pdf.add_object(resources)
+    return resources.reference
+
+
+def _use_references(pdf, resources, images):
+    # XObjects
+    for key, x_object in resources.get('XObject', {}).items():
+        # Images
+        if x_object is None:
+            image_data = images[key]
+            x_object = image_data['x_object']
+
+            if x_object is not None:
+                # Image already added to PDF
+                resources['XObject'][key] = x_object.reference
+                continue
+
+            image = image_data['image']
+            dpi_ratio = max(image_data['dpi_ratios'])
+            x_object = image.get_x_object(image_data['interpolate'], dpi_ratio)
+            image_data['x_object'] = x_object
+
+        pdf.add_object(x_object)
+        resources['XObject'][key] = x_object.reference
+
+        # Masks
+        if 'SMask' in x_object.extra:
+            pdf.add_object(x_object.extra['SMask'])
+            x_object.extra['SMask'] = x_object.extra['SMask'].reference
+
+        # Resources
+        if 'Resources' in x_object.extra:
+            x_object.extra['Resources'] = _reference_resources(
+                pdf, x_object.extra['Resources'], images, resources['Font'])
+
+    # Patterns
+    for key, pattern in resources.get('Pattern', {}).items():
+        pdf.add_object(pattern)
+        resources['Pattern'][key] = pattern.reference
+        if 'Resources' in pattern.extra:
+            pattern.extra['Resources'] = _reference_resources(
+                pdf, pattern.extra['Resources'], images, resources['Font'])
+
+    # Shadings
+    for key, shading in resources.get('Shading', {}).items():
+        pdf.add_object(shading)
+        resources['Shading'][key] = shading.reference
+
+    # Alpha states
+    for key, alpha in resources.get('ExtGState', {}).items():
+        if 'SMask' in alpha and 'G' in alpha['SMask']:
+            alpha['SMask']['G'] = alpha['SMask']['G'].reference
+
+
+def generate_pdf(document, target, zoom, **options):
+    # 0.75 = 72 PDF point per inch / 96 CSS pixel per inch
+    scale = zoom * 0.75
+
+    PROGRESS_LOGGER.info('Step 6 - Creating PDF')
+
+    # Set properties according to PDF variants
+    mark = False
+    variant = options['pdf_variant']
+    if variant:
+        variant_function, properties = VARIANTS[variant]
+        if 'mark' in properties:
+            mark = properties['mark']
+
+    pdf = pydyf.PDF()
+    states = pydyf.Dictionary()
+    x_objects = pydyf.Dictionary()
+    patterns = pydyf.Dictionary()
+    shadings = pydyf.Dictionary()
+    images = {}
+    resources = pydyf.Dictionary({
+        'ExtGState': states,
+        'XObject': x_objects,
+        'Pattern': patterns,
+        'Shading': shadings,
+    })
+    pdf.add_object(resources)
+    pdf_names = []
+
+    # Links and anchors
+    page_links_and_anchors = list(resolve_links(document.pages))
+
+    annot_files = {}
+    pdf_pages, page_streams = [], []
+    compress = not options['uncompressed_pdf']
+    for page_number, (page, links_and_anchors) in enumerate(
+            zip(document.pages, page_links_and_anchors)):
+        # Draw from the top-left corner
+        matrix = Matrix(scale, 0, 0, -scale, 0, page.height * scale)
+
+        page_width = scale * (
+            page.width + page.bleed['left'] + page.bleed['right'])
+        page_height = scale * (
+            page.height + page.bleed['top'] + page.bleed['bottom'])
+        left = -scale * page.bleed['left']
+        top = -scale * page.bleed['top']
+        right = left + page_width
+        bottom = top + page_height
+
+        page_rectangle = (
+            left / scale, top / scale,
+            (right - left) / scale, (bottom - top) / scale)
+        stream = Stream(
+            document.fonts, page_rectangle, states, x_objects, patterns,
+            shadings, images, mark, compress=compress)
+        stream.transform(d=-1, f=(page.height * scale))
+        pdf.add_object(stream)
+        page_streams.append(stream)
+
+        pdf_page = pydyf.Dictionary({
+            'Type': '/Page',
+            'Parent': pdf.pages.reference,
+            'MediaBox': pydyf.Array([left, top, right, bottom]),
+            'Contents': stream.reference,
+            'Resources': resources.reference,
+        })
+        if mark:
+            pdf_page['Tabs'] = '/S'
+            pdf_page['StructParents'] = page_number
+        pdf.add_page(pdf_page)
+        pdf_pages.append(pdf_page)
+
+        add_links(links_and_anchors, matrix, pdf, pdf_page, pdf_names, mark)
+        add_annotations(
+            links_and_anchors[0], matrix, document, pdf, pdf_page, annot_files,
+            compress)
+        add_inputs(
+            page.inputs, matrix, pdf, pdf_page, resources, stream,
+            document.font_config.font_map, compress)
+        page.paint(stream, scale)
+
+        # Bleed
+        bleed = {key: value * 0.75 for key, value in page.bleed.items()}
+
+        trim_left = left + bleed['left']
+        trim_top = top + bleed['top']
+        trim_right = right - bleed['right']
+        trim_bottom = bottom - bleed['bottom']
+
+        # Arbitrarly set PDF BleedBox between CSS bleed box (MediaBox) and
+        # CSS page box (TrimBox) at most 10 points from the TrimBox.
+        bleed_left = trim_left - min(10, bleed['left'])
+        bleed_top = trim_top - min(10, bleed['top'])
+        bleed_right = trim_right + min(10, bleed['right'])
+        bleed_bottom = trim_bottom + min(10, bleed['bottom'])
+
+        pdf_page['TrimBox'] = pydyf.Array([
+            trim_left, trim_top, trim_right, trim_bottom])
+        pdf_page['BleedBox'] = pydyf.Array([
+            bleed_left, bleed_top, bleed_right, bleed_bottom])
+
+    # Outlines
+    add_outlines(pdf, document.make_bookmark_tree(scale, transform_pages=True))
+
+    PROGRESS_LOGGER.info('Step 7 - Adding PDF metadata')
+
+    # PDF information
+    pdf.info['Producer'] = pydyf.String(f'WeasyPrint {VERSION}')
+    metadata = document.metadata
+    if metadata.title:
+        pdf.info['Title'] = pydyf.String(metadata.title)
+    if metadata.authors:
+        pdf.info['Author'] = pydyf.String(', '.join(metadata.authors))
+    if metadata.description:
+        pdf.info['Subject'] = pydyf.String(metadata.description)
+    if metadata.keywords:
+        pdf.info['Keywords'] = pydyf.String(', '.join(metadata.keywords))
+    if metadata.generator:
+        pdf.info['Creator'] = pydyf.String(metadata.generator)
+    if metadata.created:
+        pdf.info['CreationDate'] = pydyf.String(
+            _w3c_date_to_pdf(metadata.created, 'created'))
+    if metadata.modified:
+        pdf.info['ModDate'] = pydyf.String(
+            _w3c_date_to_pdf(metadata.modified, 'modified'))
+    if metadata.lang:
+        pdf.catalog['Lang'] = pydyf.String(metadata.lang)
+    if options['custom_metadata']:
+        for key, value in metadata.custom.items():
+            key = ''.join(char for char in key if char.isalnum())
+            key = key.encode('ascii', errors='ignore').decode()
+            if key:
+                pdf.info[key] = pydyf.String(value)
+
+    # Embedded files
+    attachments = metadata.attachments.copy()
+    if options['attachments']:
+        for attachment in options['attachments']:
+            if not isinstance(attachment, Attachment):
+                attachment = Attachment(
+                    attachment, url_fetcher=document.url_fetcher)
+            attachments.append(attachment)
+    pdf_attachments = []
+    for attachment in attachments:
+        pdf_attachment = write_pdf_attachment(pdf, attachment, compress)
+        if pdf_attachment is not None:
+            pdf_attachments.append(pdf_attachment)
+    if pdf_attachments:
+        content = pydyf.Dictionary({'Names': pydyf.Array()})
+        for i, pdf_attachment in enumerate(pdf_attachments):
+            content['Names'].append(pydyf.String(f'attachment{i}'))
+            content['Names'].append(pdf_attachment.reference)
+        pdf.add_object(content)
+        if 'Names' not in pdf.catalog:
+            pdf.catalog['Names'] = pydyf.Dictionary()
+        pdf.catalog['Names']['EmbeddedFiles'] = content.reference
+
+    # Embedded fonts
+    subset = not options['full_fonts']
+    pdf_fonts = build_fonts_dictionary(
+        pdf, document.fonts, compress, subset, options)
+    pdf.add_object(pdf_fonts)
+    if 'AcroForm' in pdf.catalog:
+        # Include Dingbats for forms
+        dingbats = pydyf.Dictionary({
+            'Type': '/Font',
+            'Subtype': '/Type1',
+            'BaseFont': '/ZapfDingbats',
+        })
+        pdf.add_object(dingbats)
+        pdf_fonts['ZaDb'] = dingbats.reference
+    resources['Font'] = pdf_fonts.reference
+    _use_references(pdf, resources, images)
+
+    # Anchors
+    if pdf_names:
+        # Anchors are name trees that have to be sorted
+        name_array = pydyf.Array()
+        for anchor in sorted(pdf_names):
+            name_array.append(pydyf.String(anchor[0]))
+            name_array.append(anchor[1])
+        dests = pydyf.Dictionary({'Names': name_array})
+        if 'Names' not in pdf.catalog:
+            pdf.catalog['Names'] = pydyf.Dictionary()
+        pdf.catalog['Names']['Dests'] = dests
+
+    # Apply PDF variants functions
+    if variant:
+        variant_function(
+            pdf, metadata, document, page_streams, attachments, compress)
+
+    return pdf