feat: add comprehensive GitHub workflow and development tools
This commit is contained in:
302
app/.venv/Lib/site-packages/weasyprint/pdf/__init__.py
Normal file
302
app/.venv/Lib/site-packages/weasyprint/pdf/__init__.py
Normal file
@@ -0,0 +1,302 @@
|
||||
"""PDF generation management."""
|
||||
|
||||
import pydyf
|
||||
|
||||
from .. import VERSION, Attachment
|
||||
from ..html import W3C_DATE_RE
|
||||
from ..logger import LOGGER, PROGRESS_LOGGER
|
||||
from ..matrix import Matrix
|
||||
from . import pdfa, pdfua
|
||||
from .fonts import build_fonts_dictionary
|
||||
from .stream import Stream
|
||||
|
||||
from .anchors import ( # isort:skip
|
||||
add_annotations, add_inputs, add_links, add_outlines, resolve_links,
|
||||
write_pdf_attachment)
|
||||
|
||||
# Single lookup table of every known PDF variant, keyed by variant name.
# The PDF/A table is merged first, then PDF/UA, so a name present in both
# would resolve to the PDF/UA entry.
VARIANTS = dict(
    entry
    for module in (pdfa, pdfua)
    for entry in module.VARIANTS.items())
|
||||
|
||||
|
||||
def _w3c_date_to_pdf(string, attr_name):
|
||||
"""Tranform W3C date to PDF format."""
|
||||
if string is None:
|
||||
return None
|
||||
match = W3C_DATE_RE.match(string)
|
||||
if match is None:
|
||||
LOGGER.warning(f'Invalid {attr_name} date: {string!r}')
|
||||
return None
|
||||
groups = match.groupdict()
|
||||
pdf_date = ''
|
||||
found = groups['hour']
|
||||
for key in ('second', 'minute', 'hour', 'day', 'month', 'year'):
|
||||
if groups[key]:
|
||||
found = True
|
||||
pdf_date = groups[key] + pdf_date
|
||||
elif found:
|
||||
pdf_date = f'{(key in ("day", "month")):02d}{pdf_date}'
|
||||
if groups['hour']:
|
||||
assert groups['minute']
|
||||
if groups['tz_hour']:
|
||||
assert groups['tz_hour'].startswith(('+', '-'))
|
||||
assert groups['tz_minute']
|
||||
tz_hour = int(groups['tz_hour'])
|
||||
tz_minute = int(groups['tz_minute'])
|
||||
pdf_date += f"{tz_hour:+03d}'{tz_minute:02d}"
|
||||
else:
|
||||
pdf_date += 'Z'
|
||||
return f'D:{pdf_date}'
|
||||
|
||||
|
||||
def _reference_resources(pdf, resources, images, fonts):
    """Add a nested resources dictionary to the PDF and return its reference.

    When ``resources`` has a ``'Font'`` entry, that entry must still be
    ``None`` and is replaced by ``fonts``. The objects held by the
    dictionary are then replaced by references through ``_use_references``,
    and the dictionary itself is added to ``pdf``.

    :returns:
        The ``reference`` attribute of ``resources`` after it was added.

    """
    has_font_slot = 'Font' in resources
    if has_font_slot:
        # The slot is expected to be an unfilled placeholder at this point.
        assert resources['Font'] is None
        resources['Font'] = fonts

    _use_references(pdf, resources, images)
    pdf.add_object(resources)
    resources_reference = resources.reference
    return resources_reference
|
||||
|
||||
|
||||
def _use_references(pdf, resources, images):
|
||||
# XObjects
|
||||
for key, x_object in resources.get('XObject', {}).items():
|
||||
# Images
|
||||
if x_object is None:
|
||||
image_data = images[key]
|
||||
x_object = image_data['x_object']
|
||||
|
||||
if x_object is not None:
|
||||
# Image already added to PDF
|
||||
resources['XObject'][key] = x_object.reference
|
||||
continue
|
||||
|
||||
image = image_data['image']
|
||||
dpi_ratio = max(image_data['dpi_ratios'])
|
||||
x_object = image.get_x_object(image_data['interpolate'], dpi_ratio)
|
||||
image_data['x_object'] = x_object
|
||||
|
||||
pdf.add_object(x_object)
|
||||
resources['XObject'][key] = x_object.reference
|
||||
|
||||
# Masks
|
||||
if 'SMask' in x_object.extra:
|
||||
pdf.add_object(x_object.extra['SMask'])
|
||||
x_object.extra['SMask'] = x_object.extra['SMask'].reference
|
||||
|
||||
# Resources
|
||||
if 'Resources' in x_object.extra:
|
||||
x_object.extra['Resources'] = _reference_resources(
|
||||
pdf, x_object.extra['Resources'], images, resources['Font'])
|
||||
|
||||
# Patterns
|
||||
for key, pattern in resources.get('Pattern', {}).items():
|
||||
pdf.add_object(pattern)
|
||||
resources['Pattern'][key] = pattern.reference
|
||||
if 'Resources' in pattern.extra:
|
||||
pattern.extra['Resources'] = _reference_resources(
|
||||
pdf, pattern.extra['Resources'], images, resources['Font'])
|
||||
|
||||
# Shadings
|
||||
for key, shading in resources.get('Shading', {}).items():
|
||||
pdf.add_object(shading)
|
||||
resources['Shading'][key] = shading.reference
|
||||
|
||||
# Alpha states
|
||||
for key, alpha in resources.get('ExtGState', {}).items():
|
||||
if 'SMask' in alpha and 'G' in alpha['SMask']:
|
||||
alpha['SMask']['G'] = alpha['SMask']['G'].reference
|
||||
|
||||
|
||||
def generate_pdf(document, target, zoom, **options):
    """Build the PDF object tree for a rendered document.

    :param document:
        The rendered document. Its ``pages``, ``fonts``, ``metadata``,
        ``font_config.font_map``, ``url_fetcher`` and
        ``make_bookmark_tree()`` are used.
    :param target:
        Not used by this function.
    :param zoom:
        Zoom factor, multiplied by 0.75 to get the pixel-to-point scale.
    :param options:
        Rendering options. The keys read here are ``pdf_variant``,
        ``uncompressed_pdf``, ``custom_metadata``, ``attachments`` and
        ``full_fonts``; the whole mapping is also passed to
        ``build_fonts_dictionary``.
    :returns:
        The ``pydyf.PDF`` object holding pages, metadata, attachments,
        fonts and named destinations.

    """
    # 0.75 = 72 PDF point per inch / 96 CSS pixel per inch
    scale = zoom * 0.75

    PROGRESS_LOGGER.info('Step 6 - Creating PDF')

    # Set properties according to PDF variants
    mark = False
    variant = options['pdf_variant']
    if variant:
        variant_function, properties = VARIANTS[variant]
        if 'mark' in properties:
            mark = properties['mark']

    pdf = pydyf.PDF()
    states = pydyf.Dictionary()
    x_objects = pydyf.Dictionary()
    patterns = pydyf.Dictionary()
    shadings = pydyf.Dictionary()
    images = {}
    # One resources dictionary is shared by all the pages
    resources = pydyf.Dictionary({
        'ExtGState': states,
        'XObject': x_objects,
        'Pattern': patterns,
        'Shading': shadings,
    })
    pdf.add_object(resources)
    pdf_names = []

    # Links and anchors
    page_links_and_anchors = list(resolve_links(document.pages))

    annot_files = {}
    page_streams = []
    compress = not options['uncompressed_pdf']
    for page_number, (page, links_and_anchors) in enumerate(
            zip(document.pages, page_links_and_anchors)):
        # Draw from the top-left corner
        matrix = Matrix(scale, 0, 0, -scale, 0, page.height * scale)

        page_width = scale * (
            page.width + page.bleed['left'] + page.bleed['right'])
        page_height = scale * (
            page.height + page.bleed['top'] + page.bleed['bottom'])
        left = -scale * page.bleed['left']
        top = -scale * page.bleed['top']
        right = left + page_width
        bottom = top + page_height

        page_rectangle = (
            left / scale, top / scale,
            (right - left) / scale, (bottom - top) / scale)
        stream = Stream(
            document.fonts, page_rectangle, states, x_objects, patterns,
            shadings, images, mark, compress=compress)
        stream.transform(d=-1, f=(page.height * scale))
        pdf.add_object(stream)
        page_streams.append(stream)

        pdf_page = pydyf.Dictionary({
            'Type': '/Page',
            'Parent': pdf.pages.reference,
            'MediaBox': pydyf.Array([left, top, right, bottom]),
            'Contents': stream.reference,
            'Resources': resources.reference,
        })
        if mark:
            pdf_page['Tabs'] = '/S'
            pdf_page['StructParents'] = page_number
        pdf.add_page(pdf_page)

        add_links(links_and_anchors, matrix, pdf, pdf_page, pdf_names, mark)
        add_annotations(
            links_and_anchors[0], matrix, document, pdf, pdf_page, annot_files,
            compress)
        add_inputs(
            page.inputs, matrix, pdf, pdf_page, resources, stream,
            document.font_config.font_map, compress)
        page.paint(stream, scale)

        # Bleed
        # NOTE(review): the bleed is converted with a fixed 0.75 factor while
        # the MediaBox above uses ``scale``; both only agree when zoom is 1.
        # Confirm whether this is intended.
        bleed = {key: value * 0.75 for key, value in page.bleed.items()}

        trim_left = left + bleed['left']
        trim_top = top + bleed['top']
        trim_right = right - bleed['right']
        trim_bottom = bottom - bleed['bottom']

        # Arbitrarily set PDF BleedBox between CSS bleed box (MediaBox) and
        # CSS page box (TrimBox) at most 10 points from the TrimBox.
        bleed_left = trim_left - min(10, bleed['left'])
        bleed_top = trim_top - min(10, bleed['top'])
        bleed_right = trim_right + min(10, bleed['right'])
        bleed_bottom = trim_bottom + min(10, bleed['bottom'])

        pdf_page['TrimBox'] = pydyf.Array([
            trim_left, trim_top, trim_right, trim_bottom])
        pdf_page['BleedBox'] = pydyf.Array([
            bleed_left, bleed_top, bleed_right, bleed_bottom])

    # Outlines
    add_outlines(pdf, document.make_bookmark_tree(scale, transform_pages=True))

    PROGRESS_LOGGER.info('Step 7 - Adding PDF metadata')

    # PDF information
    pdf.info['Producer'] = pydyf.String(f'WeasyPrint {VERSION}')
    metadata = document.metadata
    if metadata.title:
        pdf.info['Title'] = pydyf.String(metadata.title)
    if metadata.authors:
        pdf.info['Author'] = pydyf.String(', '.join(metadata.authors))
    if metadata.description:
        pdf.info['Subject'] = pydyf.String(metadata.description)
    if metadata.keywords:
        pdf.info['Keywords'] = pydyf.String(', '.join(metadata.keywords))
    if metadata.generator:
        pdf.info['Creator'] = pydyf.String(metadata.generator)
    # The date conversion returns None (after logging a warning) for a
    # value that is not a valid W3C date: skip the entry in that case
    # instead of storing a string built from None.
    if metadata.created:
        created = _w3c_date_to_pdf(metadata.created, 'created')
        if created is not None:
            pdf.info['CreationDate'] = pydyf.String(created)
    if metadata.modified:
        modified = _w3c_date_to_pdf(metadata.modified, 'modified')
        if modified is not None:
            pdf.info['ModDate'] = pydyf.String(modified)
    if metadata.lang:
        pdf.catalog['Lang'] = pydyf.String(metadata.lang)
    if options['custom_metadata']:
        for key, value in metadata.custom.items():
            # Only keep ASCII alphanumeric characters in custom keys
            key = ''.join(char for char in key if char.isalnum())
            key = key.encode('ascii', errors='ignore').decode()
            if key:
                pdf.info[key] = pydyf.String(value)

    # Embedded files
    attachments = metadata.attachments.copy()
    if options['attachments']:
        for attachment in options['attachments']:
            if not isinstance(attachment, Attachment):
                attachment = Attachment(
                    attachment, url_fetcher=document.url_fetcher)
            attachments.append(attachment)
    pdf_attachments = []
    for attachment in attachments:
        pdf_attachment = write_pdf_attachment(pdf, attachment, compress)
        if pdf_attachment is not None:
            pdf_attachments.append(pdf_attachment)
    if pdf_attachments:
        content = pydyf.Dictionary({'Names': pydyf.Array()})
        for i, pdf_attachment in enumerate(pdf_attachments):
            content['Names'].append(pydyf.String(f'attachment{i}'))
            content['Names'].append(pdf_attachment.reference)
        pdf.add_object(content)
        if 'Names' not in pdf.catalog:
            pdf.catalog['Names'] = pydyf.Dictionary()
        pdf.catalog['Names']['EmbeddedFiles'] = content.reference

    # Embedded fonts
    subset = not options['full_fonts']
    pdf_fonts = build_fonts_dictionary(
        pdf, document.fonts, compress, subset, options)
    pdf.add_object(pdf_fonts)
    if 'AcroForm' in pdf.catalog:
        # Include Dingbats for forms
        dingbats = pydyf.Dictionary({
            'Type': '/Font',
            'Subtype': '/Type1',
            'BaseFont': '/ZapfDingbats',
        })
        pdf.add_object(dingbats)
        pdf_fonts['ZaDb'] = dingbats.reference
    resources['Font'] = pdf_fonts.reference
    _use_references(pdf, resources, images)

    # Anchors
    if pdf_names:
        # Anchors are name trees that have to be sorted
        name_array = pydyf.Array()
        for anchor in sorted(pdf_names):
            name_array.append(pydyf.String(anchor[0]))
            name_array.append(anchor[1])
        dests = pydyf.Dictionary({'Names': name_array})
        if 'Names' not in pdf.catalog:
            pdf.catalog['Names'] = pydyf.Dictionary()
        pdf.catalog['Names']['Dests'] = dests

    # Apply PDF variants functions
    if variant:
        variant_function(
            pdf, metadata, document, page_streams, attachments, compress)

    return pdf
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
384
app/.venv/Lib/site-packages/weasyprint/pdf/anchors.py
Normal file
384
app/.venv/Lib/site-packages/weasyprint/pdf/anchors.py
Normal file
@@ -0,0 +1,384 @@
|
||||
"""Insert anchors, links, bookmarks and inputs in PDFs."""
|
||||
|
||||
import io
|
||||
import mimetypes
|
||||
from hashlib import md5
|
||||
from os.path import basename
|
||||
from urllib.parse import unquote, urlsplit
|
||||
|
||||
import pydyf
|
||||
|
||||
from .. import Attachment
|
||||
from ..logger import LOGGER
|
||||
from ..text.ffi import ffi, gobject, pango
|
||||
from ..text.fonts import get_font_description
|
||||
from ..urls import URLFetchingError
|
||||
|
||||
|
||||
def add_links(links_and_anchors, matrix, pdf, page, names, mark):
    """Add the hyperlinks and anchors of a page to the PDF.

    ``links_and_anchors`` is a ``(links, anchors)`` pair. Internal and
    external links get a link annotation, stored on the box as
    ``link_annotation`` and referenced from the page's ``Annots`` array.
    Each anchor is appended to ``names`` as ``[name, destination]``.

    """
    page_links, page_anchors = links_and_anchors

    for kind, target, rectangle, box in page_links:
        x_start, y_start = matrix.transform_point(*rectangle[:2])
        x_end, y_end = matrix.transform_point(*rectangle[2:])
        if kind not in ('internal', 'external'):
            continue

        annotation = pydyf.Dictionary({
            'Type': '/Annot',
            'Subtype': '/Link',
            'Rect': pydyf.Array([x_start, y_start, x_end, y_end]),
            'BS': pydyf.Dictionary({'W': 0}),
        })
        box.link_annotation = annotation
        if mark:
            box.link_annotation['Contents'] = pydyf.String(target)
        if kind == 'internal':
            # Internal links point to a named destination
            box.link_annotation['Dest'] = pydyf.String(target)
        else:
            # External links trigger a URI action
            box.link_annotation['A'] = pydyf.Dictionary({
                'Type': '/Action',
                'S': '/URI',
                'URI': pydyf.String(target),
            })
        pdf.add_object(box.link_annotation)
        if 'Annots' not in page:
            page['Annots'] = pydyf.Array()
        page['Annots'].append(box.link_annotation.reference)

    for name, x, y in page_anchors:
        x, y = matrix.transform_point(x, y)
        destination = pydyf.Array([page.reference, '/XYZ', x, y, 0])
        names.append([name, destination])
|
||||
|
||||
|
||||
def add_outlines(pdf, bookmarks, parent=None):
    """Add the bookmark tree to the PDF as outline items.

    The function calls itself for the children of each bookmark. On the top
    level call (``parent`` is ``None``), the root outlines dictionary is
    created and stored in the catalog when there is at least one bookmark.

    :returns:
        A ``(outlines, count)`` tuple: the outline items created for
        ``bookmarks``, and the number of these items plus the items counted
        under the bookmarks that are not closed.

    """
    total = len(bookmarks)
    siblings = []
    for title, (page, x, y), children, state in bookmarks:
        target = pydyf.Array((pdf.page_references[page], '/XYZ', x, y, 0))
        item = pydyf.Dictionary({
            'Title': pydyf.String(title), 'Dest': target})
        pdf.add_object(item)
        descendants, descendant_count = add_outlines(
            pdf, children, parent=item)
        if state == 'closed':
            # Closed items store a negative count and hide their children
            # from the count of their ancestors.
            item['Count'] = -descendant_count
        else:
            item['Count'] = descendant_count
            total += descendant_count
        if siblings:
            previous = siblings[-1]
            item['Prev'] = previous.reference
            previous['Next'] = item.reference
        if descendants:
            item['First'] = descendants[0].reference
            item['Last'] = descendants[-1].reference
        if parent is not None:
            item['Parent'] = parent.reference
        siblings.append(item)

    if parent is None and siblings:
        root = pydyf.Dictionary({
            'Count': total,
            'First': siblings[0].reference,
            'Last': siblings[-1].reference,
        })
        pdf.add_object(root)
        for item in siblings:
            item['Parent'] = root.reference
        pdf.catalog['Outlines'] = root.reference

    return siblings, total
|
||||
|
||||
|
||||
def add_inputs(inputs, matrix, pdf, page, resources, stream, font_map,
               compress):
    """Add the form fields of a page to the PDF.

    Nothing is done when ``inputs`` is empty. Otherwise the page's
    ``Annots`` array and the catalog's ``AcroForm`` dictionary are created
    when missing, and one widget annotation is added for each
    ``(element, style, rectangle)`` item: a button field for checkboxes, a
    choice field for ``select`` elements and a text field for anything
    else.

    """
    if not inputs:
        return

    if 'Annots' not in page:
        page['Annots'] = pydyf.Array()
    if 'AcroForm' not in pdf.catalog:
        pdf.catalog['AcroForm'] = pydyf.Dictionary({
            'Fields': pydyf.Array(),
            'DR': resources.reference,
            'NeedAppearances': 'true',
        })
    page_reference = page['Contents'].split()[0]
    context = ffi.gc(
        pango.pango_font_map_create_context(font_map),
        gobject.g_object_unref)
    print_flag = 1 << (3 - 1)

    def use_form_font(field_style):
        # Load the font matching the style, register it in the page stream
        # and flag it as used in forms.
        description = get_font_description(field_style)
        pango_font = pango.pango_font_map_load_font(
            font_map, context, description)
        form_font = stream.add_font(pango_font)
        form_font.used_in_forms = True
        return form_font

    for index, (element, style, box_rectangle) in enumerate(inputs):
        rectangle = (
            *matrix.transform_point(*box_rectangle[:2]),
            *matrix.transform_point(*box_rectangle[2:]))

        attributes = element.attrib
        input_type = attributes.get('type')
        default_name = f'unknown-{page_reference.decode()}-{index}'
        input_name = attributes.get('name', default_name)
        # TODO: where does this 0.75 scale come from?
        font_size = style['font_size'] * 0.75
        # The operations written to this stream become the field's default
        # appearance string (DA).
        field_stream = pydyf.Stream(compress=compress)
        field_stream.set_color_rgb(*style['color'][:3])

        if input_type == 'checkbox':
            # Checkboxes
            width = rectangle[2] - rectangle[0]
            height = rectangle[1] - rectangle[3]
            checked_stream = pydyf.Stream(extra={
                'Resources': resources.reference,
                'Type': '/XObject',
                'Subtype': '/Form',
                'BBox': pydyf.Array((0, 0, width, height)),
            }, compress=compress)
            checked_stream.push_state()
            checked_stream.begin_text()
            checked_stream.set_color_rgb(*style['color'][:3])
            checked_stream.set_font_size('ZaDb', font_size)
            # Center (let’s assume that Dingbat’s check has a 0.8em size)
            check_size = font_size * 0.8
            checked_stream.move_text_to(
                (width - check_size) / 2, (height - check_size) / 2)
            checked_stream.show_text_string('4')
            checked_stream.end_text()
            checked_stream.pop_state()
            pdf.add_object(checked_stream)

            state_name = '/Yes' if 'checked' in attributes else '/Off'
            field_stream.set_font_size('ZaDb', font_size)
            field = pydyf.Dictionary({
                'Type': '/Annot',
                'Subtype': '/Widget',
                'Rect': pydyf.Array(rectangle),
                'FT': '/Btn',
                'F': print_flag,
                'P': page.reference,
                'T': pydyf.String(input_name),
                'V': state_name,
                'AP': pydyf.Dictionary({'N': pydyf.Dictionary({
                    'Yes': checked_stream.reference,
                })}),
                'AS': state_name,
                'DA': pydyf.String(b' '.join(field_stream.stream)),
            })
        elif element.tag == 'select':
            # Select fields
            font = use_form_font(style)

            field_stream.set_font_size(font.hash, font_size)
            choices = []
            selected_values = []
            for option in element:
                option_value = pydyf.String(option.attrib.get('value', ''))
                option_text = pydyf.String(option.text or "")
                choices.append(pydyf.Array([option_value, option_text]))
                if 'selected' in option.attrib:
                    selected_values.append(option_value)

            field = pydyf.Dictionary({
                'DA': pydyf.String(b' '.join(field_stream.stream)),
                'F': print_flag,
                'FT': '/Ch',
                'Opt': pydyf.Array(choices),
                'P': page.reference,
                'Rect': pydyf.Array(rectangle),
                'Subtype': '/Widget',
                'T': pydyf.String(input_name),
                'Type': '/Annot',
            })
            if 'multiple' in attributes:
                field['Ff'] = 1 << (22 - 1)
                field['V'] = pydyf.Array(selected_values)
            else:
                field['Ff'] = 1 << (18 - 1)
                if selected_values:
                    # The last selected option wins
                    field['V'] = selected_values[-1]
                else:
                    field['V'] = pydyf.String('')
        else:
            # Text, password, textarea, files, and unknown
            font = use_form_font(style)

            field_stream.set_font_size(font.hash, font_size)
            is_textarea = element.tag == 'textarea'
            if is_textarea:
                value = element.text
            else:
                value = attributes.get('value', '')
            field = pydyf.Dictionary({
                'Type': '/Annot',
                'Subtype': '/Widget',
                'Rect': pydyf.Array(rectangle),
                'FT': '/Tx',
                'F': print_flag,
                'P': page.reference,
                'T': pydyf.String(input_name),
                'V': pydyf.String(value or ''),
                'DA': pydyf.String(b' '.join(field_stream.stream)),
            })
            if is_textarea:
                field['Ff'] = 1 << (13 - 1)
            elif input_type == 'password':
                field['Ff'] = 1 << (14 - 1)
            elif input_type == 'file':
                field['Ff'] = 1 << (21 - 1)

            maxlength = element.get('maxlength')
            if maxlength and maxlength.isdigit():
                field['MaxLen'] = maxlength

        pdf.add_object(field)
        page['Annots'].append(field.reference)
        pdf.catalog['AcroForm']['Fields'].append(field.reference)
|
||||
|
||||
|
||||
def add_annotations(links, matrix, document, pdf, page, annot_files, compress):
    """Add a file attachment annotation for each attachment link.

    Links whose type is not ``'attachment'`` are ignored. ``annot_files``
    maps each attachment target to the result of ``write_pdf_attachment``
    and is filled by this function, so a target is only embedded once.

    """
    # TODO: splitting a link into multiple independent rectangular
    # annotations works well for pure links, but rather mediocre for
    # other annotations and fails completely for transformed (CSS) or
    # complex link shapes (area). It would be better to use /AP for all
    # links and coalesce link shapes that originate from the same HTML
    # link. This would give a feeling similar to what browsers do with
    # links that span multiple lines.
    for kind, target, box_rectangle, _ in links:
        if kind != 'attachment':
            continue

        already_embedded = target in annot_files
        if not already_embedded:
            # A single link can be split in multiple regions. The file must
            # not be embedded once per region, so the object written for
            # each URL is kept and reused.
            # TODO: Use the title attribute as description. The comment
            # above about multiple regions won't always be correct, because
            # two links might have the same href, but different titles.
            new_attachment = Attachment(
                url=target, url_fetcher=document.url_fetcher)
            annot_files[target] = write_pdf_attachment(
                pdf, new_attachment, compress)

        file_specification = annot_files[target]
        if file_specification is None:
            # The attachment could not be written, nothing to point to
            continue

        rectangle = (
            *matrix.transform_point(*box_rectangle[:2]),
            *matrix.transform_point(*box_rectangle[2:]))
        # Empty appearance stream covering the link rectangle
        appearance = pydyf.Stream([], {
            'Type': '/XObject',
            'Subtype': '/Form',
            'BBox': pydyf.Array(rectangle),
        }, compress)
        pdf.add_object(appearance)
        annotation = pydyf.Dictionary({
            'Type': '/Annot',
            'Rect': pydyf.Array(rectangle),
            'Subtype': '/FileAttachment',
            'T': pydyf.String(),
            'FS': file_specification.reference,
            'AP': pydyf.Dictionary({'N': appearance.reference}),
            'AS': '/N',
        })
        pdf.add_object(annotation)
        if 'Annots' not in page:
            page['Annots'] = pydyf.Array()
        page['Annots'].append(annotation.reference)
|
||||
|
||||
|
||||
def write_pdf_attachment(pdf, attachment, compress):
    """Write an attachment to the PDF stream.

    The content of ``attachment.source`` is read, embedded in ``pdf`` as a
    file stream, and described by a file specification dictionary. The MD5
    checksum of the content is stored in ``attachment.md5``.

    :param pdf:
        The PDF object the stream and file specification are added to.
    :param attachment:
        The attachment to embed. Its ``source``, ``created``, ``modified``
        and ``description`` attributes are read.
    :param compress:
        Passed as the ``compress`` argument of the embedded file stream.
    :returns:
        The file specification dictionary added to ``pdf``, or ``None``
        when fetching the attachment failed (an error is logged).

    """
    # Attachments from document links like <link> or <a> can only be URLs.
    # They're passed in as tuples
    url = None
    chunks = []
    try:
        with attachment.source as (_, source, url, _):
            if isinstance(source, str):
                source = source.encode()
            if isinstance(source, bytes):
                source = io.BytesIO(source)
            # Collect the chunks and join them once: extending a bytes
            # object in a loop copies the accumulated data at each step.
            chunks.extend(iter(lambda: source.read(4096), b''))
    except URLFetchingError as exception:
        LOGGER.error('Failed to load attachment: %s', exception)
        return None
    stream = b''.join(chunks)
    uncompressed_length = len(stream)
    attachment.md5 = md5(stream, usedforsecurity=False).hexdigest()

    # TODO: Use the result object from a URL fetch operation to provide more
    # details on the possible filename and MIME type.
    filename = basename(unquote(urlsplit(url).path)) if url else ''
    if not filename:
        # No URL, no path, or a path ending with a slash: use a generic name
        # rather than an empty one.
        filename = 'attachment.bin'
    mime_type = mimetypes.guess_type(filename, strict=False)[0]
    if not mime_type:
        mime_type = 'application/octet-stream'

    creation = pydyf.String(attachment.created.strftime('D:%Y%m%d%H%M%SZ'))
    mod = pydyf.String(attachment.modified.strftime('D:%Y%m%d%H%M%SZ'))
    file_extra = pydyf.Dictionary({
        'Type': '/EmbeddedFile',
        # The slash of the MIME type is escaped as "#2f" in the PDF name
        'Subtype': f'/{mime_type.replace("/", "#2f")}',
        'Params': pydyf.Dictionary({
            'CheckSum': f'<{attachment.md5}>',
            'Size': uncompressed_length,
            'CreationDate': creation,
            'ModDate': mod,
        })
    })
    file_stream = pydyf.Stream([stream], file_extra, compress=compress)
    pdf.add_object(file_stream)

    pdf_attachment = pydyf.Dictionary({
        'Type': '/Filespec',
        'F': pydyf.String(),
        'UF': pydyf.String(filename),
        'EF': pydyf.Dictionary({'F': file_stream.reference}),
        'Desc': pydyf.String(attachment.description or ''),
    })
    pdf.add_object(pdf_attachment)
    return pdf_attachment
|
||||
|
||||
|
||||
def resolve_links(pages):
    """Filter the links and collect the anchors of each page.

    Internal links to an anchor that is defined on no page are removed and
    an error is logged. Other links are kept unchanged.

    If multiple anchors have the same name, the first one is used.

    The anchors of all the pages are collected before the first item is
    yielded, so a link can target an anchor defined on a later page.
    ``pages`` is iterated twice.

    :param pages:
        Pages with an ``anchors`` mapping of names to ``(x, y)`` points and
        a ``links`` list of 4-item tuples whose first two items are the
        link type and the link target.
    :returns:
        A generator yielding one ``(links, anchors)`` tuple per page, where
        ``links`` is the list of links kept for the page and ``anchors`` is
        a list of ``(anchor_name, x, y)`` tuples for the anchors first
        defined on the page.

    """
    known_anchors = set()
    paged_anchors = []
    for page in pages:
        page_anchors = []
        for anchor_name, (point_x, point_y) in page.anchors.items():
            if anchor_name not in known_anchors:
                page_anchors.append((anchor_name, point_x, point_y))
                known_anchors.add(anchor_name)
        paged_anchors.append(page_anchors)

    for page, page_anchors in zip(pages, paged_anchors):
        page_links = []
        for link in page.links:
            link_type, anchor_name, _, _ = link
            if link_type == 'internal' and anchor_name not in known_anchors:
                LOGGER.error(
                    'No anchor #%s for internal URI reference',
                    anchor_name)
                continue
            page_links.append(link)
        yield page_links, page_anchors
|
||||
318
app/.venv/Lib/site-packages/weasyprint/pdf/fonts.py
Normal file
318
app/.venv/Lib/site-packages/weasyprint/pdf/fonts.py
Normal file
@@ -0,0 +1,318 @@
|
||||
"""Fonts integration in PDF."""
|
||||
|
||||
from math import ceil
|
||||
|
||||
import pydyf
|
||||
|
||||
from ..logger import LOGGER
|
||||
|
||||
|
||||
def build_fonts_dictionary(pdf, fonts, compress_pdf, subset, options):
    """Add the fonts to the PDF and return the dictionary referencing them.

    Font files are embedded once per ``font.hash`` (bitmap fonts excepted),
    then a font dictionary is built for each font, with its glyph widths
    and its ToUnicode map.

    :param pdf:
        The PDF object the font objects are added to.
    :param fonts:
        Mapping whose values are the fonts to include.
    :param compress_pdf:
        Passed as the ``compress`` argument of the streams created here.
    :param subset:
        Whether the data is restricted to the glyphs that are used. Fonts
        flagged as ``used_in_forms`` are not restricted.
    :param options:
        Rendering options; ``hinting`` and ``pdf_version`` are read.
    :returns:
        A ``pydyf.Dictionary`` mapping each ``font.hash`` to the reference
        of its font dictionary.

    """
    pdf_fonts = pydyf.Dictionary()
    # Group the fonts by hash, so that each font file is embedded only once
    fonts_by_file_hash = {}
    for font in fonts.values():
        fonts_by_file_hash.setdefault(font.hash, []).append(font)
    font_references_by_file_hash = {}
    for file_hash, file_fonts in fonts_by_file_hash.items():
        # TODO: find why we can have multiple fonts for one font file
        font = file_fonts[0]
        if font.bitmap:
            # No font file stream is embedded for bitmap fonts
            continue

        # Clean font, optimize and handle emojis
        cmap = {}
        if subset and not font.used_in_forms:
            # Merge the character maps of all the fonts sharing this file
            for file_font in file_fonts:
                cmap = {**cmap, **file_font.cmap}
        font.clean(cmap, options['hinting'])

        # Include font
        if font.type == 'otf':
            font_extra = pydyf.Dictionary({'Subtype': '/OpenType'})
        else:
            font_extra = pydyf.Dictionary({'Length1': len(font.file_content)})
        font_stream = pydyf.Stream(
            [font.file_content], font_extra, compress=compress_pdf)
        pdf.add_object(font_stream)
        font_references_by_file_hash[file_hash] = font_stream.reference

    for font in fonts.values():
        if not font.ttfont or (subset and not font.used_in_forms):
            # Only store widths and map for used glyphs
            font_widths = font.widths
            cmap = font.cmap
        else:
            # Store width and Unicode map for all glyphs
            font_widths, cmap = {}, {}
            for letter, key in font.ttfont.getBestCmap().items():
                glyph = font.ttfont.getGlyphID(key)
                # Keep the first character found for each glyph
                if glyph not in cmap:
                    cmap[glyph] = chr(letter)
                # NOTE(review): getGlyphSet() is called for each character;
                # it looks loop-invariant and could probably be hoisted out
                # of the loop, to be confirmed against fontTools.
                width = font.ttfont.getGlyphSet()[key].width
                font_widths[glyph] = width * 1000 / font.upem

        max_x = max(font_widths.values()) if font_widths else 0
        bbox = (0, font.descent, max_x, font.ascent)

        # Build the widths array as a sequence of "first index, [widths]"
        # pairs, one pair for each run of consecutive glyph indexes
        widths = pydyf.Array()
        for i in sorted(font_widths):
            if i - 1 not in font_widths:
                # Start of a new run
                widths.append(i)
                current_widths = pydyf.Array()
                widths.append(current_widths)
            current_widths.append(font_widths[i])

        font_file = f'FontFile{3 if font.type == "otf" else 2}'
        # ToUnicode CMap header; one bfchar line is appended below for each
        # glyph of cmap, followed by the footer
        to_unicode = pydyf.Stream([
            b'/CIDInit /ProcSet findresource begin',
            b'12 dict begin',
            b'begincmap',
            b'/CIDSystemInfo',
            b'<< /Registry (Adobe)',
            b'/Ordering (UCS)',
            b'/Supplement 0',
            b'>> def',
            b'/CMapName /Adobe-Identity-UCS def',
            b'/CMapType 2 def',
            b'1 begincodespacerange',
            b'<0000> <ffff>',
            b'endcodespacerange',
            f'{len(cmap)} beginbfchar'.encode()], compress=compress_pdf)
        for glyph, text in cmap.items():
            # Hexadecimal UTF-16-BE encoding of the text mapped to the glyph
            unicode_codepoints = ''.join(
                f'{letter.encode("utf-16-be").hex()}' for letter in text)
            to_unicode.stream.append(
                f'<{glyph:04x}> <{unicode_codepoints}>'.encode())
        to_unicode.stream.extend([
            b'endbfchar',
            b'endcmap',
            b'CMapName currentdict /CMap defineresource pop',
            b'end',
            b'end'])
        pdf.add_object(to_unicode)
        font_dictionary = pydyf.Dictionary({
            'Type': '/Font',
            'Subtype': f'/Type{3 if font.bitmap else 0}',
            'BaseFont': font.name,
            'ToUnicode': to_unicode.reference,
        })

        if font.bitmap:
            _build_bitmap_font_dictionary(
                font_dictionary, pdf, font, widths, compress_pdf, subset)
        else:
            flags = font.flags
            # More than one entry in the widths array, but a single
            # distinct width among the used glyphs
            if len(widths) > 1 and len(set(font.widths.values())) == 1:
                flags += 2 ** (1 - 1)  # FixedPitch
            font_descriptor = pydyf.Dictionary({
                'Type': '/FontDescriptor',
                'FontName': font.name,
                'FontFamily': pydyf.String(font.family),
                'Flags': flags,
                'FontBBox': pydyf.Array(bbox),
                'ItalicAngle': font.italic_angle,
                'Ascent': font.ascent,
                'Descent': font.descent,
                'CapHeight': bbox[3],
                'StemV': font.stemv,
                'StemH': font.stemh,
                font_file: font_references_by_file_hash[font.hash],
            })
            if str(options['pdf_version']) <= '1.4':  # Cast for bytes and None
                # CIDSet stream: one bit per CID up to the highest one used,
                # most significant bit first, set for the CIDs of font.widths
                cids = sorted(font.widths)
                padded_width = int(ceil((cids[-1] + 1) / 8))
                bits = ['0'] * padded_width * 8
                for cid in cids:
                    bits[cid] = '1'
                stream = pydyf.Stream(
                    (int(''.join(bits), 2).to_bytes(padded_width, 'big'),),
                    compress=compress_pdf)
                pdf.add_object(stream)
                font_descriptor['CIDSet'] = stream.reference
            if font.type == 'otf':
                font_descriptor['Subtype'] = '/OpenType'
            pdf.add_object(font_descriptor)
            # Descendant CID font, holding the widths and the descriptor
            subfont_dictionary = pydyf.Dictionary({
                'Type': '/Font',
                'Subtype': f'/CIDFontType{0 if font.type == "otf" else 2}',
                'BaseFont': font.name,
                'CIDSystemInfo': pydyf.Dictionary({
                    'Registry': pydyf.String('Adobe'),
                    'Ordering': pydyf.String('Identity'),
                    'Supplement': 0,
                }),
                'CIDToGIDMap': '/Identity',
                'W': widths,
                'FontDescriptor': font_descriptor.reference,
            })
            pdf.add_object(subfont_dictionary)
            font_dictionary['Encoding'] = '/Identity-H'
            font_dictionary['DescendantFonts'] = pydyf.Array(
                [subfont_dictionary.reference])
        pdf.add_object(font_dictionary)
        pdf_fonts[font.hash] = font_dictionary.reference

    return pdf_fonts
|
||||
|
||||
|
||||
def _build_bitmap_font_dictionary(font_dictionary, pdf, font, widths,
                                  compress_pdf, subset):
    """Fill ``font_dictionary`` for a bitmap font stored in EBDT/EBLC tables.

    Every glyph of the first bitmap strike is decoded, then each glyph whose
    id is kept is written as an inline image mask in its own stream. These
    streams are referenced from a ``CharProcs`` dictionary.

    :param font_dictionary:
        Dictionary updated in place with the ``FontBBox``, ``FontMatrix``,
        ``FirstChar``, ``LastChar``, ``Encoding``, ``Widths`` and
        ``CharProcs`` entries.
    :param pdf:
        Object whose ``add_object`` method receives the glyph streams and the
        ``CharProcs`` dictionary.
    :param font:
        Font whose ``ttfont`` provides the ``EBDT`` and ``EBLC`` tables, and
        whose ``cmap`` keys are the glyph ids kept when subsetting.
    :param widths:
        Flat sequence alternating a start index and the list of widths
        starting at this index. Only used to build ``Differences``.
    :param compress_pdf:
        Forwarded as ``compress`` to the glyph streams.
    :param subset:
        When true, only keep the ids found in ``font.cmap``, otherwise keep
        ids 0 to 255.

    """
    # https://docs.microsoft.com/typography/opentype/spec/ebdt
    font_dictionary['FontBBox'] = pydyf.Array([0, 0, 1, 1])
    font_dictionary['FontMatrix'] = pydyf.Array([1, 0, 0, 1, 0, 0])
    if subset:
        chars = tuple(sorted(font.cmap))
    else:
        chars = tuple(range(256))
    # Membership is tested for each width and each glyph: use a set to avoid
    # scanning the whole tuple every time.
    kept_ids = frozenset(chars)
    first, last = chars[0], chars[-1]
    font_dictionary['FirstChar'] = first
    font_dictionary['LastChar'] = last
    differences = []
    for index, index_widths in zip(widths[::2], widths[1::2]):
        differences.append(index)
        differences.extend(
            f'/{i + index}' for i in range(len(index_widths))
            if i + index in kept_ids)
    font_dictionary['Encoding'] = pydyf.Dictionary({
        'Type': '/Encoding',
        'Differences': pydyf.Array(differences),
    })
    char_procs = pydyf.Dictionary({})
    font_glyphs = font.ttfont['EBDT'].strikeData[0]
    # From here, "widths" is the list of advances stored in the dictionary,
    # not the parameter anymore.
    widths = [0] * (last - first + 1)
    glyphs_info = {}
    for key, glyph in font_glyphs.items():
        glyph_format = glyph.getFormat()
        glyph_id = font.ttfont.getGlyphID(key)

        # Get and store glyph metrics
        if glyph_format == 5:
            # Metrics are not stored with the glyph data, find them in the
            # index subtable whose range includes the glyph
            data = glyph.data
            subtables = font.ttfont['EBLC'].strikes[0].indexSubTables
            for subtable in subtables:
                first_index = subtable.firstGlyphIndex
                last_index = subtable.lastGlyphIndex
                if first_index <= glyph_id <= last_index:
                    height = subtable.metrics.height
                    advance = width = subtable.metrics.width
                    bearing_x = subtable.metrics.horiBearingX
                    bearing_y = subtable.metrics.horiBearingY
                    break
            else:
                LOGGER.warning(f'Unknown bitmap metrics for glyph: {glyph_id}')
                continue
        else:
            # Metrics are stored before the data, in 5 bytes for formats 1, 2
            # and 8, in 8 bytes for other formats
            data_start = 5 if glyph_format in (1, 2, 8) else 8
            data = glyph.data[data_start:]
            height, width = glyph.data[0:2]
            bearing_x = int.from_bytes(glyph.data[2:3], 'big', signed=True)
            bearing_y = int.from_bytes(glyph.data[3:4], 'big', signed=True)
            advance = glyph.data[4]
        position_y = bearing_y - height
        if glyph_id in kept_ids:
            widths[glyph_id - first] = advance
        stride = ceil(width / 8)  # number of bytes for each bitmap row
        glyph_info = glyphs_info[glyph_id] = {
            'width': width,
            'height': height,
            'x': bearing_x,
            'y': position_y,
            'stride': stride,
            'bitmap': None,
            'subglyphs': None,
        }

        # Decode bitmaps
        if 0 in (width, height) or not data:
            glyph_info['bitmap'] = b''
        elif glyph_format in (1, 6):
            # Rows already start on byte boundaries
            glyph_info['bitmap'] = data
        elif glyph_format in (2, 5, 7):
            # Rows are bit-packed, pad each of them to a whole number of bytes
            padding = (8 - (width % 8)) % 8
            bits = bin(int(data.hex(), 16))[2:]
            bits = bits.zfill(8 * len(data))
            bitmap_bits = ''.join(
                bits[i * width:(i + 1) * width] + padding * '0'
                for i in range(height))
            glyph_info['bitmap'] = int(bitmap_bits, 2).to_bytes(
                height * stride, 'big')
        elif glyph_format in (8, 9):
            # Composite glyph: keep the list of components, drawn later when
            # all the glyphs are known
            subglyphs = glyph_info['subglyphs'] = []
            i = 0 if glyph_format == 9 else 1
            number_of_components = int.from_bytes(data[i:i+2], 'big')
            for j in range(number_of_components):
                index = (i + 2) + (j * 4)
                subglyph_id = int.from_bytes(data[index:index+2], 'big')
                x = int.from_bytes(data[index+2:index+3], 'big', signed=True)
                y = int.from_bytes(data[index+3:index+4], 'big', signed=True)
                subglyphs.append({'id': subglyph_id, 'x': x, 'y': y})
        else:  # pragma: no cover
            LOGGER.warning(f'Unsupported bitmap glyph format: {glyph_format}')
            glyph_info['bitmap'] = bytes(height * stride)

    for glyph_id, glyph_info in glyphs_info.items():
        # Don't store glyphs that are not kept
        if glyph_id not in kept_ids:
            continue

        # Draw glyph
        stride = glyph_info['stride']
        width = glyph_info['width']
        height = glyph_info['height']
        x = glyph_info['x']
        y = glyph_info['y']
        if glyph_info['bitmap'] is None:
            # Composite glyph: the whole bitmap is handled as one big integer
            # in which the rows of each component are OR-ed
            length = height * stride
            bitmap_int = int.from_bytes(bytes(length), 'big')
            for subglyph in glyph_info['subglyphs']:
                sub_x = subglyph['x']
                sub_y = subglyph['y']
                sub_id = subglyph['id']
                if sub_id not in glyphs_info:
                    LOGGER.warning(f'Unknown subglyph: {sub_id}')
                    continue
                subglyph = glyphs_info[sub_id]
                if subglyph['bitmap'] is None:
                    # TODO: support subglyph in subglyph
                    LOGGER.warning(
                        f'Unsupported subglyph in subglyph: {sub_id}')
                    continue
                for row_y in range(subglyph['height']):
                    row_slice = slice(
                        row_y * subglyph['stride'],
                        (row_y + 1) * subglyph['stride'])
                    row = subglyph['bitmap'][row_slice]
                    row_int = int.from_bytes(row, 'big')
                    shift = stride * 8 * (height - sub_y - row_y - 1)
                    # Align the component row on the composite row width
                    stride_difference = stride - subglyph['stride']
                    if stride_difference > 0:
                        row_int <<= stride_difference * 8
                    elif stride_difference < 0:
                        row_int >>= -stride_difference * 8
                    # Apply the horizontal offset of the component
                    if sub_x > 0:
                        row_int >>= sub_x
                    elif sub_x < 0:
                        row_int <<= -sub_x
                    # Drop bits moved out of the row, then move the row to
                    # its vertical position
                    row_int %= 1 << stride * 8
                    row_int <<= shift
                    bitmap_int |= row_int
            bitmap = bitmap_int.to_bytes(length, 'big')
        else:
            bitmap = glyph_info['bitmap']
        bitmap_stream = pydyf.Stream([
            b'0 0 d0',
            f'{width} 0 0 {height} {x} {y} cm'.encode(),
            b'BI',
            b'/IM true',
            b'/W', width,
            b'/H', height,
            b'/BPC 1',
            b'/D [1 0]',
            b'ID', bitmap, b'EI'
        ], compress=compress_pdf)
        pdf.add_object(bitmap_stream)
        char_procs[glyph_id] = bitmap_stream.reference

    pdf.add_object(char_procs)
    font_dictionary['Widths'] = pydyf.Array(widths)
    font_dictionary['CharProcs'] = char_procs.reference
|
||||
92
app/.venv/Lib/site-packages/weasyprint/pdf/metadata.py
Normal file
92
app/.venv/Lib/site-packages/weasyprint/pdf/metadata.py
Normal file
@@ -0,0 +1,92 @@
|
||||
"""PDF metadata stream generation."""
|
||||
|
||||
from xml.etree.ElementTree import Element, SubElement, register_namespace, tostring
|
||||
|
||||
import pydyf
|
||||
|
||||
from .. import __version__
|
||||
|
||||
# Prefixes and URIs of the XML namespaces used in metadata
NS = dict(
    rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    dc='http://purl.org/dc/elements/1.1/',
    xmp='http://ns.adobe.com/xap/1.0/',
    pdf='http://ns.adobe.com/pdf/1.3/',
    pdfaid='http://www.aiim.org/pdfa/ns/id/',
    pdfuaid='http://www.aiim.org/pdfua/ns/id/',
)
# Register each prefix with ElementTree
for key, value in NS.items():
    register_namespace(key, value)
|
||||
|
||||
|
||||
def add_metadata(pdf, metadata, variant, version, conformance, compress):
    """Build the XMP metadata packet and attach it to the PDF catalog.

    Described in ISO-32000-1:2008, 14.3.2.

    The packet is an ``rdf:RDF`` tree with one ``rdf:Description`` for the
    variant identification, one for the producer, and one for each set field
    of ``metadata`` (title, authors, description, keywords, generator,
    created, modified).

    :param pdf: Object providing ``add_object`` and the ``catalog`` mapping.
    :param metadata: Object providing the document metadata attributes.
    :param variant: Suffix selecting the ``pdf{variant}id`` namespace.
    :param version: Value stored, as a string, in the ``part`` attribute.
    :param conformance: Value of the ``conformance`` attribute, if truthy.
    :param compress: Passed to the stream as its compression setting.

    """
    rdf_ns, dc_ns, xmp_ns, pdf_ns = NS['rdf'], NS['dc'], NS['xmp'], NS['pdf']
    root = Element(f'{{{rdf_ns}}}RDF')

    def describe():
        """Append an ``rdf:Description`` with empty ``about`` to the root."""
        node = SubElement(root, f'{{{rdf_ns}}}Description')
        node.attrib[f'{{{rdf_ns}}}about'] = ''
        return node

    # Variant identification
    id_ns = NS[f'pdf{variant}id']
    identification = describe()
    identification.attrib[f'{{{id_ns}}}part'] = str(version)
    if conformance:
        identification.attrib[f'{{{id_ns}}}conformance'] = conformance

    # Producer
    describe().attrib[f'{{{pdf_ns}}}Producer'] = f'WeasyPrint {__version__}'

    # Document metadata
    if metadata.title:
        title = SubElement(describe(), f'{{{dc_ns}}}title')
        alternatives = SubElement(title, f'{{{rdf_ns}}}Alt')
        entry = SubElement(alternatives, f'{{{rdf_ns}}}li')
        entry.attrib['xml:lang'] = 'x-default'
        entry.text = metadata.title
    if metadata.authors:
        creator = SubElement(describe(), f'{{{dc_ns}}}creator')
        sequence = SubElement(creator, f'{{{rdf_ns}}}Seq')
        for author in metadata.authors:
            SubElement(sequence, f'{{{rdf_ns}}}li').text = author
    if metadata.description:
        subject = SubElement(describe(), f'{{{dc_ns}}}subject')
        bag = SubElement(subject, f'{{{rdf_ns}}}Bag')
        SubElement(bag, f'{{{rdf_ns}}}li').text = metadata.description
    if metadata.keywords:
        keywords = SubElement(describe(), f'{{{pdf_ns}}}Keywords')
        keywords.text = ', '.join(metadata.keywords)
    xmp_properties = (
        ('CreatorTool', metadata.generator),
        ('CreateDate', metadata.created),
        ('ModifyDate', metadata.modified),
    )
    for tag, text in xmp_properties:
        if text:
            SubElement(describe(), f'{{{xmp_ns}}}{tag}').text = text

    # Wrap the serialized tree in an XMP packet and store it
    packet = b'\n'.join((
        b'<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>',
        tostring(root, encoding='utf-8'),
        b'<?xpacket end="r"?>',
    ))
    stream = pydyf.Stream(
        [packet], {'Type': '/Metadata', 'Subtype': '/XML'}, compress)
    pdf.add_object(stream)
    pdf.catalog['Metadata'] = stream.reference
|
||||
117
app/.venv/Lib/site-packages/weasyprint/pdf/pdfa.py
Normal file
117
app/.venv/Lib/site-packages/weasyprint/pdf/pdfa.py
Normal file
@@ -0,0 +1,117 @@
|
||||
"""PDF/A generation."""
|
||||
|
||||
try:
    # "files" is available in Python 3.9+
    from importlib.resources import files
except ImportError:
    # "read_binary" is deprecated in Python 3.11+, only used as a fallback
    from importlib.resources import read_binary
else:
    def read_binary(package, resource):
        """Return the bytes of ``resource`` stored in ``package``."""
        location = files(package) / resource
        return location.read_bytes()
|
||||
|
||||
from functools import partial
|
||||
|
||||
import pydyf
|
||||
|
||||
from .metadata import add_metadata
|
||||
|
||||
|
||||
def pdfa(pdf, metadata, document, page_streams, attachments, compress,
         version, variant):
    """Set metadata for PDF/A documents.

    Add the sRGB output intent, adapt attachments to what the given PDF/A
    ``version`` allows, set the print flag on annotations and add the
    metadata stream. ``variant`` is passed as the conformance level of the
    metadata.

    """
    def has_embedded_files():
        """Tell whether the catalog names include embedded files."""
        catalog = pdf.catalog
        return 'Names' in catalog and 'EmbeddedFiles' in catalog['Names']

    def file_specifications():
        """Yield the file specification dictionaries of the PDF."""
        for candidate in pdf.objects:
            if not isinstance(candidate, dict):
                continue
            if candidate.get('Type') == '/Filespec':
                yield candidate

    def embedded_stream(file_specification):
        """Return the stream object referenced by a file specification."""
        number = int(file_specification['EF']['F'].split()[0])
        return pdf.objects[number]

    # Add ICC profile.
    icc_profile = pydyf.Stream(
        [read_binary(__package__, 'sRGB2014.icc')],
        pydyf.Dictionary({'N': 3, 'Alternate': '/DeviceRGB'}),
        compress=compress)
    pdf.add_object(icc_profile)
    output_intent = pydyf.Dictionary({
        'Type': '/OutputIntent',
        'S': '/GTS_PDFA1',
        'OutputConditionIdentifier': pydyf.String('sRGB IEC61966-2.1'),
        'DestOutputProfile': icc_profile.reference,
    })
    pdf.catalog['OutputIntents'] = pydyf.Array([output_intent])

    # Handle attachments.
    if version == 1 and has_embedded_files():
        # Remove embedded files dictionary.
        del pdf.catalog['Names']['EmbeddedFiles']
    if version <= 2:
        # Remove all attachments for version 1.
        # Remove non-PDF attachments for version 2.
        # TODO: check that PDFs are actually PDF/A-2+ files.
        for file_specification in file_specifications():
            stream = embedded_stream(file_specification)
            if version == 1 or stream.extra['Subtype'] != '/application#2fpdf':
                del file_specification['EF']
    if version >= 3:
        # Add AF for attachments.
        relationships = {}
        for attachment in attachments:
            if attachment.md5:
                relationships[f'<{attachment.md5}>'] = attachment.relationship
        associated_files = []
        if has_embedded_files():
            number = int(pdf.catalog['Names']['EmbeddedFiles'].split()[0])
            # Names alternate keys and values, only keep values
            associated_files.extend(pdf.objects[number]['Names'][1::2])
        for file_specification in file_specifications():
            stream = embedded_stream(file_specification)
            checksum = stream.extra['Params']['CheckSum']
            relationship = relationships.get(checksum, 'Unspecified')
            file_specification['AFRelationship'] = f'/{relationship}'
            associated_files.append(file_specification.reference)
        if associated_files:
            if 'AF' not in pdf.catalog:
                pdf.catalog['AF'] = pydyf.Array()
            pdf.catalog['AF'].extend(associated_files)

    # Print annotations.
    for candidate in pdf.objects:
        if not isinstance(candidate, dict):
            continue
        if candidate.get('Type') == '/Annot':
            candidate['F'] = 2 ** (3 - 1)

    # Common PDF metadata stream.
    # Metadata compression is forbidden for version 1.
    add_metadata(
        pdf, metadata, 'a', version, variant,
        False if version == 1 else compress)
|
||||
|
||||
|
||||
# Map each PDF/A variant name to a pair made of the function setting the
# variant on a PDF, and a dictionary giving the PDF version and the
# "identifier" flag. Conformance "B" is listed for parts 1 to 4, conformance
# "U" for parts 2 to 4.
VARIANTS = {
    f'pdf/a-{part}{level.lower()}': (
        partial(pdfa, version=part, variant=level),
        {'version': pdf_version, 'identifier': True})
    for level, parts in (
        ('B', ((1, '1.4'), (2, '1.7'), (3, '1.7'), (4, '2.0'))),
        ('U', ((2, '1.7'), (3, '1.7'), (4, '2.0'))))
    for part, pdf_version in parts
}
|
||||
125
app/.venv/Lib/site-packages/weasyprint/pdf/pdfua.py
Normal file
125
app/.venv/Lib/site-packages/weasyprint/pdf/pdfua.py
Normal file
@@ -0,0 +1,125 @@
|
||||
"""PDF/UA generation."""
|
||||
|
||||
import pydyf
|
||||
|
||||
from .metadata import add_metadata
|
||||
|
||||
|
||||
def pdfua(pdf, metadata, document, page_streams, attachments, compress):
    """Set metadata for PDF/UA documents.

    Build the structure tree from the marked content of each page stream,
    fill the parent tree mapping marked content and link annotations to
    their structure elements, then add the metadata stream and the catalog
    entries.

    :param pdf: Object providing ``add_object``, ``catalog`` and
        ``page_references``.
    :param metadata: Document metadata, forwarded to ``add_metadata``.
    :param document: Object whose ``build_element_structure`` fills a
        mapping from elements to their structure data.
    :param page_streams: Page streams, each providing ``marked`` pairs of
        (tag, box) and ``get_marked_content_tag``.
    :param attachments: Unused here.
    :param compress: Forwarded to ``add_metadata``.

    """
    # Structure for PDF tagging
    content_mapping = pydyf.Dictionary({})
    pdf.add_object(content_mapping)
    structure_root = pydyf.Dictionary({
        'Type': '/StructTreeRoot',
        'ParentTree': content_mapping.reference,
    })
    pdf.add_object(structure_root)
    structure_document = pydyf.Dictionary({
        'Type': '/StructElem',
        'S': '/Document',
        'P': structure_root.reference,
    })
    pdf.add_object(structure_document)
    structure_root['K'] = pydyf.Array([structure_document.reference])
    pdf.catalog['StructTreeRoot'] = structure_root.reference

    document_children = []
    content_mapping['Nums'] = pydyf.Array()
    links = []
    # Set before the loop, as the value is used after the loop to number
    # links and would be undefined with no page streams.
    page_number = -1
    for page_number, page_stream in enumerate(page_streams):
        structure = {}
        document.build_element_structure(structure)
        # Structure element owning each marked content of the page
        parents = [None] * len(page_stream.marked)
        for mcid, (key, box) in enumerate(page_stream.marked):
            # Build structure elements
            kids = [mcid]
            if key == 'Link':
                object_reference = pydyf.Dictionary({
                    'Type': '/OBJR',
                    'Obj': box.link_annotation.reference,
                    'Pg': pdf.page_references[page_number],
                })
                pdf.add_object(object_reference)
                links.append((object_reference.reference, box.link_annotation))
            etree_element = box.element
            child_structure_data_element = None
            # Go up through the ancestors, creating missing structure
            # elements, until an existing element or the root is reached
            while True:
                if etree_element is None:
                    structure_data = structure.setdefault(
                        box, {'parent': None})
                else:
                    structure_data = structure[etree_element]
                new_element = 'element' not in structure_data
                if new_element:
                    child = structure_data['element'] = pydyf.Dictionary({
                        'Type': '/StructElem',
                        'S': f'/{key}',
                        'K': pydyf.Array(kids),
                        'Pg': pdf.page_references[page_number],
                    })
                    pdf.add_object(child)
                    if key == 'LI':
                        # List items wrap their content in a label or a
                        # body, that gets the kids instead of the item
                        if etree_element.tag == 'dt':
                            sub_key = 'Lbl'
                        else:
                            sub_key = 'LBody'
                        real_child = pydyf.Dictionary({
                            'Type': '/StructElem',
                            'S': f'/{sub_key}',
                            'K': pydyf.Array(kids),
                            'Pg': pdf.page_references[page_number],
                            'P': child.reference,
                        })
                        pdf.add_object(real_child)
                        child['K'] = pydyf.Array([real_child.reference])
                        structure_data['element'] = real_child
                        owner = real_child
                    else:
                        owner = child
                else:
                    child = structure_data['element']
                    child['K'].extend(kids)
                    owner = child
                # Integer kids are marked content identifiers
                for kid in kids:
                    if isinstance(kid, int):
                        parents[kid] = owner.reference
                kid = child.reference
                if child_structure_data_element is not None:
                    child_structure_data_element['P'] = kid
                if not new_element:
                    break
                kids = [kid]
                child_structure_data_element = child
                if structure_data['parent'] is None:
                    child['P'] = structure_document.reference
                    document_children.append(child.reference)
                    break
                else:
                    etree_element = structure_data['parent']
                    key = page_stream.get_marked_content_tag(etree_element.tag)
        content_mapping['Nums'].append(page_number)
        content_mapping['Nums'].append(pydyf.Array(parents))
    structure_document['K'] = pydyf.Array(document_children)
    # Links are numbered after pages in the parent tree
    for i, (link, annotation) in enumerate(links, start=page_number + 1):
        content_mapping['Nums'].append(i)
        content_mapping['Nums'].append(link)
        annotation['StructParent'] = i
        annotation['F'] = 2 ** (2 - 1)

    # Common PDF metadata stream
    add_metadata(pdf, metadata, 'ua', 1, conformance=None, compress=compress)

    # PDF document extra metadata
    if 'Lang' not in pdf.catalog:
        pdf.catalog['Lang'] = pydyf.String()
    pdf.catalog['ViewerPreferences'] = pydyf.Dictionary({
        'DisplayDocTitle': 'true',
    })
    pdf.catalog['MarkInfo'] = pydyf.Dictionary({'Marked': 'true'})
|
||||
|
||||
|
||||
# Map the PDF/UA variant name to a pair made of the function setting the
# variant on a PDF, and a dictionary of options.
VARIANTS = {'pdf/ua-1': (pdfua, {'mark': True})}
|
||||
BIN
app/.venv/Lib/site-packages/weasyprint/pdf/sRGB2014.icc
Normal file
BIN
app/.venv/Lib/site-packages/weasyprint/pdf/sRGB2014.icc
Normal file
Binary file not shown.
489
app/.venv/Lib/site-packages/weasyprint/pdf/stream.py
Normal file
489
app/.venv/Lib/site-packages/weasyprint/pdf/stream.py
Normal file
@@ -0,0 +1,489 @@
|
||||
"""PDF stream."""
|
||||
|
||||
import io
|
||||
from hashlib import md5
|
||||
|
||||
import pydyf
|
||||
from fontTools import subset
|
||||
from fontTools.ttLib import TTFont, TTLibError, ttFont
|
||||
from fontTools.varLib.mutator import instantiateVariableFont
|
||||
|
||||
from ..logger import LOGGER
|
||||
from ..matrix import Matrix
|
||||
from ..text.constants import PANGO_STRETCH_PERCENT
|
||||
from ..text.ffi import ffi, harfbuzz, pango, units_to_double
|
||||
from ..text.fonts import get_hb_object_data, get_pango_font_hb_face, get_pango_font_key
|
||||
|
||||
|
||||
class Font:
    """Font data and properties, built from a Pango font."""

    def __init__(self, pango_font):
        """Read the font file, description and metrics of ``pango_font``."""
        self.hb_font = pango.pango_font_get_hb_font(pango_font)
        self.hb_face = get_pango_font_hb_face(pango_font)
        self.file_content = get_hb_object_data(self.hb_face)
        self.index = harfbuzz.hb_face_get_index(self.hb_face)

        pango_metrics = pango.pango_font_get_metrics(pango_font, ffi.NULL)
        self.description = description = ffi.gc(
            pango.pango_font_describe(pango_font),
            pango.pango_font_description_free)
        self.font_size = pango.pango_font_description_get_size(description)
        self.style = pango.pango_font_description_get_style(description)
        self.family = ffi.string(
            pango.pango_font_description_get_family(description))

        # Variations, given as comma-separated "axis=value" parts
        self.variations = {}
        raw_variations = pango.pango_font_description_get_variations(
            self.description)
        if raw_variations != ffi.NULL:
            parsed = {}
            for part in ffi.string(raw_variations).decode().split(','):
                pieces = part.split('=')
                parsed[pieces[0]] = float(pieces[1])
            self.variations = parsed

        # Set weight, style and stretch in description, using variations
        if 'wght' in self.variations:
            weight = int(round(self.variations['wght']))
            pango.pango_font_description_set_weight(self.description, weight)
        if self.variations.get('ital'):
            pango.pango_font_description_set_style(
                self.description, pango.PANGO_STYLE_ITALIC)
        elif self.variations.get('slnt'):
            pango.pango_font_description_set_style(
                self.description, pango.PANGO_STYLE_OBLIQUE)
        if 'wdth' in self.variations:
            # Use the stretch whose percentage is the closest to the width
            width = self.variations['wdth']
            closest = min(
                PANGO_STRETCH_PERCENT.items(),
                key=lambda item: abs(item[0] - width))
            pango.pango_font_description_set_stretch(
                self.description, closest[1])
        description_string = ffi.string(
            pango.pango_font_description_to_string(description))

        # The hash is made of 6 uppercase letters, based on the MD5 digest of
        # the description. The built-in hash function must never be used
        # here, as it is not stable.
        digest = md5(description_string, usedforsecurity=False).digest()
        self.hash = ''.join(chr(65 + byte % 26) for byte in digest[:6])

        # Name
        fields = description_string.split(b' ')
        if fields and b'=' in fields[-1]:
            fields.pop()  # Remove variations
        if fields:
            fields.pop()  # Remove font size
        else:
            fields = [b'Unknown']
        self.name = b''.join(
            (b'/', self.hash.encode(), b'+', b'-'.join(fields)))

        # Ascent & descent
        if self.font_size:
            ascent = pango.pango_font_metrics_get_ascent(pango_metrics)
            self.ascent = int(ascent / self.font_size * 1000)
            descent = pango.pango_font_metrics_get_descent(pango_metrics)
            self.descent = -int(descent / self.font_size * 1000)
        else:
            self.ascent = 0
            self.descent = 0

        # Fonttools
        try:
            self.ttfont = TTFont(
                io.BytesIO(self.file_content), fontNumber=self.index)
        except Exception:
            LOGGER.warning('Unable to read font')
            self.ttfont = None
            self.bitmap = False
        else:
            # Bitmap fonts have bitmap tables and no glyph outlines
            self.bitmap = (
                'EBDT' in self.ttfont and 'EBLC' in self.ttfont and (
                    'glyf' not in self.ttfont or
                    not self.ttfont['glyf'].glyphs))

        # Various properties
        self.italic_angle = 0  # TODO: this should be different
        self.upem = harfbuzz.hb_face_get_upem(self.hb_face)
        self.png = harfbuzz.hb_ot_color_has_png(self.hb_face)
        self.svg = harfbuzz.hb_ot_color_has_svg(self.hb_face)
        self.stemv = 80
        self.stemh = 80
        self.widths = {}
        self.cmap = {}
        self.used_in_forms = False

        # Font flags
        flags = 1 << (3 - 1)  # Symbolic, custom character set
        if self.style:
            flags += 1 << (7 - 1)  # Italic
        if b'Serif' in fields:
            flags += 1 << (2 - 1)  # Serif
        self.flags = flags

    def clean(self, cmap, hinting):
        """Subset the font, make it static and remove its color glyphs.

        Nothing is done if the font could not be read. Each step that fails
        logs a warning and leaves the stored font content unchanged.

        """
        if self.ttfont is None:
            return

        # Subset font
        if cmap:
            options = subset.Options(
                retain_gids=True, passthrough_tables=True,
                ignore_missing_glyphs=True, hinting=hinting,
                desubroutinize=True)
            options.drop_tables += ['GSUB', 'GPOS', 'SVG']
            subsetter = subset.Subsetter(options)
            subsetter.populate(gids=cmap)
            try:
                subsetter.subset(self.ttfont)
            except TTLibError:
                LOGGER.warning('Unable to optimize font')
            else:
                optimized_font = io.BytesIO()
                self.ttfont.save(optimized_font)
                self.file_content = optimized_font.getvalue()

        # Transform variable into static font
        if 'fvar' in self.ttfont:
            variations = self.variations
            if 'wght' not in variations:
                variations['wght'] = (
                    pango.pango_font_description_get_weight(self.description))
            if 'opsz' not in variations:
                variations['opsz'] = units_to_double(self.font_size)
            if 'slnt' not in variations:
                slant = 0
                if self.style == 1:
                    # Use the non-zero limit of the first slant axis
                    for axis in self.ttfont['fvar'].axes:
                        if axis.axisTag != 'slnt':
                            continue
                        if axis.maxValue == 0:
                            slant = axis.minValue
                        else:
                            slant = axis.maxValue
                        break
                variations['slnt'] = slant
            if 'ital' not in variations:
                variations['ital'] = int(self.style == 2)
            partial_font = io.BytesIO()
            try:
                ttfont = instantiateVariableFont(self.ttfont, variations)
                # Replace negative advances by 0
                metrics = ttfont['hmtx'].metrics
                for key, (advance, bearing) in metrics.items():
                    if advance < 0:
                        metrics[key] = (0, bearing)
                ttfont.save(partial_font)
            except Exception:
                LOGGER.warning('Unable to mutate variable font')
            else:
                self.ttfont = ttfont
                self.file_content = partial_font.getvalue()

        if not self.png and not self.svg:
            return

        try:
            # Add empty glyphs instead of PNG or SVG emojis
            empty_glyph = ttFont.getTableModule('glyf').Glyph
            if 'loca' not in self.ttfont or 'glyf' not in self.ttfont:
                self.ttfont['loca'] = ttFont.getTableClass('loca')()
                self.ttfont['glyf'] = ttFont.getTableClass('glyf')()
                self.ttfont['glyf'].glyphOrder = self.ttfont.getGlyphOrder()
                self.ttfont['glyf'].glyphs = {
                    name: empty_glyph()
                    for name in self.ttfont['glyf'].glyphOrder}
            else:
                for glyph in self.ttfont['glyf'].glyphs:
                    self.ttfont['glyf'][glyph] = empty_glyph()
            for table_name in ('CBDT', 'CBLC', 'SVG '):
                if table_name in self.ttfont:
                    del self.ttfont[table_name]
            output_font = io.BytesIO()
            self.ttfont.save(output_font)
            self.file_content = output_font.getvalue()
        except TTLibError:
            LOGGER.warning('Unable to save emoji font')

    @property
    def type(self):
        """Font type, 'otf' if the content starts with b'OTTO' else 'ttf'."""
        if self.file_content[:4] == b'OTTO':
            return 'otf'
        return 'ttf'
|
||||
|
||||
|
||||
class Stream(pydyf.Stream):
|
||||
"""PDF stream object with extra features."""
|
||||
def __init__(self, fonts, page_rectangle, states, x_objects, patterns,
             shadings, images, mark, *args, **kwargs):
    """Store the shared resources and reset the graphics state caches.

    Extra arguments are given to the parent stream.

    """
    super().__init__(*args, **kwargs)
    self.page_rectangle = page_rectangle
    self.marked = []

    # Resources
    self._fonts = fonts
    self._states = states
    self._x_objects = x_objects
    self._patterns = patterns
    self._shadings = shadings
    self._images = images
    self._mark = mark

    # Last values set, used to avoid setting the same value twice
    self._current_color = None
    self._current_color_stroke = None
    self._current_alpha = None
    self._current_alpha_stroke = None
    self._current_font = None
    self._current_font_size = None
    self._old_font = None
    self._old_font_size = None

    # Stack of current transformation matrices, one for each saved state
    self._ctm_stack = [Matrix()]

    # These objects are used in text.show_first_line
    self.length = ffi.new('unsigned int *')
    self.ink_rect = ffi.new('PangoRectangle *')
    self.logical_rect = ffi.new('PangoRectangle *')
|
||||
|
||||
@property
def ctm(self):
    """Current transformation matrix, the last one in the stack."""
    *_, current = self._ctm_stack
    return current
|
||||
|
||||
def push_state(self):
    """Save the graphics state and duplicate the current matrix."""
    super().push_state()
    current_matrix = self.ctm
    self._ctm_stack.append(current_matrix)
|
||||
|
||||
def pop_state(self):
    """Restore the graphics state and forget the cached values.

    If the last instruction of the stream is ``b'q'``, it is removed and no
    restore instruction is added.

    """
    instructions = self.stream
    if instructions and instructions[-1] == b'q':
        instructions.pop()
    else:
        super().pop_state()

    # Cached values may not be the ones of the restored state
    self._current_color = None
    self._current_color_stroke = None
    self._current_alpha = None
    self._current_alpha_stroke = None
    self._current_font = None

    self._ctm_stack.pop()
    assert self._ctm_stack
|
||||
|
||||
def transform(self, a=1, b=0, c=0, d=1, e=0, f=0):
    """Add a transformation and apply it to the current matrix."""
    super().transform(a, b, c, d, e, f)
    transformation = Matrix(a, b, c, d, e, f)
    self._ctm_stack[-1] = transformation @ self.ctm
|
||||
|
||||
def begin_text(self):
    """Begin a text object, or continue the one that has just been ended.

    If the last instruction of the stream is ``b'ET'``, it is removed and the
    font stored by ``end_text`` is restored.

    """
    if not self.stream or self.stream[-1] != b'ET':
        super().begin_text()
        return
    self._current_font = self._old_font
    self.stream.pop()
|
||||
|
||||
def end_text(self):
    """End the text object, keeping the current font as the old one."""
    self._old_font = self._current_font
    self._current_font = None
    super().end_text()
|
||||
|
||||
def set_color_rgb(self, r, g, b, stroke=False):
    """Set the fill or stroke color, unless it is already the current one."""
    color = (r, g, b)
    attribute = '_current_color_stroke' if stroke else '_current_color'
    if color == getattr(self, attribute):
        return
    setattr(self, attribute, color)
    super().set_color_rgb(r, g, b, stroke)
|
||||
|
||||
def set_font_size(self, font, size):
    """Set the font and its size, unless they are already the current ones."""
    requested = (font, size)
    if requested != self._current_font:
        self._current_font = requested
        super().set_font_size(font, size)
|
||||
|
||||
def set_state(self, state):
    """Store ``state`` under a new name in the states and set it."""
    name = f's{len(self._states)}'
    self._states[name] = state
    super().set_state(name)
|
||||
|
||||
def set_alpha(self, alpha, stroke=False, fill=None):
    """Set the stroke and/or fill alpha, unless it is already the current one.

    When ``fill`` is ``None``, the fill alpha is set only if ``stroke`` is
    false.

    """
    if fill is None:
        fill = not stroke

    # Stroke is handled first, then fill
    targets = (
        (stroke, 'A', 'CA', '_current_alpha_stroke'),
        (fill, 'a', 'ca', '_current_alpha'),
    )
    for enabled, prefix, entry, attribute in targets:
        if not enabled:
            continue
        key = f'{prefix}{alpha}'
        if key == getattr(self, attribute):
            continue
        setattr(self, attribute, key)
        if key not in self._states:
            self._states[key] = pydyf.Dictionary({entry: alpha})
        super().set_state(key)
|
||||
|
||||
def set_alpha_state(self, x, y, width, height):
    """Set a luminosity soft mask state and return the group of the mask."""
    alpha_stream = self.add_group(x, y, width, height)
    soft_mask = pydyf.Dictionary({
        'Type': '/Mask',
        'S': '/Luminosity',
        'G': alpha_stream,
    })
    self.set_state(pydyf.Dictionary({
        'Type': '/ExtGState',
        'SMask': soft_mask,
        'ca': 1,
        'AIS': 'false',
    }))
    return alpha_stream
|
||||
|
||||
def set_blend_mode(self, mode):
    """Set a new graphics state using ``mode`` as blend mode."""
    blend_state = pydyf.Dictionary({
        'Type': '/ExtGState',
        'BM': f'/{mode}',
    })
    self.set_state(blend_state)
|
||||
|
||||
def add_font(self, pango_font):
    """Return the font stored for ``pango_font``, creating it if needed."""
    fonts = self._fonts
    font_key = get_pango_font_key(pango_font)
    if font_key in fonts:
        return fonts[font_key]
    fonts[font_key] = Font(pango_font)
    return fonts[font_key]
|
||||
|
||||
def add_group(self, x, y, width, height):
    """Create a transparency group form, store it and return it.

    The group is a stream with its own resource dictionaries. It shares the
    fonts and images of this stream, and is stored in the x-objects under a
    generated ``id``.

    """
    states, x_objects, patterns, shadings = (
        pydyf.Dictionary() for _ in range(4))
    resources = pydyf.Dictionary({
        'ExtGState': states,
        'XObject': x_objects,
        'Pattern': patterns,
        'Shading': shadings,
        'Font': None,  # Will be set by _use_references
    })
    transparency = pydyf.Dictionary({
        'Type': '/Group',
        'S': '/Transparency',
        'I': 'true',
        'CS': '/DeviceRGB',
    })
    bounding_box = pydyf.Array((x, y, x + width, y + height))
    extra = pydyf.Dictionary({
        'Type': '/XObject',
        'Subtype': '/Form',
        'BBox': bounding_box,
        'Resources': resources,
        'Group': transparency,
    })
    group = Stream(
        self._fonts, self.page_rectangle, states, x_objects, patterns,
        shadings, self._images, self._mark, extra=extra,
        compress=self.compress)
    group.id = f'x{len(self._x_objects)}'
    self._x_objects[group.id] = group
    return group
|
||||
|
||||
def add_image(self, image, interpolate, ratio):
    """Register ``image`` for this stream and return its resource name.

    The name is ``i<image.id><0 or 1>``, the last digit being
    ``int(interpolate)``. When the name is already known in
    ``self._images``, only ``ratio`` is added to its ``dpi_ratios`` set.

    """
    name = f'i{image.id}{int(interpolate)}'
    self._x_objects[name] = None  # Set by write_pdf
    if name in self._images:
        # Reuse image already stored in document
        self._images[name]['dpi_ratios'].add(ratio)
    else:
        self._images[name] = {
            'image': image,
            'interpolate': interpolate,
            'dpi_ratios': {ratio},
            'x_object': None,  # Set by write_pdf
        }
    return name
|
||||
|
||||
def add_pattern(self, x, y, width, height, repeat_width, repeat_height,
                matrix):
    """Create a tiling pattern stream and register it.

    The returned ``Stream`` gets its own empty resource dictionaries, a
    bounding box of ``(x, y, x + width, y + height)``, steps taken from
    ``repeat_width`` and ``repeat_height``, and the values of ``matrix``
    as its ``Matrix`` entry. Its id has the form ``p<N>``, where ``N`` is
    the number of entries in ``self._patterns`` before insertion.

    """
    tile_states = pydyf.Dictionary()
    tile_x_objects = pydyf.Dictionary()
    tile_patterns = pydyf.Dictionary()
    tile_shadings = pydyf.Dictionary()
    tile_resources = pydyf.Dictionary({
        'ExtGState': tile_states,
        'XObject': tile_x_objects,
        'Pattern': tile_patterns,
        'Shading': tile_shadings,
        'Font': None,  # Will be set by _use_references
    })
    bounding_box = pydyf.Array([x, y, x + width, y + height])
    pattern_extra = pydyf.Dictionary({
        'Type': '/Pattern',
        'PatternType': 1,
        'BBox': bounding_box,
        'XStep': repeat_width,
        'YStep': repeat_height,
        'TilingType': 1,
        'PaintType': 1,
        'Matrix': pydyf.Array(matrix.values),
        'Resources': tile_resources,
    })
    tile = Stream(
        self._fonts, self.page_rectangle, tile_states, tile_x_objects,
        tile_patterns, tile_shadings, self._images, self._mark,
        extra=pattern_extra, compress=self.compress)
    # The id is computed before insertion, so it reflects the current size.
    tile.id = f'p{len(self._patterns)}'
    self._patterns[tile.id] = tile
    return tile
|
||||
|
||||
def add_shading(self, shading_type, color_space, domain, coords, extend,
                function):
    """Create a shading dictionary and register it.

    ``color_space`` is appended to ``/Device`` to form the ``ColorSpace``
    entry. When ``extend`` is truthy, an ``Extend`` array holding two
    ``true`` values is added. The dictionary is stored in
    ``self._shadings`` under an id of the form ``s<N>``, where ``N`` is
    the number of shadings stored before insertion.

    """
    entries = {
        'ShadingType': shading_type,
        'ColorSpace': f'/Device{color_space}',
        'Domain': pydyf.Array(domain),
        'Coords': pydyf.Array(coords),
        'Function': function,
    }
    if extend:
        entries['Extend'] = pydyf.Array((b'true', b'true'))
    shading = pydyf.Dictionary(entries)
    shading.id = f's{len(self._shadings)}'
    self._shadings[shading.id] = shading
    return shading
|
||||
|
||||
def begin_marked_content(self, box, mcid=False, tag=None):
    """Open a marked content sequence for ``box``.

    Nothing happens when marking is disabled (``self._mark`` is falsy).
    When ``tag`` is ``None``, it is derived from ``box.element_tag``. When
    ``mcid`` is truthy, the sequence gets an ``MCID`` equal to the current
    length of ``self.marked``, and ``(tag, box)`` is appended to that list.

    """
    if self._mark:
        if tag is None:
            tag = self.get_marked_content_tag(box.element_tag)
        if mcid:
            # The identifier is the index the new entry gets in the list.
            properties = pydyf.Dictionary({'MCID': len(self.marked)})
            self.marked.append((tag, box))
        else:
            properties = None
        super().begin_marked_content(tag, properties)
|
||||
|
||||
def end_marked_content(self):
    """Close the current marked content sequence, if marking is enabled."""
    if self._mark:
        super().end_marked_content()
|
||||
|
||||
@staticmethod
def create_interpolation_function(domain, c0, c1, n):
    """Return a type 2 function dictionary built from the given values."""
    domain_array = pydyf.Array(domain)
    start_values = pydyf.Array(c0)
    end_values = pydyf.Array(c1)
    return pydyf.Dictionary({
        'FunctionType': 2,
        'Domain': domain_array,
        'C0': start_values,
        'C1': end_values,
        'N': n,
    })
|
||||
|
||||
@staticmethod
def create_stitching_function(domain, encode, bounds, sub_functions):
    """Return a type 3 function dictionary built from the given values."""
    domain_array = pydyf.Array(domain)
    encode_array = pydyf.Array(encode)
    bounds_array = pydyf.Array(bounds)
    functions_array = pydyf.Array(sub_functions)
    return pydyf.Dictionary({
        'FunctionType': 3,
        'Domain': domain_array,
        'Encode': encode_array,
        'Bounds': bounds_array,
        'Functions': functions_array,
    })
|
||||
|
||||
def get_marked_content_tag(self, element_tag):
    """Return the marked content tag name matching an HTML element name.

    Matching is case-sensitive. Names that are not handled map to
    ``'NonStruct'``.

    """
    fixed_names = {
        'div': 'Div',
        'span': 'Span',
        'article': 'Art',
        'section': 'Sect',
        'blockquote': 'BlockQuote',
        'p': 'P',
        'dl': 'L',
        'ul': 'L',
        'ol': 'L',
        'li': 'LI',
        'dt': 'LI',
        'dd': 'LI',
        'table': 'Table',
    }
    if element_tag in fixed_names:
        return fixed_names[element_tag]
    # Headings and table rows/cells use the upper-cased element name.
    if element_tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'tr', 'th', 'td'):
        return element_tag.upper()
    # Table sections only capitalize their first two letters (THead, ...).
    if element_tag in ('thead', 'tbody', 'tfoot'):
        return element_tag[:2].upper() + element_tag[2:]
    return 'NonStruct'
|
||||
Reference in New Issue
Block a user