feat: add comprehensive GitHub workflow and development tools

This commit is contained in:
Stiftung Development
2025-09-06 18:31:54 +02:00
commit ab23d7187e
10224 changed files with 2075210 additions and 0 deletions

View File

@@ -0,0 +1,527 @@
"""Constants used for text layout."""
from functools import lru_cache
from .ffi import pango
# Pango features
# Every table below maps a keyword to the Pango enum member whose name is
# derived from that keyword, so the members are looked up by name.

# Keyword -> PangoStyle member (PANGO_STYLE_<KEYWORD>)
PANGO_STYLE = {
    keyword: getattr(pango, 'PANGO_STYLE_' + keyword.upper())
    for keyword in ('normal', 'oblique', 'italic')
}

# Keyword -> PangoStretch member (PANGO_STRETCH_<KEYWORD>, '-' becoming '_'),
# listed from the most condensed to the most expanded value
PANGO_STRETCH = {
    keyword: getattr(
        pango, 'PANGO_STRETCH_' + keyword.upper().replace('-', '_'))
    for keyword in (
        'ultra-condensed', 'extra-condensed', 'condensed', 'semi-condensed',
        'normal', 'semi-expanded', 'expanded', 'extra-expanded',
        'ultra-expanded')
}

# From https://drafts.csswg.org/css-fonts/#font-stretch-prop
# Percentage -> PangoStretch member; the percentages are paired, in order,
# with the members of PANGO_STRETCH defined just above.
PANGO_STRETCH_PERCENT = dict(zip(
    (50, 62.5, 75, 87.5, 100, 112.5, 125, 150, 200),
    PANGO_STRETCH.values()))

# Name -> PangoWrapMode member (PANGO_<NAME>)
PANGO_WRAP_MODE = {
    name: getattr(pango, 'PANGO_' + name)
    for name in ('WRAP_WORD', 'WRAP_CHAR', 'WRAP_WORD_CHAR')
}
# Language system tags
# From https://docs.microsoft.com/typography/opentype/spec/languagetags
# Lookup table keyed by lower-case language system tag. Each value is the
# three-letter language code used in its place (presumably an ISO 639 code, as
# the name of the table suggests -- TODO confirm against the page above).
# Some keys carry a trailing '0' (e.g. 'bad0', 'chk0') next to the plain tag,
# and a few entries map a tag to itself ('chy', 'frp', 'sel', 'sgs', 'vro').
LST_TO_ISO = {
'aba': 'abq',
'afk': 'afr',
'afr': 'aar',
'agw': 'ahg',
'als': 'gsw',
'alt': 'atv',
'ari': 'aiw',
'ark': 'mhv',
'ath': 'apk',
'avr': 'ava',
'bad': 'bfq',
'bad0': 'bad',
'bag': 'bfy',
'bal': 'krc',
'bau': 'bci',
'bch': 'bcq',
'bgr': 'bul',
'bil': 'byn',
'bkf': 'bla',
'bli': 'bal',
'bln': 'bjt',
'blt': 'bft',
'bmb': 'bam',
'bri': 'bra',
'brm': 'mya',
'bsh': 'bak',
'bti': 'btb',
'chg': 'sgw',
'chh': 'hne',
'chi': 'nya',
'chk': 'ckt',
'chk0': 'chk',
'chu': 'chv',
'chy': 'chy',
'cmr': 'swb',
'crr': 'crx',
'crt': 'crh',
'csl': 'chu',
'csy': 'ces',
'dcr': 'cwd',
'dgr': 'doi',
'djr': 'dje',
'djr0': 'djr',
'dng': 'ada',
'dnk': 'din',
'dri': 'prs',
'dun': 'dng',
'dzn': 'dzo',
'ebi': 'igb',
'ecr': 'crj',
'edo': 'bin',
'erz': 'myv',
'esp': 'spa',
'eti': 'est',
'euq': 'eus',
'evk': 'evn',
'evn': 'eve',
'fan': 'acf',
'fan0': 'fan',
'far': 'fas',
'fji': 'fij',
'fle': 'vls',
'fne': 'enf',
'fos': 'fao',
'fri': 'fry',
'frl': 'fur',
'frp': 'frp',
'fta': 'fuf',
'gad': 'gaa',
'gae': 'gla',
'gal': 'glg',
'gaw': 'gbm',
'gil': 'niv',
'gil0': 'gil',
'gmz': 'guk',
'grn': 'kal',
'gro': 'grt',
'gua': 'grn',
'hai': 'hat',
'hal': 'flm',
'har': 'hoj',
'hbn': 'amf',
'hma': 'mrj',
'hnd': 'hno',
'ho': 'hoc',
'hri': 'har',
'hye0': 'hye',
'ijo': 'ijc',
'ing': 'inh',
'inu': 'iku',
'iri': 'gle',
'irt': 'gle',
'ism': 'smn',
'iwr': 'heb',
'jan': 'jpn',
'jii': 'yid',
'jud': 'lad',
'jul': 'dyu',
'kab': 'kbd',
'kab0': 'kab',
'kac': 'kfr',
'kal': 'kln',
'kar': 'krc',
'keb': 'ktb',
'kge': 'kat',
'kha': 'kjh',
'khk': 'kca',
'khs': 'kca',
'khv': 'kca',
'kis': 'kqs',
'kkn': 'kex',
'klm': 'xal',
'kmb': 'kam',
'kmn': 'kfy',
'kmo': 'kmw',
'kms': 'kxc',
'knr': 'kau',
'kod': 'kfa',
'koh': 'okm',
'kon': 'ktu',
'kon0': 'kon',
'kop': 'koi',
'koz': 'kpv',
'kpl': 'kpe',
'krk': 'kaa',
'krm': 'kdr',
'krn': 'kar',
'krt': 'kqy',
'ksh': 'kas',
'ksh0': 'ksh',
'ksi': 'kha',
'ksm': 'sjd',
'kui': 'kxu',
'kul': 'kfx',
'kuu': 'kru',
'kuy': 'kdt',
'kyk': 'kpy',
'lad': 'lld',
'lah': 'bfu',
'lak': 'lbe',
'lam': 'lmn',
'laz': 'lzz',
'lcr': 'crm',
'ldk': 'lbj',
'lma': 'mhr',
'lmb': 'lif',
'lmw': 'ngl',
'lsb': 'dsb',
'lsm': 'smj',
'lth': 'lit',
'luh': 'luy',
'lvi': 'lav',
'maj': 'mpe',
'mak': 'vmw',
'man': 'mns',
'map': 'arn',
'maw': 'mwr',
'mbn': 'kmb',
'mch': 'mnc',
'mcr': 'crm',
'mde': 'men',
'men': 'mym',
'miz': 'lus',
'mkr': 'mak',
'mle': 'mdy',
'mln': 'mlq',
'mlr': 'mal',
'mly': 'msa',
'mnd': 'mnk',
'mng': 'mon',
'mnk': 'man',
'mnx': 'glv',
'mok': 'mdf',
'mon': 'mnw',
'mth': 'mai',
'mts': 'mlt',
'mun': 'unr',
'nan': 'gld',
'nas': 'nsk',
'ncr': 'csw',
'ndg': 'ndo',
'nhc': 'csw',
'nis': 'dap',
'nkl': 'nyn',
'nko': 'nqo',
'nor': 'nob',
'nsm': 'sme',
'nta': 'nod',
'nto': 'epo',
'nyn': 'nno',
'ocr': 'ojs',
'ojb': 'oji',
'oro': 'orm',
'paa': 'sam',
'pal': 'pli',
'pap': 'plp',
'pap0': 'pap',
'pas': 'pus',
'pgr': 'ell',
'pil': 'fil',
'plg': 'pce',
'plk': 'pol',
'ptg': 'por',
'qin': 'bgr',
'rbu': 'bxr',
'rcr': 'atj',
'rms': 'roh',
'rom': 'ron',
'roy': 'rom',
'rsy': 'rue',
'rua': 'kin',
'sad': 'sck',
'say': 'chp',
'sek': 'xan',
'sel': 'sel',
'sgo': 'sag',
'sgs': 'sgs',
'sib': 'sjo',
'sig': 'xst',
'sks': 'sms',
'sky': 'slk',
'sla': 'scs',
'sml': 'som',
'sna': 'seh',
'sna0': 'sna',
'snh': 'sin',
'sog': 'gru',
'srb': 'srp',
'ssl': 'xsl',
'ssm': 'sma',
'sur': 'suq',
'sve': 'swe',
'swa': 'aii',
'swk': 'swa',
'swz': 'ssw',
'sxt': 'ngo',
'taj': 'tgk',
'tcr': 'cwd',
'tgn': 'ton',
'tgr': 'tig',
'tgy': 'tir',
'tht': 'tah',
'tib': 'bod',
'tkm': 'tuk',
'tmn': 'tem',
'tna': 'tsn',
'tne': 'enh',
'tng': 'toi',
'tod': 'xal',
'tod0': 'tod',
'trk': 'tur',
'tsg': 'tso',
'tua': 'tru',
'tul': 'tcy',
'tuv': 'tyv',
'twi': 'aka',
'usb': 'hsb',
'uyg': 'uig',
'vit': 'vie',
'vro': 'vro',
'wa': 'wbm',
'wag': 'wbr',
'wcr': 'crk',
'wel': 'cym',
'wlf': 'wol',
'xbd': 'khb',
'xhs': 'xho',
'yak': 'sah',
'yba': 'yor',
'ycr': 'cre',
'yim': 'iii',
'zhh': 'zho',
'zhp': 'zho',
'zhs': 'zho',
'zht': 'zho',
'znd': 'zne',
}
# Quotes, from https://github.com/unicode-org/cldr/tree/main/common/main
# Keyed by language code, optionally followed by '_' and a script, region or
# variant subtag; the None key holds the default entry.
# Each value is a pair of tuples of quotation marks -- presumably
# (opening marks, closing marks) with one item per nesting level; confirm
# against the code consuming the table. Some tuples hold a single item.
# NOTE(review): many marks are empty strings in this copy of the file while
# only '«' and '»' are present. It looks like the other quotation marks were
# lost somewhere -- verify against the CLDR data before relying on the table.
LANG_QUOTES = {
None: (('', ''), ('', '')), # Default, chosen by user agent
'ab': (('«', ''), ('»', '')),
'agq': (('', ''), ('', '')),
'am': (('«', ''), ('»', '')),
'an': (('«', ''), ('»', '')),
'ar': (('', ''), ('', '')),
'ast': (('«', ''), ('»', '')),
'az_Arab': (('«', ''), ('»', '')),
'az_Cyrl': (('«', ''), ('»', '')),
'bas': (('«', ''), ('»', '')),
'be': (('«', ''), ('»', '')),
'bg': (('',), ('',)),
'blo': (('«', ''), ('»', '')),
'bm': (('«', ''), ('»', '')),
'br': (('«', ''), ('»', '')),
'bs': (('', ''), ('', '')),
'bs_Cyrl': (('', ''), ('', '')),
'ca': (('«', ''), ('»', '')),
'co': (('«',), ('»',)),
'cs': (('', ''), ('', '')),
'cu': (('«', ''), ('»', '')),
'cv': (('«', ''), ('»', '')),
'de': (('', ''), ('', '')),
'dsb': (('', ''), ('', '')),
'dua': (('«', ''), ('»', '')),
'dyo': (('«', ''), ('»', '')),
'el': (('«', ''), ('»', '')),
'el_POLYTON': (('«', ''), ('»', '')),
'es_US': (('«', ''), ('»', '')),
'et': (('', ''), ('', '')),
'eu': (('«', ''), ('»', '')),
'ewo': (('«', ''), ('»', '')),
'fa': (('«', ''), ('»', '')),
'ff': (('', ''), ('', '')),
'fi': (('', ''), ('', '')),
'fr': (('«',), ('»',)),
'fr_CA': (('«', ''), ('»', '')),
'fr_CH': (('«', ''), ('»', '')),
'fur': (('', ''), ('', '')),
'gsw': (('«', ''), ('»', '')),
'he': (('', ''), ('', '')),
'hr': (('', ''), ('', '')),
'hsb': (('', ''), ('', '')),
'hu': (('', '»'), ('', '«')),
'hy': (('«',), ('»',)),
'ia': (('', ''), ('', '')),
'ie': (('«', ''), ('»', '')),
'is': (('', ''), ('', '')),
'it': (('«', ''), ('»', '')),
'it_CH': (('«', ''), ('»', '')),
'ja': (('', ''), ('', '')),
'jgo': (('«', ''), ('»', '')),
'ka': (('', '«'), ('', '»')),
'kab': (('«', ''), ('»', '')),
'kk': (('«', ''), ('»', '')),
'kkj': (('«', ''), ('»', '')),
'kl': (('»', ''), ('«', '')),
'ksf': (('«', ''), ('»', '')),
'ksh': (('', ''), ('', '')),
'ky': (('«', ''), ('»', '')),
'lag': (('', ''), ('', '')),
'lb': (('', ''), ('', '')),
'lij': (('«', ''), ('»', '')),
'lt': (('',), ('',)),
'luy': (('', ''), ('', '')),
'mg': (('«', ''), ('»', '')),
'mk': (('', ''), ('', '')),
'ms_Arab': (('', ''), ('', '')),
'mua': (('«', ''), ('»', '')),
'mzn': (('«', ''), ('»', '')),
'nds': (('', ''), ('', '')),
'nl': (('',), ('',)),
'nmg': (('', '«'), ('', '»')),
'nnh': (('«', ''), ('»', '')),
'no': (('«', ''), ('»', '')),
'nr': (('', ''), ('', '')),
'nso': (('', ''), ('', '')),
'oc': (('«',), ('»',)),
'oc_ES': (('«', ''), ('»', '')),
'os': (('«', ''), ('»', '')),
'pl': (('', '«'), ('', '»')),
'prg': (('',), ('',)),
'pt_PT': (('«', ''), ('»', '')),
'rm': (('«', ''), ('»', '')),
'rn': (('', ''), ('', '')),
'ro': (('', '«'), ('', '»')),
'ru': (('«', ''), ('»', '')),
'rw': (('«', ''), ('»', '')),
'sah': (('«', ''), ('»', '')),
'sc': (('«', ''), ('»', '')),
'sdh': (('«', ''), ('»', '')),
'se': (('', ''), ('', '')),
'sg': (('«', ''), ('»', '')),
'shi': (('«', ''), ('»', '')),
'sk': (('', ''), ('', '')),
'sl': (('', ''), ('', '')),
'sn': (('', ''), ('', '')),
'sq': (('«', ''), ('»', '')),
'sr': (('', ''), ('', '')),
'sr_Latn': (('', ''), ('', '')),
'ss': (('', ''), ('', '')),
'st': (('', ''), ('', '')),
'sv': (('', ''), ('', '')),
'syr': (('', ''), ('', '')),
'szl': (('', '»'), ('', '«')),
'tg': (('»', ''), ('«', '')),
'ti': (('«', ''), ('»', '')),
'ti_ER': (('', ''), ('', '')),
'tk': (('',), ('',)),
'tn': (('', ''), ('', '')),
'ts': (('', ''), ('', '')),
'ug': (('»', ''), ('«', '')),
'uk': (('«', ''), ('»', '')),
'ur': (('', ''), ('', '')),
'uz': (('', ''), ('', '')),
've': (('', ''), ('', '')),
'wae': (('«', ''), ('»', '')),
'yav': (('«',), ('»',)),
'yi': (('', ''), ('', '')),
'yue': (('', ''), ('', '')),
'zgh': (('«', ''), ('»', '')),
'zh_Hant': (('', ''), ('', '')),
}
@lru_cache()
def get_lang_quotes(lang):
    """Return the ``LANG_QUOTES`` entry that applies to ``lang``.

    An exact key wins. Otherwise the entry of the first key that ``lang``
    starts with is used, and the default entry (stored under ``None``) is
    returned when no key matches. Results are memoized by ``lru_cache``.

    """
    try:
        return LANG_QUOTES[lang]
    except KeyError:
        pass
    # Walk the keys backwards so that a longer key such as 'fr_CA' is tried
    # before the shorter 'fr' it begins with
    for code in reversed(tuple(LANG_QUOTES)):
        if code and lang.startswith(code):
            return LANG_QUOTES[code]
    return LANG_QUOTES[None]
# Font features
# Keywords of the ``font-variant-*`` properties and the font feature tags
# they stand for. The first two tables give a list of tags per keyword, the
# last two a single tag; entries of the latter are written 'keyword:tag'.
LIGATURE_KEYS = {
    keyword: tags.split() for keyword, tags in (
        ('common-ligatures', 'liga clig'),
        ('historical-ligatures', 'hlig'),
        ('discretionary-ligatures', 'dlig'),
        ('contextual', 'calt'),
    )
}

CAPS_KEYS = {
    keyword: tags.split() for keyword, tags in (
        ('small-caps', 'smcp'),
        ('all-small-caps', 'c2sc smcp'),
        ('petite-caps', 'pcap'),
        ('all-petite-caps', 'c2pc pcap'),
        ('unicase', 'unic'),
        ('titling-caps', 'titl'),
    )
}

NUMERIC_KEYS = dict(
    entry.split(':') for entry in (
        'lining-nums:lnum',
        'oldstyle-nums:onum',
        'proportional-nums:pnum',
        'tabular-nums:tnum',
        'diagonal-fractions:frac',
        'stacked-fractions:afrc',
        'ordinal:ordn',
        'slashed-zero:zero',
    )
)

EAST_ASIAN_KEYS = dict(
    entry.split(':') for entry in (
        'jis78:jp78',
        'jis83:jp83',
        'jis90:jp90',
        'jis04:jp04',
        'simplified:smpl',
        'traditional:trad',
        'full-width:fwid',
        'proportional-width:pwid',
        'ruby:ruby',
    )
)
# Fontconfig features

# Weight keyword or number -> Fontconfig weight value. The two keywords come
# first, followed by the numeric weights 100, 200, ... 900.
FONTCONFIG_WEIGHT = {'normal': 80, 'bold': 200}
FONTCONFIG_WEIGHT.update(zip(
    range(100, 1000, 100),
    (0, 40, 50, 80, 100, 180, 200, 205, 210)))

# Style keyword -> Fontconfig name; only 'normal' is spelled differently
FONTCONFIG_STYLE = {
    keyword: 'roman' if keyword == 'normal' else keyword
    for keyword in ('normal', 'italic', 'oblique')
}

# Stretch keyword -> Fontconfig name, which is the keyword without hyphen
FONTCONFIG_STRETCH = {
    keyword: keyword.replace('-', '')
    for keyword in (
        'normal', 'ultra-condensed', 'extra-condensed', 'condensed',
        'semi-condensed', 'semi-expanded', 'expanded', 'extra-expanded',
        'ultra-expanded')
}

View File

@@ -0,0 +1,461 @@
"""Imports of dynamic libraries used for text layout."""
import os
from contextlib import suppress
import cffi
# FFI object shared by every library opened in this module. The C declarations
# below cover HarfBuzz, Pango, Fontconfig and PangoFT2; "typedef ... name;"
# declares a type that is only handled through pointers.
# FcPatternDestroy is declared "void" as in the Fontconfig API (it used to be
# declared as returning an FcPattern pointer, which the function never does).
ffi = cffi.FFI()
ffi.cdef('''
// HarfBuzz
typedef ... hb_font_t;
typedef ... hb_face_t;
typedef ... hb_blob_t;
typedef uint32_t hb_codepoint_t;
hb_blob_t * hb_face_reference_blob (hb_face_t *face);
unsigned int hb_face_get_index (const hb_face_t *face);
unsigned int hb_face_get_upem (const hb_face_t *face);
const char * hb_blob_get_data (hb_blob_t *blob, unsigned int *length);
bool hb_ot_color_has_png (hb_face_t *face);
hb_blob_t * hb_ot_color_glyph_reference_png (
hb_font_t *font, hb_codepoint_t glyph);
bool hb_ot_color_has_svg (hb_face_t *face);
hb_blob_t * hb_ot_color_glyph_reference_svg (
hb_face_t *face, hb_codepoint_t glyph);
void hb_blob_destroy (hb_blob_t *blob);
// Pango
typedef unsigned int guint;
typedef int gint;
typedef char gchar;
typedef gint gboolean;
typedef void* gpointer;
typedef ... PangoLayout;
typedef ... PangoContext;
typedef ... PangoFontMap;
typedef ... PangoFontMetrics;
typedef ... PangoLanguage;
typedef ... PangoTabArray;
typedef ... PangoFontDescription;
typedef ... PangoLayoutIter;
typedef ... PangoAttrList;
typedef ... PangoAttrClass;
typedef ... PangoFont;
typedef guint PangoGlyph;
typedef gint PangoGlyphUnit;
const guint PANGO_GLYPH_EMPTY = 0x0FFFFFFF;
const guint PANGO_GLYPH_UNKNOWN_FLAG = 0x10000000;
typedef enum {
PANGO_STYLE_NORMAL,
PANGO_STYLE_OBLIQUE,
PANGO_STYLE_ITALIC
} PangoStyle;
typedef enum {
PANGO_WEIGHT_THIN = 100,
PANGO_WEIGHT_ULTRALIGHT = 200,
PANGO_WEIGHT_LIGHT = 300,
PANGO_WEIGHT_BOOK = 380,
PANGO_WEIGHT_NORMAL = 400,
PANGO_WEIGHT_MEDIUM = 500,
PANGO_WEIGHT_SEMIBOLD = 600,
PANGO_WEIGHT_BOLD = 700,
PANGO_WEIGHT_ULTRABOLD = 800,
PANGO_WEIGHT_HEAVY = 900,
PANGO_WEIGHT_ULTRAHEAVY = 1000
} PangoWeight;
typedef enum {
PANGO_FONT_MASK_SIZE = 1 << 5,
PANGO_FONT_MASK_GRAVITY = 1 << 6,
PANGO_FONT_MASK_VARIATIONS = 1 << 7
} PangoFontMask;
typedef enum {
PANGO_STRETCH_ULTRA_CONDENSED,
PANGO_STRETCH_EXTRA_CONDENSED,
PANGO_STRETCH_CONDENSED,
PANGO_STRETCH_SEMI_CONDENSED,
PANGO_STRETCH_NORMAL,
PANGO_STRETCH_SEMI_EXPANDED,
PANGO_STRETCH_EXPANDED,
PANGO_STRETCH_EXTRA_EXPANDED,
PANGO_STRETCH_ULTRA_EXPANDED
} PangoStretch;
typedef enum {
PANGO_WRAP_WORD,
PANGO_WRAP_CHAR,
PANGO_WRAP_WORD_CHAR
} PangoWrapMode;
typedef enum {
PANGO_TAB_LEFT
} PangoTabAlign;
typedef enum {
PANGO_ELLIPSIZE_NONE,
PANGO_ELLIPSIZE_START,
PANGO_ELLIPSIZE_MIDDLE,
PANGO_ELLIPSIZE_END
} PangoEllipsizeMode;
typedef struct GSList {
gpointer data;
struct GSList *next;
} GSList;
typedef struct {
void *shape_engine;
void *lang_engine;
PangoFont *font;
guint level;
guint gravity;
guint flags;
guint script;
PangoLanguage *language;
GSList *extra_attrs;
} PangoAnalysis;
typedef struct {
gint offset;
gint length;
gint num_chars;
PangoAnalysis analysis;
} PangoItem;
typedef struct {
PangoGlyphUnit width;
PangoGlyphUnit x_offset;
PangoGlyphUnit y_offset;
} PangoGlyphGeometry;
typedef struct {
guint is_cluster_start : 1;
} PangoGlyphVisAttr;
typedef struct {
PangoGlyph glyph;
PangoGlyphGeometry geometry;
PangoGlyphVisAttr attr;
} PangoGlyphInfo;
typedef struct {
gint num_glyphs;
PangoGlyphInfo *glyphs;
gint *log_clusters;
} PangoGlyphString;
typedef struct {
PangoItem *item;
PangoGlyphString *glyphs;
} PangoGlyphItem;
typedef struct GSListRuns {
PangoGlyphItem *data;
struct GSListRuns *next;
} GSListRuns;
typedef struct {
const PangoAttrClass *klass;
guint start_index;
guint end_index;
} PangoAttribute;
typedef struct {
PangoLayout *layout;
gint start_index;
gint length;
GSListRuns *runs;
guint is_paragraph_start : 1;
guint resolved_dir : 3;
} PangoLayoutLine;
typedef struct {
int x;
int y;
int width;
int height;
} PangoRectangle;
typedef struct {
guint is_line_break: 1;
guint is_mandatory_break : 1;
guint is_char_break : 1;
guint is_white : 1;
guint is_cursor_position : 1;
guint is_word_start : 1;
guint is_word_end : 1;
guint is_sentence_boundary : 1;
guint is_sentence_start : 1;
guint is_sentence_end : 1;
guint backspace_deletes_character : 1;
guint is_expandable_space : 1;
guint is_word_boundary : 1;
} PangoLogAttr;
int pango_version (void);
double pango_units_to_double (int i);
int pango_units_from_double (double d);
void g_object_unref (gpointer object);
void g_type_init (void);
PangoLayout * pango_layout_new (PangoContext *context);
void pango_layout_set_width (PangoLayout *layout, int width);
PangoAttrList * pango_layout_get_attributes(PangoLayout *layout);
void pango_layout_set_attributes (
PangoLayout *layout, PangoAttrList *attrs);
void pango_layout_set_text (
PangoLayout *layout, const char *text, int length);
void pango_layout_set_tabs (
PangoLayout *layout, PangoTabArray *tabs);
void pango_layout_set_font_description (
PangoLayout *layout, const PangoFontDescription *desc);
void pango_layout_set_wrap (
PangoLayout *layout, PangoWrapMode wrap);
void pango_layout_set_single_paragraph_mode (
PangoLayout *layout, gboolean setting);
int pango_layout_get_baseline (PangoLayout *layout);
PangoLayoutLine * pango_layout_get_line_readonly (
PangoLayout *layout, int line);
hb_font_t * pango_font_get_hb_font (PangoFont *font);
PangoFontDescription * pango_font_description_new (void);
void pango_font_description_free (PangoFontDescription *desc);
PangoFontDescription * pango_font_description_copy (
const PangoFontDescription *desc);
PangoFontMap* pango_font_get_font_map (PangoFont* font);
void pango_font_description_set_family (
PangoFontDescription *desc, const char *family);
void pango_font_description_set_style (
PangoFontDescription *desc, PangoStyle style);
void pango_font_description_set_stretch (
PangoFontDescription *desc, PangoStretch stretch);
void pango_font_description_set_weight (
PangoFontDescription *desc, PangoWeight weight);
void pango_font_description_set_absolute_size (
PangoFontDescription *desc, double size);
void pango_font_description_set_variations (
PangoFontDescription* desc, const char* variations);
PangoStyle pango_font_description_get_style (
const PangoFontDescription *desc);
const char* pango_font_description_get_variations (
const PangoFontDescription* desc);
PangoWeight pango_font_description_get_weight (
const PangoFontDescription* desc);
int pango_font_description_get_size (PangoFontDescription *desc);
void pango_font_description_unset_fields (
PangoFontDescription* desc, PangoFontMask to_unset);
int pango_glyph_string_get_width (PangoGlyphString *glyphs);
char * pango_font_description_to_string (
const PangoFontDescription *desc);
PangoFontDescription * pango_font_describe (PangoFont *font);
const char * pango_font_description_get_family (
const PangoFontDescription *desc);
guint pango_font_description_hash (const PangoFontDescription *desc);
PangoContext * pango_context_new ();
PangoContext * pango_font_map_create_context (PangoFontMap *fontmap);
PangoFont* pango_font_map_load_font (
PangoFontMap* fontmap, PangoContext* context,
const PangoFontDescription* desc);
PangoFontMetrics * pango_context_get_metrics (
PangoContext *context, const PangoFontDescription *desc,
PangoLanguage *language);
PangoFontMetrics * pango_font_get_metrics (
PangoFont *font, PangoLanguage *language);
void pango_font_metrics_unref (PangoFontMetrics *metrics);
int pango_font_metrics_get_ascent (PangoFontMetrics *metrics);
int pango_font_metrics_get_descent (PangoFontMetrics *metrics);
int pango_font_metrics_get_underline_thickness (
PangoFontMetrics *metrics);
int pango_font_metrics_get_underline_position (
PangoFontMetrics *metrics);
int pango_font_metrics_get_strikethrough_thickness (
PangoFontMetrics *metrics);
int pango_font_metrics_get_strikethrough_position (
PangoFontMetrics *metrics);
void pango_font_get_glyph_extents (
PangoFont *font, PangoGlyph glyph, PangoRectangle *ink_rect,
PangoRectangle *logical_rect);
void pango_context_set_round_glyph_positions (
PangoContext *context, gboolean round_positions);
PangoAttrList * pango_attr_list_new (void);
void pango_attr_list_unref (PangoAttrList *list);
void pango_attr_list_insert (
PangoAttrList *list, PangoAttribute *attr);
void pango_attr_list_change (
PangoAttrList *list, PangoAttribute *attr);
PangoAttribute * pango_attr_font_features_new (const gchar *features);
PangoAttribute * pango_attr_letter_spacing_new (int letter_spacing);
PangoAttribute * pango_attr_insert_hyphens_new (gboolean insert_hyphens);
void pango_attribute_destroy (PangoAttribute *attr);
PangoTabArray * pango_tab_array_new_with_positions (
gint size, gboolean positions_in_pixels, PangoTabAlign first_alignment,
gint first_position, ...);
void pango_tab_array_free (PangoTabArray *tab_array);
PangoLanguage * pango_language_from_string (const char *language);
PangoLanguage * pango_language_get_default (void);
void pango_context_set_language (
PangoContext *context, PangoLanguage *language);
void pango_context_set_font_map (
PangoContext *context, PangoFontMap *font_map);
void pango_layout_line_get_extents (
PangoLayoutLine *line,
PangoRectangle *ink_rect, PangoRectangle *logical_rect);
PangoContext * pango_layout_get_context (PangoLayout *layout);
void pango_layout_set_ellipsize (
PangoLayout *layout,
PangoEllipsizeMode ellipsize);
void pango_get_log_attrs (
const char *text, int length, int level, PangoLanguage *language,
PangoLogAttr *log_attrs, int attrs_len);
// FontConfig
typedef int FcBool;
typedef struct _FcConfig FcConfig;
typedef struct _FcPattern FcPattern;
typedef struct _FcStrList FcStrList;
typedef unsigned char FcChar8;
typedef enum {
FcResultMatch, FcResultNoMatch, FcResultTypeMismatch, FcResultNoId,
FcResultOutOfMemory
} FcResult;
typedef enum {
FcMatchPattern, FcMatchFont, FcMatchScan
} FcMatchKind;
typedef struct _FcFontSet {
int nfont;
int sfont;
FcPattern **fonts;
} FcFontSet;
typedef enum _FcSetName {
FcSetSystem = 0,
FcSetApplication = 1
} FcSetName;
FcConfig * FcInitLoadConfigAndFonts (void);
void FcConfigDestroy (FcConfig *config);
FcBool FcConfigAppFontAddFile (
FcConfig *config, const FcChar8 *file);
FcConfig * FcConfigGetCurrent (void);
FcBool FcConfigSetCurrent (FcConfig *config);
FcBool FcConfigParseAndLoad (
FcConfig *config, const FcChar8 *file, FcBool complain);
FcFontSet * FcConfigGetFonts(FcConfig *config, FcSetName set);
FcStrList * FcConfigGetConfigFiles(FcConfig *config);
FcChar8 * FcStrListNext(FcStrList *list);
void FcDefaultSubstitute (FcPattern *pattern);
FcBool FcConfigSubstitute (
FcConfig *config, FcPattern *p, FcMatchKind kind);
FcPattern * FcPatternCreate (void);
void FcPatternDestroy (FcPattern *p);
FcBool FcPatternAddString (
FcPattern *p, const char *object, const FcChar8 *s);
FcResult FcPatternGetString (
FcPattern *p, const char *object, int n, FcChar8 **s);
FcPattern * FcFontMatch (
FcConfig *config, FcPattern *p, FcResult *result);
// PangoFT2
typedef ... PangoFcFont;
typedef ... PangoFcFontMap;
PangoFontMap * pango_ft2_font_map_new (void);
void pango_fc_font_map_set_config (
PangoFcFontMap *fcfontmap, FcConfig *fcconfig);
void pango_fc_font_map_config_changed (PangoFcFontMap *fcfontmap);
hb_face_t* pango_fc_font_map_get_hb_face (
PangoFcFontMap* fcfontmap, PangoFcFont* fcfont);
''')
def _dlopen(ffi, *names):
    """Open a library known under different names on different platforms.

    Each name is given to ``ffi.dlopen`` in turn and the first library that
    opens is returned. When every attempt raises ``OSError``, installation
    hints are printed and the first name is opened once more so that its
    error reaches the caller.

    """
    for candidate in names:
        try:
            library = ffi.dlopen(candidate)
        except OSError:
            continue
        return library
    # Nothing could be opened: print some help, then re-raise the exception.
    docs = 'https://doc.courtbouillon.org/weasyprint/stable/first_steps.html'
    message = (
        '\n-----\n\n'
        'WeasyPrint could not import some external libraries. Please '
        'carefully follow the installation steps before reporting an issue:\n'
        f'{docs}#installation\n'
        f'{docs}#troubleshooting')
    print(message, '\n\n-----\n')  # pragma: no cover
    return ffi.dlopen(names[0])  # pragma: no cover
if hasattr(os, 'add_dll_directory'):  # pragma: no cover
    # Register extra folders searched for DLLs. They are read from the
    # ';'-separated WEASYPRINT_DLL_DIRECTORIES environment variable, with
    # the GTK3 runtime folder as default.
    dll_directories = os.getenv(
        'WEASYPRINT_DLL_DIRECTORIES',
        'C:\\Program Files\\GTK3-Runtime Win64\\bin').split(';')
    for dll_directory in dll_directories:
        # Folders that cannot be added are skipped. FileNotFoundError is a
        # subclass of OSError and does not have to be listed.
        with suppress(OSError):
            os.add_dll_directory(dll_directory)

# Open each library under the first of its known names that can be loaded
gobject = _dlopen(
    ffi, 'gobject-2.0-0', 'gobject-2.0', 'libgobject-2.0-0',
    'libgobject-2.0.so.0', 'libgobject-2.0.dylib', 'libgobject-2.0-0.dll')
pango = _dlopen(
    ffi, 'pango-1.0-0', 'pango-1.0', 'libpango-1.0-0', 'libpango-1.0.so.0',
    'libpango-1.0.dylib', 'libpango-1.0-0.dll')
# 'libharfbuzz.so.0' used to be listed twice, it is only tried once now
harfbuzz = _dlopen(
    ffi, 'harfbuzz', 'harfbuzz-0.0', 'libharfbuzz-0',
    'libharfbuzz.so.0', 'libharfbuzz.0.dylib', 'libharfbuzz-0.dll')
fontconfig = _dlopen(
    ffi, 'fontconfig-1', 'fontconfig', 'libfontconfig', 'libfontconfig.so.1',
    'libfontconfig.1.dylib', 'libfontconfig-1.dll')
pangoft2 = _dlopen(
    ffi, 'pangoft2-1.0-0', 'pangoft2-1.0', 'libpangoft2-1.0-0',
    'libpangoft2-1.0.so.0', 'libpangoft2-1.0.dylib', 'libpangoft2-1.0-0.dll')

gobject.g_type_init()

# Shortcuts to Pango's unit conversion functions
units_to_double = pango.pango_units_to_double
units_from_double = pango.pango_units_from_double
def unicode_to_char_p(string):
    """Return ``(pointer, bytestring)`` for the given text.

    The text is encoded with the default encoding of ``str.encode`` and its
    NUL bytes are dropped before the ``char[]`` array is allocated.

    The byte string must live at least as long as the pointer is used.

    """
    encoded = string.encode()
    without_nul = encoded.replace(b'\x00', b'')
    char_array = ffi.new('char[]', without_nul)
    return char_array, without_nul

View File

@@ -0,0 +1,413 @@
"""Interface with external libraries managing fonts installed on the system."""
from hashlib import md5
from io import BytesIO
from pathlib import Path
from shutil import rmtree
from tempfile import mkdtemp
from warnings import warn
from fontTools.ttLib import TTFont, woff2
from ..logger import LOGGER
from ..urls import FILESYSTEM_ENCODING, fetch
from .constants import ( # isort:skip
CAPS_KEYS, EAST_ASIAN_KEYS, FONTCONFIG_STRETCH, FONTCONFIG_STYLE,
FONTCONFIG_WEIGHT, LIGATURE_KEYS, NUMERIC_KEYS, PANGO_STRETCH, PANGO_STYLE)
from .ffi import ( # isort:skip
ffi, fontconfig, gobject, harfbuzz, pango, pangoft2, unicode_to_char_p,
units_from_double)
def _check_font_configuration(font_config):  # pragma: no cover
    """Warn when the given font_config does not provide any font.

    Fontconfig's default configuration file may be missing, particularly on
    Windows or macOS where the installation of Fontconfig isn't as
    standardized as on Linux. This results in "Fontconfig error: Cannot load
    default config file".

    Fontconfig then tries to retrieve the system fonts as fallback. That may
    or may not work, especially on macOS where fonts can be installed at
    various locations. On Windows the fallback seems to work, at least since
    Fontconfig 2.13.

    Output will be ugly if there's no default configuration and the fallback
    fails, or if the configuration file exists but doesn't provide fonts:

    - with no fonts and an HTML document without a valid @font-face, all
      letters turn into rectangles;
    - with an HTML document including at least one valid @font-face, all
      text is styled with that font.

    On Windows and macOS Pango can be made to use native font rendering
    instead of rendering fonts with FreeType, but then @font-face is lost.
    Expect other missing features and ugly output.

    """
    # Having fonts means that either Fontconfig's config file returns fonts
    # or that Fontconfig managed to retrieve system fallback fonts.
    system_fonts = fontconfig.FcConfigGetFonts(
        font_config, fontconfig.FcSetSystem)
    # A single font is enough here, even if the user won't be happy with it
    if system_fonts.nfont > 0:
        return

    # No fonts: tell a missing configuration from a useless one
    first_config_file = fontconfig.FcStrListNext(
        fontconfig.FcConfigGetConfigFiles(font_config))
    if first_config_file != ffi.NULL:
        # Useless config file, or indeed no fonts.
        message = 'No fonts configured in FontConfig. Expect ugly output.'
    else:
        message = (
            'FontConfig cannot load default config file. Expect ugly output.')
    warn(message)
# Check a freshly loaded default configuration. The configuration is wrapped
# by ffi.gc() with FcConfigDestroy as destructor and is not kept in a variable.
_check_font_configuration(ffi.gc(
fontconfig.FcInitLoadConfigAndFonts(), fontconfig.FcConfigDestroy))
class FontConfiguration:
"""A FreeType font configuration.
Keep a list of fonts, including fonts installed on the system, fonts
installed for the current user, and fonts referenced by cascading
stylesheets.
When created, an instance of this class gathers available fonts. It can
then be given to :class:`weasyprint.HTML` methods or to
:class:`weasyprint.CSS` to find fonts in ``@font-face`` rules.
"""
_folder = None # required by __del__ when code stops before __init__ finishes
def __init__(self):
"""Create a FreeType font configuration.
See Behdad's blog:
https://mces.blogspot.fr/2015/05/
how-to-use-custom-application-fonts.html
"""
# Load the main config file and the fonts.
self._fontconfig_config = ffi.gc(
fontconfig.FcInitLoadConfigAndFonts(),
fontconfig.FcConfigDestroy)
self.font_map = ffi.gc(
pangoft2.pango_ft2_font_map_new(), gobject.g_object_unref)
pangoft2.pango_fc_font_map_set_config(
ffi.cast('PangoFcFontMap *', self.font_map),
self._fontconfig_config)
# pango_fc_font_map_set_config keeps a reference to config
fontconfig.FcConfigDestroy(self._fontconfig_config)
# Temporary folder storing fonts and Fontconfig config files
self._folder = Path(mkdtemp(prefix='weasyprint-'))
def add_font_face(self, rule_descriptors, url_fetcher):
features = {
rules[0][0].replace('-', '_'): rules[0][1] for rules in
rule_descriptors.get('font_variant', [])}
key = 'font_feature_settings'
if key in rule_descriptors:
features[key] = rule_descriptors[key]
features_string = ''.join(
f'<string>{key} {value}</string>'
for key, value in font_features(**features).items())
fontconfig_style = fontconfig_weight = fontconfig_stretch = None
if 'font_style' in rule_descriptors:
fontconfig_style = FONTCONFIG_STYLE[rule_descriptors['font_style']]
if 'font_weight' in rule_descriptors:
fontconfig_weight = FONTCONFIG_WEIGHT[rule_descriptors['font_weight']]
if 'font_stretch' in rule_descriptors:
fontconfig_stretch = FONTCONFIG_STRETCH[rule_descriptors['font_stretch']]
config_key = (
f'{rule_descriptors["font_family"]}-{fontconfig_style}-'
f'{fontconfig_weight}-{features_string}').encode()
config_digest = md5(config_key, usedforsecurity=False).hexdigest()
font_path = self._folder / config_digest
if font_path.exists():
return
for font_type, url in rule_descriptors['src']:
if url is None:
continue
if font_type in ('external', 'local'):
config = self._fontconfig_config
if font_type == 'local':
font_name = url.encode()
pattern = ffi.gc(
fontconfig.FcPatternCreate(),
fontconfig.FcPatternDestroy)
fontconfig.FcConfigSubstitute(
config, pattern, fontconfig.FcMatchFont)
fontconfig.FcDefaultSubstitute(pattern)
fontconfig.FcPatternAddString(
pattern, b'fullname', font_name)
fontconfig.FcPatternAddString(
pattern, b'postscriptname', font_name)
family = ffi.new('FcChar8 **')
postscript = ffi.new('FcChar8 **')
result = ffi.new('FcResult *')
matching_pattern = fontconfig.FcFontMatch(
config, pattern, result)
# prevent RuntimeError, see issue #677
if matching_pattern == ffi.NULL:
LOGGER.debug(
'Failed to get matching local font for %r',
font_name.decode())
continue
# TODO: do many fonts have multiple family values?
fontconfig.FcPatternGetString(
matching_pattern, b'fullname', 0, family)
fontconfig.FcPatternGetString(
matching_pattern, b'postscriptname', 0, postscript)
family = ffi.string(family[0])
postscript = ffi.string(postscript[0])
if font_name.lower() in (
family.lower(), postscript.lower()):
filename = ffi.new('FcChar8 **')
fontconfig.FcPatternGetString(
matching_pattern, b'file', 0, filename)
path = ffi.string(filename[0]).decode(
FILESYSTEM_ENCODING)
url = Path(path).as_uri()
else:
LOGGER.debug(
'Failed to load local font %r', font_name.decode())
continue
# Get font content
try:
with fetch(url_fetcher, url) as result:
if 'string' in result:
font = result['string']
else:
font = result['file_obj'].read()
except Exception as exc:
LOGGER.debug('Failed to load font at %r (%s)', url, exc)
continue
# Store font content
try:
# Decode woff and woff2 fonts
if font[:3] == b'wOF':
out = BytesIO()
woff_version_byte = font[3:4]
if woff_version_byte == b'F':
# woff font
ttfont = TTFont(BytesIO(font))
ttfont.flavor = ttfont.flavorData = None
ttfont.save(out)
elif woff_version_byte == b'2':
# woff2 font
woff2.decompress(BytesIO(font), out)
font = out.getvalue()
except Exception as exc:
LOGGER.debug(
'Failed to handle woff font at %r (%s)', url, exc)
continue
font_path.write_bytes(font)
xml_path = self._folder / f'{config_digest}.xml'
xml = ''.join((f'''<?xml version="1.0"?>
<!DOCTYPE fontconfig SYSTEM "fonts.dtd">
<fontconfig>
<match target="scan">
<test name="file" compare="eq">
<string>{font_path}</string>
</test>
<edit name="family" mode="assign_replace">
<string>{rule_descriptors['font_family']}</string>
</edit>''',
f'''
<edit name="slant" mode="assign_replace">
<const>{fontconfig_style}</const>
</edit>
''' if fontconfig_style else '',
f'''
<edit name="weight" mode="assign_replace">
<int>{fontconfig_weight}</int>
</edit>
''' if fontconfig_weight else '',
f'''
<edit name="width" mode="assign_replace">
<const>{fontconfig_stretch}</const>
</edit>
''' if fontconfig_stretch else '',
f'''
</match>
<match target="font">
<test name="file" compare="eq">
<string>{font_path}</string>
</test>
<edit name="fontfeatures"
mode="assign_replace">{features_string}</edit>
</match>
</fontconfig>'''))
xml_path.write_text(xml)
# TODO: We should mask local fonts with the same name
# too as explained in Behdad's blog entry.
fontconfig.FcConfigParseAndLoad(
config, str(xml_path).encode(FILESYSTEM_ENCODING),
True)
font_added = fontconfig.FcConfigAppFontAddFile(
config, str(font_path).encode(FILESYSTEM_ENCODING))
if font_added:
return pangoft2.pango_fc_font_map_config_changed(
ffi.cast('PangoFcFontMap *', self.font_map))
LOGGER.debug('Failed to load font at %r', url)
LOGGER.warning(
'Font-face %r cannot be loaded', rule_descriptors['font_family'])
def __del__(self):
    """Remove the folder referenced by ``self._folder``.

    The folder is deleted with ``ignore_errors=True``, so a folder that is
    already gone or cannot be fully removed never raises here.

    ``__del__`` is also invoked on objects whose ``__init__`` raised before
    ``_folder`` was assigned; in that case there is nothing to clean and the
    method returns silently instead of raising ``AttributeError`` during
    garbage collection.

    """
    folder = getattr(self, '_folder', None)
    if folder is not None:
        rmtree(folder, ignore_errors=True)
def font_features(font_kerning='normal', font_variant_ligatures='normal',
                  font_variant_position='normal', font_variant_caps='normal',
                  font_variant_numeric='normal',
                  font_variant_alternates='normal',
                  font_variant_east_asian='normal',
                  font_feature_settings='normal'):
    """Build the OpenType feature mapping matching CSS font properties.

    Return a dict mapping feature tags to integer values. Entries are added
    following https://www.w3.org/TR/css-fonts-3/#feature-precedence so that
    values set by later steps replace values set by earlier ones.

    """
    # Steps 1 to 3 of the precedence list are not handled here: defaults are
    # left to Pango, and the @font-face font-variant and font-feature-settings
    # descriptors are applied in fonts.add_font_face.
    enabled = {}

    # Step 4: font-kerning and the font-variant-* properties.
    if font_kerning != 'auto':
        enabled['kern'] = 1 if font_kerning == 'normal' else 0

    if font_variant_ligatures == 'none':
        # Switch off every known ligature feature.
        enabled.update(
            (tag, 0) for tags in LIGATURE_KEYS.values() for tag in tags)
    elif font_variant_ligatures != 'normal':
        for name in font_variant_ligatures:
            # A "no-" prefix disables the ligature family named after it.
            disabled = name.startswith('no-')
            tags = LIGATURE_KEYS[name[3:] if disabled else name]
            enabled.update(dict.fromkeys(tags, 0 if disabled else 1))

    # TODO: the specification asks for additional checks
    # https://www.w3.org/TR/css-fonts-3/#font-variant-position-prop
    if font_variant_position == 'super':
        enabled['sups'] = 1
    elif font_variant_position == 'sub':
        enabled['subs'] = 1

    if font_variant_caps != 'normal':
        # TODO: the specification asks for additional checks
        # https://www.w3.org/TR/css-fonts-3/#font-variant-caps-prop
        enabled.update(dict.fromkeys(CAPS_KEYS[font_variant_caps], 1))

    if font_variant_numeric != 'normal':
        enabled.update(
            (NUMERIC_KEYS[name], 1) for name in font_variant_numeric)

    # TODO: support other font-variant-alternates values
    if font_variant_alternates == 'historical-forms':
        enabled['hist'] = 1

    if font_variant_east_asian != 'normal':
        enabled.update(
            (EAST_ASIAN_KEYS[name], 1) for name in font_variant_east_asian)

    # Step 5: incompatible non-OpenType features, already handled by Pango.

    # Step 6: font-feature-settings has the final word.
    if font_feature_settings != 'normal':
        enabled.update(dict(font_feature_settings))

    return enabled
def get_font_description(style):
    """Build a Pango font description from the font properties of ``style``.

    The family list is joined with commas, and the style, stretch, weight
    and absolute size are copied from ``style``. Variation settings are added
    unless ``font_variation_settings`` is ``'normal'``. The returned cdata is
    wrapped with ``ffi.gc`` so that ``pango_font_description_free`` is called
    when it is collected.

    """
    description = ffi.gc(
        pango.pango_font_description_new(),
        pango.pango_font_description_free)

    families_p, families = unicode_to_char_p(','.join(style['font_family']))
    pango.pango_font_description_set_family(description, families_p)

    pango_style = PANGO_STYLE[style['font_style']]
    pango.pango_font_description_set_style(description, pango_style)

    pango_stretch = PANGO_STRETCH[style['font_stretch']]
    pango.pango_font_description_set_stretch(description, pango_stretch)

    pango.pango_font_description_set_weight(
        description, style['font_weight'])

    absolute_size = units_from_double(style['font_size'])
    pango.pango_font_description_set_absolute_size(
        description, absolute_size)

    if style['font_variation_settings'] != 'normal':
        # Pango expects a comma-separated list of "axis=value" pairs.
        pairs = [
            f'{key}={value}' for key, value in
            style['font_variation_settings']]
        pango.pango_font_description_set_variations(
            description, ','.join(pairs).encode())

    return description
def get_pango_font_hb_face(pango_font):
    """Return the Harfbuzz face associated with ``pango_font``.

    Both the font and the font map it belongs to are cast to their
    ``PangoFc*`` types before being handed to
    ``pango_fc_font_map_get_hb_face``.

    """
    font_map = pango.pango_font_get_font_map(pango_font)
    return pangoft2.pango_fc_font_map_get_hb_face(
        ffi.cast('PangoFcFontMap *', font_map),
        ffi.cast('PangoFcFont *', pango_font))
def get_hb_object_data(hb_object, ot_color=None, glyph=None):
    """Return the bytes of a Harfbuzz blob taken from ``hb_object``.

    ``ot_color`` selects the blob: ``'svg'`` and ``'png'`` use the color
    glyph reference of ``glyph`` in the matching format, any other value
    uses the blob of the whole face.

    Return ``None`` when Harfbuzz gives a NULL data pointer for the blob.

    """
    if ot_color == 'svg':
        blob = harfbuzz.hb_ot_color_glyph_reference_svg(hb_object, glyph)
    elif ot_color == 'png':
        blob = harfbuzz.hb_ot_color_glyph_reference_png(hb_object, glyph)
    else:
        blob = harfbuzz.hb_face_reference_blob(hb_object)

    with ffi.new('unsigned int *') as size:
        pointer = harfbuzz.hb_blob_get_data(blob, size)
        if pointer == ffi.NULL:
            data = None
        else:
            # Copy the bytes out before the blob is destroyed.
            data = ffi.unpack(pointer, int(size[0]))

    harfbuzz.hb_blob_destroy(blob)
    return data
def get_pango_font_key(pango_font):
    """Return a hash of the description of ``pango_font``.

    The size and gravity fields are unset before hashing, so they do not
    take part in the key.

    """
    # TODO: This value is stable for a given Pango font in a given Pango map,
    # but it can't be cached with just the Pango font as a key, because two
    # Pango fonts could point to the same address for two different Pango
    # maps. We should cache it in the FontConfiguration object.
    # See https://github.com/Kozea/WeasyPrint/issues/2144
    ignored_fields = (
        pango.PANGO_FONT_MASK_SIZE +
        pango.PANGO_FONT_MASK_GRAVITY)
    font_description = ffi.gc(
        pango.pango_font_describe(pango_font),
        pango.pango_font_description_free)
    pango.pango_font_description_unset_fields(
        font_description, ignored_fields)
    return pango.pango_font_description_hash(font_description)

View File

@@ -0,0 +1,545 @@
"""Decide where to break text lines."""
import re
from math import inf
import pyphen
from .constants import LST_TO_ISO, PANGO_WRAP_MODE
from .fonts import font_features, get_font_description
from .ffi import ( # isort:skip
ffi, gobject, pango, pangoft2, unicode_to_char_p, units_from_double,
units_to_double)
def line_size(line, style):
    """Return the logical ``(width, height)`` of the given Pango ``line``.

    When ``style['letter_spacing']`` is not ``'normal'``, its value is added
    once to the width.

    """
    with ffi.new('PangoRectangle *') as extents:
        # Only the logical rectangle is requested, the ink one is skipped.
        pango.pango_layout_line_get_extents(line, ffi.NULL, extents)
        width = units_to_double(extents.width)
        height = units_to_double(extents.height)

    spacing = style['letter_spacing']
    if spacing != 'normal':
        width += spacing
    return width, height
def first_line_metrics(first_line, text, layout, resume_at, space_collapse,
                       style, hyphenated=False, hyphenation_character=None):
    """Measure the first line and deactivate ``layout``.

    Return ``(layout, length, resume_at, width, height, baseline)`` where
    ``length`` is the length of the first line in UTF-8 bytes.

    When the line is hyphenated, the bytes of ``hyphenation_character`` are
    subtracted from the length. Otherwise, when ``resume_at`` is set, the
    layout is rebuilt with only the text of the first line (without trailing
    spaces if ``space_collapse`` is true) and no width constraint.

    """
    length = first_line.length

    if hyphenated:
        length -= len(hyphenation_character.encode())
    elif resume_at:
        # The lines have already been split and the size may differ when
        # drawing, so no width is set: we don't want to break lines anymore.
        # Rendering is also much faster when no width is set.
        pango.pango_layout_set_width(layout.layout, -1)

        # ``length`` counts bytes, so slice the encoded text.
        kept_text = text.encode()[:length].decode()
        if space_collapse:
            kept_text = kept_text.rstrip(' ')

        layout.set_text(kept_text)
        first_line, _ = layout.get_first_line()
        length = 0 if first_line is None else first_line.length

    width, height = line_size(first_line, style)
    pango_baseline = pango.pango_layout_get_baseline(layout.layout)
    baseline = units_to_double(pango_baseline)
    layout.deactivate()
    return layout, length, resume_at, width, height, baseline
class Layout:
    """Object holding PangoLayout-related cdata pointers.

    A layout is built from a rendering ``context`` (or ``None``) and a
    ``style`` mapping. ``deactivate`` drops the Pango objects and the style,
    ``reactivate`` rebuilds them and sets the stored text again.

    """

    def __init__(self, context, style, justification_spacing=0,
                 max_width=None):
        """Store the layout parameters and create the Pango objects."""
        self.justification_spacing = justification_spacing
        self.setup(context, style)
        self.max_width = max_width

    def setup(self, context, style):
        """Create the Pango context and layout for ``style``.

        Sets ``self.layout`` and ``self.language``, and the font metrics used
        for text decoration (``ascent``, ``underline_position``,
        ``strikethrough_position``, ``underline_thickness`` and
        ``strikethrough_thickness``). The five metrics are all ``None`` when
        ``style['text_decoration_line']`` is ``'none'``.

        """
        self.context = context
        self.style = style
        self.first_line_direction = 0

        # Without context, use a new font map instead of the shared one.
        if context is None:
            font_map = ffi.gc(
                pangoft2.pango_ft2_font_map_new(), gobject.g_object_unref)
        else:
            font_map = context.font_config.font_map
        pango_context = ffi.gc(
            pango.pango_font_map_create_context(font_map),
            gobject.g_object_unref)
        pango.pango_context_set_round_glyph_positions(pango_context, False)

        # font-language-override wins over the lang of the style; its value
        # is translated through LST_TO_ISO when a mapping is known.
        if style['font_language_override'] != 'normal':
            lang_p, lang = unicode_to_char_p(LST_TO_ISO.get(
                style['font_language_override'].lower(),
                style['font_language_override']))
        elif style['lang']:
            lang_p, lang = unicode_to_char_p(style['lang'])
        else:
            lang = None
            self.language = pango.pango_language_get_default()
        if lang:
            self.language = pango.pango_language_from_string(lang_p)
            pango.pango_context_set_language(pango_context, self.language)

        assert not isinstance(style['font_family'], str), (
            'font_family should be a list')
        font_description = get_font_description(style)
        self.layout = ffi.gc(
            pango.pango_layout_new(pango_context),
            gobject.g_object_unref)
        pango.pango_layout_set_font_description(self.layout, font_description)

        text_decoration = style['text_decoration_line']
        if text_decoration != 'none':
            metrics = ffi.gc(
                pango.pango_context_get_metrics(
                    pango_context, font_description, self.language),
                pango.pango_font_metrics_unref)
            self.ascent = units_to_double(
                pango.pango_font_metrics_get_ascent(metrics))
            self.underline_position = units_to_double(
                pango.pango_font_metrics_get_underline_position(metrics))
            self.strikethrough_position = units_to_double(
                pango.pango_font_metrics_get_strikethrough_position(metrics))
            self.underline_thickness = units_to_double(
                pango.pango_font_metrics_get_underline_thickness(metrics))
            self.strikethrough_thickness = units_to_double(
                pango.pango_font_metrics_get_strikethrough_thickness(metrics))
        else:
            # Reset every decoration metric, including the thicknesses, so
            # that no attribute is missing on a fresh layout and no value
            # from a previous setup() survives a reactivate() call.
            self.ascent = None
            self.underline_position = None
            self.strikethrough_position = None
            self.underline_thickness = None
            self.strikethrough_thickness = None

        features = font_features(
            style['font_kerning'], style['font_variant_ligatures'],
            style['font_variant_position'], style['font_variant_caps'],
            style['font_variant_numeric'], style['font_variant_alternates'],
            style['font_variant_east_asian'], style['font_feature_settings'])
        if features and context:
            features = ','.join(
                f'{key} {value}' for key, value in features.items()).encode()
            # In the meantime, keep a cache to avoid leaking too many of them.
            attr = context.font_features.setdefault(
                features, pango.pango_attr_font_features_new(features))
            attr_list = pango.pango_attr_list_new()
            pango.pango_attr_list_insert(attr_list, attr)
            pango.pango_layout_set_attributes(self.layout, attr_list)

    def get_first_line(self):
        """Return ``(first_line, index)`` for the current layout.

        ``index`` is the start index of the second line, or ``None`` when
        there is no second line. The resolved direction of the first line is
        stored in ``self.first_line_direction``.

        """
        first_line = pango.pango_layout_get_line_readonly(self.layout, 0)
        second_line = pango.pango_layout_get_line_readonly(self.layout, 1)
        index = None if second_line == ffi.NULL else second_line.start_index
        self.first_line_direction = first_line.resolved_dir
        return first_line, index

    def set_text(self, text, justify=False):
        """Set the text of the layout, with its spacing attributes.

        Only the part of ``text`` up to one character after the first newline
        is kept. Letter spacing, word spacing (plus the justification spacing
        when ``justify`` is true) and the attribute disabling hyphen
        insertion for breakable words are added as Pango attributes. Tab
        stops are set when the text contains a tab.

        """
        index = text.find('\n')
        if index != -1:
            # Keep only the first line plus one character, we don't need more
            text = text[:index+2]
        self.text = text
        text, bytestring = unicode_to_char_p(text)
        pango.pango_layout_set_text(self.layout, text, -1)

        word_spacing = self.style['word_spacing']
        if justify:
            # Justification is needed when drawing text but is useless during
            # layout, when it can be ignored.
            word_spacing += self.justification_spacing

        letter_spacing = self.style['letter_spacing']
        if letter_spacing == 'normal':
            letter_spacing = 0

        word_breaking = (
            self.style['overflow_wrap'] in ('anywhere', 'break-word'))

        if self.text and (word_spacing or letter_spacing or word_breaking):
            # Reuse the attributes already set on the layout, if any.
            attr_list = pango.pango_layout_get_attributes(self.layout)
            if attr_list == ffi.NULL:
                attr_list = ffi.gc(
                    pango.pango_attr_list_new(),
                    pango.pango_attr_list_unref)

            def add_attr(start, end, spacing):
                # Apply ``spacing`` to the bytes between ``start`` and ``end``.
                attr = pango.pango_attr_letter_spacing_new(spacing)
                attr.start_index, attr.end_index = start, end
                pango.pango_attr_list_change(attr_list, attr)

            if letter_spacing:
                letter_spacing = units_from_double(letter_spacing)
                add_attr(0, len(bytestring), letter_spacing)

            if word_spacing:
                if bytestring == b' ':
                    # We need more than one space to set word spacing
                    self.text = ' \u200b'  # Space + zero-width space
                    text, bytestring = unicode_to_char_p(self.text)
                    pango.pango_layout_set_text(self.layout, text, -1)

                # Word spacing is set as extra letter spacing on each space.
                space_spacing = (
                    units_from_double(word_spacing) + letter_spacing)
                position = bytestring.find(b' ')
                # Pango gives only half of word-spacing on boundaries
                boundary_positions = (0, len(bytestring) - 1)
                while position != -1:
                    factor = 1 + (position in boundary_positions)
                    add_attr(position, position + 1, factor * space_spacing)
                    position = bytestring.find(b' ', position + 1)

            if word_breaking:
                attr = pango.pango_attr_insert_hyphens_new(False)
                attr.start_index, attr.end_index = 0, len(bytestring)
                pango.pango_attr_list_change(attr_list, attr)

            pango.pango_layout_set_attributes(self.layout, attr_list)

        # Tabs width
        if b'\t' in bytestring:
            self.set_tabs()

    def set_tabs(self):
        """Set the tab stops of the layout from ``style['tab_size']``.

        An integer tab size is a number of spaces, measured with a temporary
        layout using the same style. Other values provide the width through
        their ``value`` attribute.

        """
        if isinstance(self.style['tab_size'], int):
            layout = Layout(
                self.context, self.style, self.justification_spacing)
            layout.set_text(' ' * self.style['tab_size'])
            line, _ = layout.get_first_line()
            width, _ = line_size(line, self.style)
            width = int(round(width))
        else:
            width = int(self.style['tab_size'].value)
        # 0 is not handled correctly by Pango
        array = ffi.gc(
            pango.pango_tab_array_new_with_positions(
                1, True, pango.PANGO_TAB_LEFT, width or 1),
            pango.pango_tab_array_free)
        pango.pango_layout_set_tabs(self.layout, array)

    def deactivate(self):
        """Drop the references to the Pango layout, language and style."""
        del self.layout, self.language, self.style

    def reactivate(self, style):
        """Rebuild the Pango objects for ``style`` and set the text again.

        The stored text is set with justification enabled.

        """
        self.setup(self.context, style)
        self.set_text(self.text, justify=True)
def create_layout(text, style, context, max_width, justification_spacing):
    """Return a ``Layout`` holding ``text``, with default Pango line-breaks.

    The width of the Pango layout is only set when ``max_width`` is given,
    when the ``white_space`` value allows wrapping and when the width is
    small enough. Negative widths are clamped to 0.

    """
    new_layout = Layout(context, style, justification_spacing, max_width)

    wrapping = style['white_space'] in ('normal', 'pre-wrap', 'pre-line')
    # max_width * Pango.SCALE == max_width * 1024 has to fit in a signed
    # integer. Bigger values are treated as None: unconstrained width.
    if wrapping and max_width is not None and max_width < 2 ** 21:
        pango_width = units_from_double(max(0, max_width))
        pango.pango_layout_set_width(new_layout.layout, pango_width)

    new_layout.set_text(text)
    return new_layout
def split_first_line(text, style, context, max_width, justification_spacing,
                     is_line_start=True, minimum=False):
    """Fit as much as possible in the available width for one line of text.

    ``text`` is the string to split and ``style`` the mapping of computed
    values used for fonts, white space, hyphenation and word breaking.
    ``context`` is given to the created layouts and holds the cache of
    hyphenation dictionaries. ``max_width`` is the available width, ignored
    when the ``white_space`` value forbids wrapping.

    ``is_line_start`` and ``minimum`` are only used to decide whether a too
    long word can be broken: ``overflow-wrap`` only applies when
    ``is_line_start`` is true, and its ``break-word`` value is ignored when
    ``minimum`` is true.

    Return ``(layout, length, resume_index, width, height, baseline)``.

    ``layout``: a pango Layout with the first line
    ``length``: length in UTF-8 bytes of the first line
    ``resume_index``: The number of UTF-8 bytes to skip for the next line.
                      May be ``None`` if the whole text fits in one line.
                      This may be greater than ``length`` in case of preserved
                      newline characters.
    ``width``: width in pixels of the first line
    ``height``: height in pixels of the first line
    ``baseline``: baseline in pixels of the first line

    """
    # See https://www.w3.org/TR/css-text-3/#white-space-property
    text_wrap = style['white_space'] in ('normal', 'pre-wrap', 'pre-line')
    space_collapse = style['white_space'] in ('normal', 'nowrap', 'pre-line')

    # Keep the given width for the layout, even when wrapping is disabled.
    original_max_width = max_width
    if not text_wrap:
        max_width = None

    # Step #1: Get a draft layout with the first line
    if max_width is not None and max_width != inf and style['font_size']:
        short_text = text
        if max_width == 0:
            # Trying to find minimum size, let's naively split on spaces and
            # keep one word + one letter
            space_index = text.find(' ')
            if space_index != -1:
                short_text = text[:space_index+2]  # index + space + one letter
        else:
            # Estimate the number of characters from the width / font size
            # ratio.
            short_text = text[:int(max_width / style['font_size'] * 2.5)]
        # Try to use a small amount of text instead of the whole text
        layout = create_layout(
            short_text, style, context, max_width, justification_spacing)
        first_line, resume_index = layout.get_first_line()
        if resume_index is None and short_text != text:
            # The small amount of text fits in one line, give up and use
            # the whole text
            layout.set_text(text)
            first_line, resume_index = layout.get_first_line()
    else:
        layout = create_layout(
            text, style, context, original_max_width, justification_spacing)
        first_line, resume_index = layout.get_first_line()

    # Step #2: Don't split lines when it's not needed
    if max_width is None:
        # The first line can take all the place needed
        return first_line_metrics(
            first_line, text, layout, resume_index, space_collapse, style)
    first_line_width, _ = line_size(first_line, style)
    if resume_index is None and first_line_width <= max_width:
        # The first line fits in the available width
        return first_line_metrics(
            first_line, text, layout, resume_index, space_collapse, style)

    # Step #3: Try to put the first word of the second line on the first line
    # https://mail.gnome.org/archives/gtk-i18n-list/2013-September/msg00006
    # is a good thread related to this problem.
    # Indexes are counted in UTF-8 bytes, so the encoded text is sliced.
    first_line_text = text.encode()[:resume_index].decode()
    first_line_fits = (
        first_line_width <= max_width or
        ' ' in first_line_text.strip() or
        can_break_text(first_line_text.strip(), style['lang']))
    if first_line_fits:
        # The first line fits but may have been cut too early by Pango
        second_line_text = text.encode()[resume_index:].decode()
    else:
        # The line can't be split earlier, try to hyphenate the first word.
        first_line_text = ''
        second_line_text = text

    next_word = second_line_text.split(' ', 1)[0]
    if next_word:
        if space_collapse:
            # next_word might fit without a space afterwards
            # only try when space collapsing is allowed
            new_first_line_text = first_line_text + next_word
            layout.set_text(new_first_line_text)
            first_line, resume_index = layout.get_first_line()
            if resume_index is None:
                if first_line_text:
                    # The next word fits in the first line, keep the layout
                    resume_index = len(new_first_line_text.encode()) + 1
                    return first_line_metrics(
                        first_line, text, layout, resume_index, space_collapse,
                        style)
                else:
                    # Second line is None
                    resume_index = first_line.length + 1
                    if resume_index >= len(text.encode()):
                        resume_index = None
    elif first_line_text:
        # We found something on the first line but we did not find a word on
        # the next line, no need to hyphenate, we can keep the current layout
        return first_line_metrics(
            first_line, text, layout, resume_index, space_collapse, style)

    # Step #4: Try to hyphenate
    hyphens = style['hyphens']
    lang = style['lang'] and pyphen.language_fallback(style['lang'])
    total, left, right = style['hyphenate_limit_chars']
    hyphenated = False
    soft_hyphen = '\xad'

    auto_hyphenation = manual_hyphenation = False
    if hyphens != 'none':
        manual_hyphenation = soft_hyphen in first_line_text + next_word
    if hyphens == 'auto' and lang:
        next_word_boundaries = get_next_word_boundaries(second_line_text, lang)
        if next_word_boundaries:
            # We have a word to hyphenate
            start_word, stop_word = next_word_boundaries
            next_word = second_line_text[start_word:stop_word]
            if stop_word - start_word >= total:
                # This word is long enough
                first_line_width, _ = line_size(first_line, style)
                space = max_width - first_line_width
                # The limit zone is a percentage of the available width or a
                # plain value.
                if style['hyphenate_limit_zone'].unit == '%':
                    limit_zone = (
                        max_width * style['hyphenate_limit_zone'].value / 100)
                else:
                    limit_zone = style['hyphenate_limit_zone'].value
                if space > limit_zone or space < 0:
                    # Available space is worth the try, or the line is even too
                    # long to fit: try to hyphenate
                    auto_hyphenation = True

    # Automatic hyphenation opportunities within a word must be ignored if the
    # word contains a conditional hyphen, in favor of the conditional
    # hyphen(s).
    # See https://drafts.csswg.org/css-text-3/#valdef-hyphens-auto
    if manual_hyphenation:
        # Manual hyphenation: check that the line ends with a soft
        # hyphen and add the missing hyphen
        if first_line_text.endswith(soft_hyphen):
            # The first line has been split on a soft hyphen
            if ' ' in first_line_text:
                first_line_text, next_word = first_line_text.rsplit(' ', 1)
                next_word = f' {next_word}'
                layout.set_text(first_line_text)
                first_line, _ = layout.get_first_line()
                resume_index = len((f'{first_line_text} ').encode())
            else:
                first_line_text, next_word = '', first_line_text
        # Candidate word parts end at each soft hyphen, longest part first.
        soft_hyphen_indexes = [
            match.start() for match in re.finditer(soft_hyphen, next_word)]
        soft_hyphen_indexes.reverse()
        dictionary_iterations = [next_word[:i+1] for i in soft_hyphen_indexes]
        start_word = 0
    elif auto_hyphenation:
        # Pyphen dictionaries are cached in the context.
        dictionary_key = (lang, left, right, total)
        dictionary = context.dictionaries.get(dictionary_key)
        if dictionary is None:
            dictionary = pyphen.Pyphen(lang=lang, left=left, right=right)
            context.dictionaries[dictionary_key] = dictionary
        dictionary_iterations = [
            start for start, end in dictionary.iterate(next_word)]
    else:
        dictionary_iterations = []

    if dictionary_iterations:
        # Try each candidate in order and keep the first one giving a single
        # line that fits, or the last candidate if it gives a single line.
        for first_word_part in dictionary_iterations:
            new_first_line_text = (
                first_line_text +
                second_line_text[:start_word] +
                first_word_part)
            hyphenated_first_line_text = (
                new_first_line_text + style['hyphenate_character'])
            new_layout = create_layout(
                hyphenated_first_line_text, style, context, max_width,
                justification_spacing)
            new_first_line, index = new_layout.get_first_line()
            new_first_line_width, _ = line_size(new_first_line, style)
            new_space = max_width - new_first_line_width
            hyphenated = index is None and (
                new_space >= 0 or first_word_part == dictionary_iterations[-1])
            if hyphenated:
                layout = new_layout
                first_line = new_first_line
                resume_index = len(new_first_line_text.encode())
                break

        if not hyphenated and not first_line_text:
            # Recreate the layout with no max_width to be sure that
            # we don't break before or inside the hyphenate character
            hyphenated = True
            layout.set_text(hyphenated_first_line_text)
            pango.pango_layout_set_width(layout.layout, -1)
            first_line, _ = layout.get_first_line()
            resume_index = len(new_first_line_text.encode())
            if text[len(first_line_text)] == soft_hyphen:
                resume_index += len(soft_hyphen.encode())

    if not hyphenated and first_line_text.endswith(soft_hyphen):
        # Recreate the layout with no max_width to be sure that
        # we don't break inside the hyphenate-character string
        hyphenated = True
        hyphenated_first_line_text = (
            first_line_text + style['hyphenate_character'])
        layout.set_text(hyphenated_first_line_text)
        pango.pango_layout_set_width(layout.layout, -1)
        first_line, _ = layout.get_first_line()
        resume_index = len(first_line_text.encode())

    # Step 5: Try to break word if it's too long for the line
    overflow_wrap = style['overflow_wrap']
    first_line_width, _ = line_size(first_line, style)
    space = max_width - first_line_width
    # If we can break words and the first line is too long
    can_break = (
        style['word_break'] == 'break-all' or (
            is_line_start and (
                overflow_wrap == 'anywhere' or
                (overflow_wrap == 'break-word' and not minimum))))
    if space < 0 and can_break:
        # Is it really OK to remove hyphenation for word-break ?
        hyphenated = False
        # TODO: Modify code to preserve W3C condition:
        # "Shaping characters are still shaped as if the word were not broken"
        # The way new lines are processed in this function (one by one with no
        # memory of the last) prevents shaping characters (arabic, for
        # instance) from keeping their shape when wrapped on the next line with
        # pango layout. Maybe insert Unicode shaping characters in text?
        layout.set_text(text)
        pango.pango_layout_set_width(
            layout.layout, units_from_double(max_width))
        pango.pango_layout_set_wrap(
            layout.layout, PANGO_WRAP_MODE['WRAP_CHAR'])
        first_line, index = layout.get_first_line()
        resume_index = index or first_line.length
        if resume_index >= len(text.encode()):
            resume_index = None

    return first_line_metrics(
        first_line, text, layout, resume_index, space_collapse, style,
        hyphenated, style['hyphenate_character'])
def get_log_attrs(text, lang):
    """Return ``(bytestring, log_attrs)`` computed by Pango for ``text``.

    ``log_attrs`` is a ``PangoLogAttr`` array with one item more than the
    number of characters in ``text``. The language is taken from ``lang``
    when given, and is the default Pango language otherwise.

    """
    if lang:
        lang_p, lang = unicode_to_char_p(lang)
    else:
        lang = None
        language = pango.pango_language_get_default()
    if lang:
        language = pango.pango_language_from_string(lang_p)

    # TODO: this should be removed when bidi is supported
    # Replace U+202A to U+202E by zero-width spaces.
    text = text.translate(dict.fromkeys(range(0x202a, 0x202f), '\u200b'))

    text_p, bytestring = unicode_to_char_p(text)
    attrs_count = len(text) + 1
    log_attrs = ffi.new('PangoLogAttr[]', attrs_count)
    pango.pango_get_log_attrs(
        text_p, len(bytestring), -1, language, log_attrs, attrs_count)
    return bytestring, log_attrs
def can_break_text(text, lang):
    """Tell whether a line break is allowed inside ``text``.

    Return ``None`` for texts shorter than 2 characters. Otherwise return a
    boolean: only break opportunities strictly between the first and the last
    character are taken into account.

    """
    if not text or len(text) < 2:
        return None
    _, log_attrs = get_log_attrs(text, lang)
    # Skip the attributes at the start and at the end of the text.
    for attr in log_attrs[1:len(text)]:
        if attr.is_line_break:
            return True
    return False
def get_next_word_boundaries(text, lang):
    """Return ``(start, end)`` positions of the first word ending in ``text``.

    ``end`` is the first position flagged as a word end, ``start`` is the
    last word boundary seen before it. Return ``None`` for texts shorter than
    2 characters or when no word end is found.

    """
    if not text or len(text) < 2:
        return None
    _, log_attrs = get_log_attrs(text, lang)
    for position, attr in enumerate(log_attrs):
        if attr.is_word_end:
            return word_start, position
        if attr.is_word_boundary:
            word_start = position
    return None
def get_last_word_end(text, lang):
    """Return the position of the last word end before the end of ``text``.

    The attribute at the very end of the text is ignored. Return ``None`` for
    texts shorter than 2 characters or when no word end is found.

    """
    if not text or len(text) < 2:
        return None
    _, log_attrs = get_log_attrs(text, lang)
    attrs = list(log_attrs)
    # Walk backwards, starting just before the last attribute.
    for offset in range(1, len(attrs)):
        if attrs[-1 - offset].is_word_end:
            return len(text) - offset
    return None