feat: add comprehensive GitHub workflow and development tools
This commit is contained in:
113
app/.venv/Lib/site-packages/cssselect2/__init__.py
Normal file
113
app/.venv/Lib/site-packages/cssselect2/__init__.py
Normal file
@@ -0,0 +1,113 @@
|
||||
"""CSS4 selectors for Python.
|
||||
|
||||
cssselect2 is a straightforward implementation of CSS4 Selectors for markup
|
||||
documents (HTML, XML, etc.) that can be read by ElementTree-like parsers
|
||||
(including cElementTree, lxml, html5lib, etc.)
|
||||
|
||||
"""
|
||||
|
||||
from webencodings import ascii_lower
|
||||
|
||||
# Classes are imported here to expose them at the top level of the module
|
||||
from .compiler import compile_selector_list # noqa
|
||||
from .parser import SelectorError # noqa
|
||||
from .tree import ElementWrapper # noqa
|
||||
|
||||
# Package version string, bound to both ``VERSION`` and ``__version__``.
VERSION = __version__ = '0.8.0'
|
||||
|
||||
|
||||
class Matcher:
    """Store CSS selectors and match them against HTML elements.

    Selectors are filed into buckets keyed by their id, class name,
    lower-cased local name or namespace (first one that is set), so that
    :meth:`match` only has to run the tests of buckets relevant to the
    element at hand.

    """

    def __init__(self):
        # Keyed buckets: key -> list of entries.
        self.id_selectors = {}
        self.class_selectors = {}
        self.lower_local_name_selectors = {}
        self.namespace_selectors = {}
        # Unkeyed buckets: plain lists of entries.
        self.lang_attr_selectors = []
        self.other_selectors = []
        # Number of add_selector() calls so far, used as insertion rank.
        self.order = 0

    def add_selector(self, selector, payload):
        """Register a selector together with its payload.

        :param selector:
            A :class:`compiler.CompiledSelector` object.
        :param payload:
            Some data associated to the selector,
            such as :class:`declarations <tinycss2.ast.Declaration>`
            parsed from the :attr:`tinycss2.ast.QualifiedRule.content`
            of a style rule.
            It can be any Python object,
            and will be returned as-is by :meth:`match`.

        """
        # The rank is consumed even by selectors that can never match.
        self.order += 1
        if selector.never_matches:
            return

        entry = (
            selector.test,
            selector.specificity,
            self.order,
            selector.pseudo_element,
            payload,
        )

        # Pick the first applicable bucket, then append once.
        if selector.id is not None:
            bucket = self.id_selectors.setdefault(selector.id, [])
        elif selector.class_name is not None:
            bucket = self.class_selectors.setdefault(selector.class_name, [])
        elif selector.local_name is not None:
            bucket = self.lower_local_name_selectors.setdefault(
                selector.lower_local_name, [])
        elif selector.namespace is not None:
            bucket = self.namespace_selectors.setdefault(selector.namespace, [])
        elif selector.requires_lang_attr:
            bucket = self.lang_attr_selectors
        else:
            bucket = self.other_selectors
        bucket.append(entry)

    def match(self, element):
        """Return what matches the given element.

        :param element:
            An :class:`ElementWrapper`.
        :returns:
            A sorted list of ``(specificity, order, pseudo_element, payload)``
            tuples for the selectors matching ``element``: lowest
            specificity first, then in order of addition with
            :meth:`add_selector` among selectors of equal specificity.

        """
        matched = []
        collect = self.add_relevant_selectors

        element_id = element.id
        if element_id is not None:
            id_bucket = self.id_selectors.get(element_id)
            if id_bucket is not None:
                collect(element, id_bucket, matched)

        for class_name in element.classes:
            class_bucket = self.class_selectors.get(class_name)
            if class_bucket is not None:
                collect(element, class_bucket, matched)

        name_bucket = self.lower_local_name_selectors.get(
            ascii_lower(element.local_name))
        if name_bucket is not None:
            collect(element, name_bucket, matched)

        namespace_bucket = self.namespace_selectors.get(element.namespace_url)
        if namespace_bucket is not None:
            collect(element, namespace_bucket, matched)

        # Selectors needing a "lang" attribute are only tried when it exists.
        if 'lang' in element.etree_element.attrib:
            collect(element, self.lang_attr_selectors, matched)

        collect(element, self.other_selectors, matched)

        matched.sort()
        return matched

    @staticmethod
    def add_relevant_selectors(element, selectors, relevant_selectors):
        """Append to ``relevant_selectors`` the entries whose test passes.

        Each item of ``selectors`` is a
        ``(test, specificity, order, pseudo, payload)`` tuple; the tuple
        appended for a passing test drops the ``test`` callable.

        """
        relevant_selectors.extend(
            (specificity, order, pseudo, payload)
            for test, specificity, order, pseudo, payload in selectors
            if test(element))
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
426
app/.venv/Lib/site-packages/cssselect2/compiler.py
Normal file
426
app/.venv/Lib/site-packages/cssselect2/compiler.py
Normal file
@@ -0,0 +1,426 @@
|
||||
import re
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from tinycss2.nth import parse_nth
|
||||
from webencodings import ascii_lower
|
||||
|
||||
from . import parser
|
||||
from .parser import SelectorError
|
||||
|
||||
# http://dev.w3.org/csswg/selectors/#whitespace
|
||||
# Callable returning the list of maximal runs of characters that are not
# a space, tab, carriage return, line feed or form feed.
split_whitespace = re.compile('[^ \t\r\n\f]+').findall
|
||||
|
||||
|
||||
def compile_selector_list(input, namespaces=None):
    """Compile a (comma-separated) list of selectors.

    :param input:
        A string, or an iterable of tinycss2 component values such as
        the :attr:`tinycss2.ast.QualifiedRule.prelude` of a style rule.
    :param namespaces:
        An optional dictionary of all `namespace prefix declarations
        <http://www.w3.org/TR/selectors/#nsdecl>`_ in scope for this selector.
        Keys are namespace prefixes as strings, or ``None`` for the default
        namespace.
        Values are namespace URLs as strings.
        If omitted, assume that no prefix is declared.
    :returns:
        A list of opaque :class:`compiler.CompiledSelector` objects.

    """
    compiled = []
    for parsed_selector in parser.parse(input, namespaces):
        compiled.append(CompiledSelector(parsed_selector))
    return compiled
|
||||
|
||||
|
||||
class CompiledSelector:
    """Abstract representation of a selector.

    Holds the compiled ``test`` callable plus the facts about the rightmost
    compound selector (id, class name, local name, namespace, ``lang``
    attribute requirement) that are exposed as attributes.

    """

    def __init__(self, parsed_selector):
        expression = _compile_node(parsed_selector.parsed_tree)
        # The constant expression '0' means the selector is always false.
        self.never_matches = expression == '0'
        # Names the generated expression may reference besides ``el``.
        helpers = {
            'split_whitespace': split_whitespace,
            'ascii_lower': ascii_lower,
            'urlparse': urlparse,
        }
        self.test = eval('lambda el: ' + expression, helpers, {})
        self.specificity = parsed_selector.specificity
        self.pseudo_element = parsed_selector.pseudo_element
        self.id = None
        self.class_name = None
        self.local_name = None
        self.lower_local_name = None
        self.namespace = None
        self.requires_lang_attr = False

        # Only the rightmost compound selector applies to the subject.
        subject = parsed_selector.parsed_tree
        if isinstance(subject, parser.CombinedSelector):
            subject = subject.right

        for simple in subject.simple_selectors:
            if isinstance(simple, parser.IDSelector):
                self.id = simple.ident
            elif isinstance(simple, parser.ClassSelector):
                self.class_name = simple.class_name
            elif isinstance(simple, parser.LocalNameSelector):
                self.local_name = simple.local_name
                self.lower_local_name = simple.lower_local_name
            elif isinstance(simple, parser.NamespaceSelector):
                self.namespace = simple.namespace
            elif (isinstance(simple, parser.AttributeSelector)
                    and simple.name == 'lang'):
                self.requires_lang_attr = True
|
||||
|
||||
|
||||
def _compile_node(selector):
    """Return a boolean expression, as a Python source string.

    When evaluated in a context where the `el` variable is an
    :class:`cssselect2.tree.Element` object, tells whether the element is a
    subject of `selector`.

    The strings ``'1'`` and ``'0'`` are used for "always matches" and
    "never matches"; callers compare against them to simplify expressions.

    :param selector:
        A node of the parsed selector tree (one of the ``parser`` selector
        classes).
    :returns:
        A non-empty Python expression string.
    :raises SelectorError:
        For unknown combinators, attribute operators or pseudo-classes, and
        for invalid functional pseudo-class arguments.
    :raises NotImplementedError:
        For attribute selectors in any namespace.
    :raises TypeError:
        If ``selector`` is not a known node type.

    """
    # To avoid precedence-related bugs, any sub-expression that is passed
    # around must be "atomic": add parentheses when the top-level would be
    # an operator. Bare literals and function calls are fine.

    # 1 and 0 are used for True and False to avoid global lookups.

    if isinstance(selector, parser.CombinedSelector):
        left_inside = _compile_node(selector.left)
        if left_inside == '0':
            return '0'  # 0 and x == 0
        elif left_inside == '1':
            # 1 and x == x, but the element matching 1 still needs to exist.
            if selector.combinator in (' ', '>'):
                left = 'el.parent is not None'
            elif selector.combinator in ('~', '+'):
                left = 'el.previous is not None'
            else:
                raise SelectorError('Unknown combinator', selector.combinator)
        # Rebind the `el` name inside a generator-expressions (in a new scope)
        # so that 'left_inside' applies to different elements.
        elif selector.combinator == ' ':
            left = f'any(({left_inside}) for el in el.ancestors)'
        elif selector.combinator == '>':
            left = (
                f'next(el is not None and ({left_inside}) '
                'for el in [el.parent])')
        elif selector.combinator == '+':
            left = (
                f'next(el is not None and ({left_inside}) '
                'for el in [el.previous])')
        elif selector.combinator == '~':
            left = f'any(({left_inside}) for el in el.previous_siblings)'
        else:
            raise SelectorError('Unknown combinator', selector.combinator)

        right = _compile_node(selector.right)
        if right == '0':
            return '0'  # 0 and x == 0
        elif right == '1':
            return left  # 1 and x == x
        else:
            # Evaluate combinators right to left
            return f'({right}) and ({left})'

    elif isinstance(selector, parser.CompoundSelector):
        sub_expressions = [
            expr for expr in [
                _compile_node(simple)
                for simple in selector.simple_selectors]
            if expr != '1']
        if len(sub_expressions) == 1:
            return sub_expressions[0]
        elif '0' in sub_expressions:
            return '0'
        elif sub_expressions:
            return ' and '.join(f'({el})' for el in sub_expressions)
        else:
            return '1'  # all([]) == True

    elif isinstance(selector, parser.NegationSelector):
        sub_expressions = [
            _compile_node(sub.parsed_tree) for sub in selector.selector_list]
        # not (1 or x) == 0: a single always-true argument is enough for the
        # negation to never match, so it must not be silently dropped.
        if not sub_expressions or '1' in sub_expressions:
            return '0'
        return f'not ({" or ".join(f"({expr})" for expr in sub_expressions)})'

    elif isinstance(selector, parser.RelationalSelector):
        sub_expressions = []
        for relative_selector in selector.selector_list:
            expression = _compile_node(relative_selector.selector.parsed_tree)
            if expression == '0':
                continue
            if relative_selector.combinator == ' ':
                elements = 'list(el.iter_subtree())[1:]'
            elif relative_selector.combinator == '>':
                elements = 'el.iter_children()'
            elif relative_selector.combinator == '+':
                elements = 'list(el.iter_next_siblings())[:1]'
            elif relative_selector.combinator == '~':
                elements = 'el.iter_next_siblings()'
            else:
                raise SelectorError(
                    'Unknown combinator', relative_selector.combinator)
            sub_expressions.append(f'(any({expression} for el in {elements}))')
        if not sub_expressions:
            # Nothing left that can match: an empty string would not be a
            # valid expression, so return the "never matches" constant.
            return '0'
        return ' or '.join(sub_expressions)

    elif isinstance(selector, (
            parser.MatchesAnySelector, parser.SpecificityAdjustmentSelector)):
        sub_expressions = [
            expr for expr in [
                _compile_node(sub.parsed_tree)
                for sub in selector.selector_list]
            if expr != '0']
        if not sub_expressions:
            return '0'
        return ' or '.join(f'({expr})' for expr in sub_expressions)

    elif isinstance(selector, parser.LocalNameSelector):
        if selector.lower_local_name == selector.local_name:
            return f'el.local_name == {selector.local_name!r}'
        else:
            return (
                f'el.local_name == ({selector.lower_local_name!r} '
                f'if el.in_html_document else {selector.local_name!r})')

    elif isinstance(selector, parser.NamespaceSelector):
        return f'el.namespace_url == {selector.namespace!r}'

    elif isinstance(selector, parser.ClassSelector):
        return f'{selector.class_name!r} in el.classes'

    elif isinstance(selector, parser.IDSelector):
        return f'el.id == {selector.ident!r}'

    elif isinstance(selector, parser.AttributeSelector):
        if selector.namespace is not None:
            if selector.namespace:
                if selector.name == selector.lower_name:
                    key = repr(f'{{{selector.namespace}}}{selector.name}')
                else:
                    lower = f'{{{selector.namespace}}}{selector.lower_name}'
                    name = f'{{{selector.namespace}}}{selector.name}'
                    key = f'({lower!r} if el.in_html_document else {name!r})'
            else:
                if selector.name == selector.lower_name:
                    key = repr(selector.name)
                else:
                    lower, name = selector.lower_name, selector.name
                    key = f'({lower!r} if el.in_html_document else {name!r})'
            value = selector.value
            attribute_value = f'el.etree_element.get({key}, "")'
            if selector.case_sensitive is False:
                value = value.lower()
                attribute_value += '.lower()'
            if selector.operator is None:
                return f'{key} in el.etree_element.attrib'
            elif selector.operator == '=':
                return (
                    f'{key} in el.etree_element.attrib and '
                    f'{attribute_value} == {value!r}')
            elif selector.operator == '~=':
                return (
                    '0' if len(value.split()) != 1 or value.strip() != value
                    else f'{value!r} in split_whitespace({attribute_value})')
            elif selector.operator == '|=':
                return (
                    f'{key} in el.etree_element.attrib and '
                    f'{attribute_value} == {value!r} or '
                    f'{attribute_value}.startswith({(value + "-")!r})')
            elif selector.operator == '^=':
                if value:
                    return f'{attribute_value}.startswith({value!r})'
                else:
                    return '0'
            elif selector.operator == '$=':
                return (
                    f'{attribute_value}.endswith({value!r})' if value else '0')
            elif selector.operator == '*=':
                return f'{value!r} in {attribute_value}' if value else '0'
            else:
                raise SelectorError('Unknown attribute operator', selector.operator)
        else:  # In any namespace
            raise NotImplementedError  # TODO

    elif isinstance(selector, parser.PseudoClassSelector):
        if selector.name in ('link', 'any-link', 'local-link'):
            test = html_tag_eq('a', 'area', 'link')
            test += ' and el.etree_element.get("href") is not None '
            if selector.name == 'local-link':
                test += 'and not urlparse(el.etree_element.get("href")).scheme'
            return test
        elif selector.name == 'enabled':
            control = html_tag_eq(
                'button', 'input', 'select', 'textarea', 'option')
            group = html_tag_eq('optgroup', 'menuitem', 'fieldset')
            a = html_tag_eq('a', 'area', 'link')
            return (
                f'({control} and el.etree_element.get("disabled") is None'
                ' and not el.in_disabled_fieldset) or'
                f'({group} and el.etree_element.get("disabled") is None) or '
                f'({a} and el.etree_element.get("href") is not None)')
        elif selector.name == 'disabled':
            control = html_tag_eq(
                'button', 'input', 'select', 'textarea', 'option')
            group = html_tag_eq('optgroup', 'menuitem', 'fieldset')
            return (
                f'({control} and (el.etree_element.get("disabled") is not None'
                ' or el.in_disabled_fieldset)) or'
                f'({group} and el.etree_element.get("disabled") is not None)')
        elif selector.name == 'checked':
            control = html_tag_eq('input', 'menuitem')
            option = html_tag_eq('option')
            return (
                f'({control} and el.etree_element.get("checked") is not None and'
                ' ascii_lower(el.etree_element.get("type", "")) '
                ' in ("checkbox", "radio")) or ('
                f'{option} and el.etree_element.get("selected") is not None)')
        elif selector.name in (
                'visited', 'hover', 'active', 'focus', 'focus-within',
                'focus-visible', 'target', 'target-within', 'current', 'past',
                'future', 'playing', 'paused', 'seeking', 'buffering',
                'stalled', 'muted', 'volume-locked', 'user-valid',
                'user-invalid'):
            # Not applicable in a static context: never match.
            return '0'
        elif selector.name in ('root', 'scope'):
            return 'el.parent is None'
        elif selector.name == 'first-child':
            return 'el.index == 0'
        elif selector.name == 'last-child':
            return 'el.index + 1 == len(el.etree_siblings)'
        elif selector.name == 'first-of-type':
            return (
                'all(s.tag != el.etree_element.tag'
                ' for s in el.etree_siblings[:el.index])')
        elif selector.name == 'last-of-type':
            return (
                'all(s.tag != el.etree_element.tag'
                ' for s in el.etree_siblings[el.index + 1:])')
        elif selector.name == 'only-child':
            return 'len(el.etree_siblings) == 1'
        elif selector.name == 'only-of-type':
            return (
                'all(s.tag != el.etree_element.tag or i == el.index'
                ' for i, s in enumerate(el.etree_siblings))')
        elif selector.name == 'empty':
            return 'not (el.etree_children or el.etree_element.text)'
        else:
            raise SelectorError('Unknown pseudo-class', selector.name)

    elif isinstance(selector, parser.FunctionalPseudoClassSelector):
        if selector.name == 'lang':
            langs = []
            tokens = [
                token for token in selector.arguments
                if token.type not in ('whitespace', 'comment')]
            while tokens:
                token = tokens.pop(0)
                if token.type == 'ident':
                    langs.append(token.lower_value)
                elif token.type == 'string':
                    langs.append(ascii_lower(token.value))
                else:
                    raise SelectorError('Invalid arguments for :lang()')
                if tokens:
                    token = tokens.pop(0)
                    # Languages must be separated by a comma; anything else
                    # (including a second ident) is an error, not something
                    # to swallow silently.
                    if token != ',':
                        raise SelectorError('Invalid arguments for :lang()')
            if not langs:
                # An empty argument list would yield an empty expression.
                raise SelectorError('Invalid arguments for :lang()')
            return ' or '.join(
                f'el.lang == {lang!r} or el.lang.startswith({(lang + "-")!r})'
                for lang in langs)
        else:
            # Split the arguments into the An+B part and the optional
            # selector list following the first "of" identifier.
            nth = []
            selector_list = []
            current_list = nth
            for argument in selector.arguments:
                if argument.type == 'ident' and argument.value == 'of':
                    if current_list is nth:
                        current_list = selector_list
                        continue
                current_list.append(argument)

            if selector_list:
                test = ' and '.join(
                    _compile_node(sub.parsed_tree)
                    for sub in parser.parse(selector_list))
                if selector.name == 'nth-child':
                    count = (
                        f'sum(1 for el in el.previous_siblings if ({test}))')
                elif selector.name == 'nth-last-child':
                    count = (
                        'sum(1 for el in'
                        ' tuple(el.iter_siblings())[el.index + 1:]'
                        f' if ({test}))')
                elif selector.name == 'nth-of-type':
                    count = (
                        'sum(1 for s in ('
                        ' el for el in el.previous_siblings'
                        f' if ({test}))'
                        ' if s.etree_element.tag == el.etree_element.tag)')
                elif selector.name == 'nth-last-of-type':
                    count = (
                        'sum(1 for s in ('
                        ' el for el in'
                        ' tuple(el.iter_siblings())[el.index + 1:]'
                        f' if ({test}))'
                        ' if s.etree_element.tag == el.etree_element.tag)')
                else:
                    raise SelectorError('Unknown pseudo-class', selector.name)
                # NaN compares unequal to everything, so an element failing
                # the "of" test can never satisfy the An+B check below.
                count += f'if ({test}) else float("nan")'
            else:
                if current_list is selector_list:
                    # "of" was given without any selector after it.
                    raise SelectorError(
                        f'Invalid arguments for :{selector.name}()')
                if selector.name == 'nth-child':
                    count = 'el.index'
                elif selector.name == 'nth-last-child':
                    count = 'len(el.etree_siblings) - el.index - 1'
                elif selector.name == 'nth-of-type':
                    count = (
                        'sum(1 for s in el.etree_siblings[:el.index]'
                        ' if s.tag == el.etree_element.tag)')
                elif selector.name == 'nth-last-of-type':
                    count = (
                        'sum(1 for s in el.etree_siblings[el.index + 1:]'
                        ' if s.tag == el.etree_element.tag)')
                else:
                    raise SelectorError('Unknown pseudo-class', selector.name)

            result = parse_nth(nth)
            if result is None:
                raise SelectorError(
                    f'Invalid arguments for :{selector.name}()')
            a, b = result
            # x is the number of siblings before/after the element
            # Matches if a positive or zero integer n exists so that:
            # x = a*n + b-1
            # x = a*n + B
            B = b - 1  # noqa: N806
            if a == 0:
                # x = B
                return f'({count}) == {B}'
            else:
                # n = (x - B) / a
                return (
                    'next(r == 0 and n >= 0'
                    f' for n, r in [divmod(({count}) - {B}, {a})])')

    else:
        raise TypeError(type(selector), selector)
|
||||
|
||||
|
||||
def html_tag_eq(*local_names):
    """Generate expression testing equality with HTML local names.

    The returned source string compares ``el.local_name`` against the given
    names when ``el.in_html_document`` is true, and ``el.etree_element.tag``
    against the XHTML-namespaced names otherwise. A single name produces an
    ``==`` test, any other number of names an ``in (...)`` test.

    """
    xhtml_tags = [
        f'{{http://www.w3.org/1999/xhtml}}{name}' for name in local_names]

    if len(local_names) == 1:
        html_test = f'(el.local_name == {local_names[0]!r})'
        other_test = f'(el.etree_element.tag == {xhtml_tags[0]!r})'
    else:
        html_test = f'(el.local_name in ({", ".join(map(repr, local_names))}))'
        other_test = (
            f'(el.etree_element.tag in ({", ".join(map(repr, xhtml_tags))}))')

    return f'({html_test} if el.in_html_document else {other_test})'
|
||||
522
app/.venv/Lib/site-packages/cssselect2/parser.py
Normal file
522
app/.venv/Lib/site-packages/cssselect2/parser.py
Normal file
@@ -0,0 +1,522 @@
|
||||
from tinycss2 import parse_component_value_list
|
||||
|
||||
# Names exported by ``from <this module> import *``: only ``parse``.
__all__ = ['parse']
|
||||
|
||||
# Lower-case pseudo-element names accepted after a double colon ("::").
SUPPORTED_PSEUDO_ELEMENTS = {
    # As per CSS Pseudo-Elements Module Level 4
    'first-line',
    'first-letter',
    'prefix',
    'postfix',
    'selection',
    'target-text',
    'spelling-error',
    'grammar-error',
    'before',
    'after',
    'marker',
    'placeholder',
    'file-selector-button',
    # As per CSS Generated Content for Paged Media Module
    'footnote-call',
    'footnote-marker',
    # As per CSS Scoping Module Level 1
    'content',
    'shadow',
}
|
||||
|
||||
|
||||
def parse(input, namespaces=None, forgiving=False, relative=False):
    """Yield tinycss2 selectors found in given ``input``.

    :param input:
        A string, or an iterable of tinycss2 component values.
    :param namespaces:
        Mapping of namespace prefixes to URLs; a falsy value means none.
    :param forgiving:
        When true, selector errors are swallowed instead of raised: an error
        in the first selector ends the iteration, an error in a later one
        skips that selector, and unexpected tokens between selectors are
        ignored.
    :param relative:
        Passed on to :func:`parse_selector`.

    """
    if isinstance(input, str):
        input = parse_component_value_list(input)
    tokens = TokenStream(input)
    namespaces = namespaces or {}

    # First selector of the list.
    try:
        yield parse_selector(tokens, namespaces, relative)
    except SelectorError:
        if forgiving:
            return
        raise

    # Remaining selectors, each introduced by a comma.
    while True:
        token = tokens.next()
        if token is None:
            return
        if token == ',':
            try:
                yield parse_selector(tokens, namespaces, relative)
            except SelectorError:
                if not forgiving:
                    raise
        elif not forgiving:
            raise SelectorError(token, f'unexpected {token.type} token.')
|
||||
|
||||
|
||||
def parse_selector(tokens, namespaces, relative=False):
    """Parse one complex selector from ``tokens``.

    Returns a :class:`Selector`, wrapped in a :class:`RelativeSelector`
    carrying the leading combinator (a single space when none is written)
    if ``relative`` is true. Parsing stops at the end of the stream, at a
    comma, after a pseudo-element, or at a token that cannot continue the
    selector.

    """
    tokens.skip_whitespace_and_comment()
    if relative:
        leading = tokens.peek()
        if leading in ('>', '+', '~'):
            initial_combinator = leading.value
            tokens.next()
        else:
            initial_combinator = ' '
        tokens.skip_whitespace_and_comment()

    tree, pseudo_element = parse_compound_selector(tokens, namespaces)
    while True:
        # Skip whitespace and comments, remembering whether any whitespace
        # was seen: it is the descendant combinator.
        saw_whitespace = tokens.skip_whitespace()
        while tokens.skip_comment():
            saw_whitespace = tokens.skip_whitespace() or saw_whitespace

        selector = Selector(tree, pseudo_element)
        if relative:
            selector = RelativeSelector(initial_combinator, selector)
        if pseudo_element is not None:
            # Nothing may follow a pseudo-element.
            return selector

        upcoming = tokens.peek()
        if upcoming is None or upcoming == ',':
            return selector
        if upcoming in ('>', '+', '~'):
            combinator = upcoming.value
            tokens.next()
        elif saw_whitespace:
            combinator = ' '
        else:
            return selector

        compound, pseudo_element = parse_compound_selector(tokens, namespaces)
        tree = CombinedSelector(tree, combinator, compound)
|
||||
|
||||
|
||||
def parse_compound_selector(tokens, namespaces):
    """Parse a compound selector and an optional trailing pseudo-element.

    Returns ``(CompoundSelector, pseudo_element)``. Raises
    :class:`SelectorError` when neither a type selector, a simple selector
    nor a pseudo-element is found.

    """
    type_selectors = parse_type_selector(tokens, namespaces)
    simple_selectors = [] if type_selectors is None else type_selectors

    while True:
        simple_selector, pseudo_element = parse_simple_selector(
            tokens, namespaces)
        if simple_selector is None or pseudo_element is not None:
            break
        simple_selectors.append(simple_selector)

    found_something = (
        bool(simple_selectors)
        or type_selectors is not None
        or pseudo_element is not None)
    if found_something:
        return CompoundSelector(simple_selectors), pseudo_element

    upcoming = tokens.peek()
    peek_type = upcoming.type if upcoming else 'EOF'
    raise SelectorError(
        upcoming, f'expected a compound selector, got {peek_type}')
|
||||
|
||||
|
||||
def parse_type_selector(tokens, namespaces):
    """Parse an optional type selector at the stream position.

    Returns ``None`` when no qualified name is found, otherwise a list
    holding a :class:`LocalNameSelector` and/or a :class:`NamespaceSelector`
    (in that order) for the parts of the name that are not ``None``.

    """
    tokens.skip_whitespace()
    qualified_name = parse_qualified_name(tokens, namespaces)
    if qualified_name is None:
        return None

    namespace, local_name = qualified_name
    selectors = []
    if local_name is not None:
        selectors.append(LocalNameSelector(local_name))
    if namespace is not None:
        selectors.append(NamespaceSelector(namespace))
    return selectors
|
||||
|
||||
|
||||
def parse_simple_selector(tokens, namespaces):
    """Parse one simple selector or pseudo-element at the stream position.

    Returns a ``(simple_selector, pseudo_element)`` pair in which at most
    one item is not ``None``; ``(None, None)`` means the next token does not
    start a simple selector (nothing is consumed in that case).

    """
    token = tokens.peek()
    if token is None:
        return None, None

    # "#id"
    if token.type == 'hash' and token.is_identifier:
        tokens.next()
        return IDSelector(token.value), None

    # ".class"
    if token == '.':
        tokens.next()
        class_token = tokens.next()
        if class_token is None or class_token.type != 'ident':
            raise SelectorError(
                class_token, f'Expected a class name, got {class_token}')
        return ClassSelector(class_token.value), None

    # "[attribute]"
    if token.type == '[] block':
        tokens.next()
        attribute = parse_attribute_selector(
            TokenStream(token.content), namespaces)
        return attribute, None

    # ":pseudo-class", ":function(...)" or "::pseudo-element"
    if token == ':':
        tokens.next()
        following = tokens.next()

        if following == ':':
            following = tokens.next()
            if following is None or following.type != 'ident':
                raise SelectorError(
                    following,
                    f'Expected a pseudo-element name, got {following}')
            value = following.lower_value
            if value not in SUPPORTED_PSEUDO_ELEMENTS:
                raise SelectorError(
                    following,
                    f'Expected a supported pseudo-element, got {value}')
            return None, value

        if following is not None and following.type == 'ident':
            name = following.lower_value
            # These pseudo-elements are also accepted with a single colon.
            if name in ('before', 'after', 'first-line', 'first-letter'):
                return None, name
            return PseudoClassSelector(name), None

        if following is not None and following.type == 'function':
            name = following.lower_name
            if name in ('is', 'where', 'not', 'has'):
                return parse_logical_combination(following, namespaces, name), None
            return FunctionalPseudoClassSelector(name, following.arguments), None

        raise SelectorError(following, f'unexpected {following} token.')

    return None, None
|
||||
|
||||
|
||||
def parse_logical_combination(matches_any_token, namespaces, name):
    """Build the selector node for ``:is()``, ``:where()``, ``:not()``, ``:has()``.

    The function token's arguments are parsed as a selector list; selectors
    carrying a pseudo-element are dropped. ``:not()`` is the only one parsed
    non-forgivingly and ``:has()`` the only one parsed as relative selectors.

    """
    forgiving = name != 'not'
    relative = name == 'has'

    if name == 'is':
        selector_class = MatchesAnySelector
    elif name == 'where':
        selector_class = SpecificityAdjustmentSelector
    elif name == 'not':
        selector_class = NegationSelector
    elif name == 'has':
        selector_class = RelationalSelector

    parsed = parse(matches_any_token.arguments, namespaces, forgiving, relative)
    selectors = [
        selector for selector in parsed if selector.pseudo_element is None]
    return selector_class(selectors)
|
||||
|
||||
|
||||
def parse_attribute_selector(tokens, namespaces):
    """Parse the content of a ``[...]`` block into an :class:`AttributeSelector`.

    ``tokens`` is a :class:`TokenStream` over the block's content. Raises
    :class:`SelectorError` for a missing attribute name, an unknown
    operator, a value that is neither an ident nor a string, or a trailing
    token other than the ``i`` / ``s`` flag.

    """
    tokens.skip_whitespace()
    qualified_name = parse_qualified_name(tokens, namespaces, is_attribute=True)
    if qualified_name is None:
        bad_token = tokens.next()
        raise SelectorError(
            bad_token, f'expected attribute name, got {bad_token}')
    namespace, local_name = qualified_name

    # Optional operator and value.
    tokens.skip_whitespace()
    upcoming = tokens.peek()
    operator = None
    value = None
    if upcoming is not None:
        if upcoming not in ('=', '~=', '|=', '^=', '$=', '*='):
            raise SelectorError(
                upcoming,
                f'expected attribute selector operator, got {upcoming}')
        operator = upcoming.value
        tokens.next()
        tokens.skip_whitespace()
        value_token = tokens.next()
        if value_token is None or value_token.type not in ('ident', 'string'):
            next_type = 'None' if value_token is None else value_token.type
            raise SelectorError(
                value_token, f'expected attribute value, got {next_type}')
        value = value_token.value

    # Optional case-sensitivity flag: "i" (insensitive) or "s" (sensitive).
    tokens.skip_whitespace()
    flag_token = tokens.next()
    case_sensitive = None
    if flag_token is not None:
        flags = {'i': False, 's': True}
        if flag_token.type == 'ident' and flag_token.value.lower() in flags:
            case_sensitive = flags[flag_token.value.lower()]
        else:
            raise SelectorError(
                flag_token, f'expected ], got {flag_token.type}')

    return AttributeSelector(namespace, local_name, operator, value, case_sensitive)
|
||||
|
||||
|
||||
def parse_qualified_name(tokens, namespaces, is_attribute=False):
    """Return ``(namespace, local)`` for given tokens.

    Can also return ``None`` when the next token does not start a qualified
    name; nothing is consumed in that case.

    In the returned pair, ``local`` is a ``(value, lower_value)`` tuple, or
    ``None`` for the ``*`` wildcard. ``namespace`` is a namespace URL, the
    empty string for "no namespace", or ``None`` for "any namespace".

    :param tokens: A :class:`TokenStream`.
    :param namespaces:
        Mapping of prefixes to namespace URLs; the ``None`` key holds the
        default namespace.
    :param is_attribute:
        Whether an attribute name is parsed: the default namespace then does
        not apply and a ``*`` local name is rejected.
    :raises SelectorError:
        For an undefined prefix, or when no valid local name follows.

    """
    peek = tokens.peek()
    if peek is None:
        return None
    if peek.type == 'ident':
        first_ident = tokens.next()
        peek = tokens.peek()
        if peek != '|':
            # Unprefixed name: attributes never use the default namespace.
            namespace = '' if is_attribute else namespaces.get(None, None)
            return namespace, (first_ident.value, first_ident.lower_value)
        tokens.next()
        namespace = namespaces.get(first_ident.value)
        if namespace is None:
            raise SelectorError(
                first_ident, f'undefined namespace prefix: {first_ident.value}')
    elif peek == '*':
        star = tokens.next()
        peek = tokens.peek()
        if peek != '|':
            if is_attribute:
                raise SelectorError(star, f'expected local name, got {star.type}')
            return namespaces.get(None, None), None
        tokens.next()
        namespace = None
    elif peek == '|':
        tokens.next()
        namespace = ''
    else:
        return None

    # If we get here, we just consumed '|' and set ``namespace``
    token = tokens.next()
    if token is None:
        # The stream ended right after '|': report it as an invalid
        # selector instead of failing on ``None.type``.
        raise SelectorError(token, 'expected local name, got EOF')
    if token.type == 'ident':
        return namespace, (token.value, token.lower_value)
    elif token == '*' and not is_attribute:
        return namespace, None
    else:
        raise SelectorError(token, f'expected local name, got {token.type}')
|
||||
|
||||
|
||||
class SelectorError(ValueError):
    """Error raised for invalid selectors (a specialized ``ValueError``)."""
|
||||
|
||||
|
||||
class TokenStream:
    """Iterator wrapper over tokens with one-token lookahead.

    Both :meth:`next` and :meth:`peek` return ``None`` once the underlying
    iterable is exhausted.

    """

    def __init__(self, tokens):
        self.tokens = iter(tokens)
        self.peeked = []  # In reversed order

    def next(self):
        """Consume and return the next token, or ``None`` at the end."""
        return self.peeked.pop() if self.peeked else next(self.tokens, None)

    def peek(self):
        """Return the next token without consuming it, or ``None`` at the end."""
        pending = self.peeked
        if not pending:
            pending.append(next(self.tokens, None))
        return pending[-1]

    def skip(self, skip_types):
        """Consume tokens while their ``type`` is in ``skip_types``.

        Returns whether at least one token was consumed.

        """
        skipped = False
        while True:
            upcoming = self.peek()
            if upcoming is None or upcoming.type not in skip_types:
                return skipped
            self.next()
            skipped = True

    def skip_whitespace(self):
        """Skip whitespace tokens; return whether any was skipped."""
        return self.skip(['whitespace'])

    def skip_comment(self):
        """Skip comment tokens; return whether any was skipped."""
        return self.skip(['comment'])

    def skip_whitespace_and_comment(self):
        """Skip comment and whitespace tokens; return whether any was skipped."""
        return self.skip(['comment', 'whitespace'])
|
||||
|
||||
|
||||
class Selector:
    """A parsed selector tree with an optional pseudo-element."""

    def __init__(self, tree, pseudo_element=None):
        self.parsed_tree = tree
        self.pseudo_element = pseudo_element
        #: Tuple of 3 integers: http://www.w3.org/TR/selectors/#specificity
        specificity = tree.specificity
        if pseudo_element is not None:
            # A pseudo-element adds one to the last specificity component.
            ids, classes, types = specificity
            specificity = ids, classes, types + 1
        self.specificity = specificity

    def __repr__(self):
        text = repr(self.parsed_tree)
        if self.pseudo_element:
            text += '::{}'.format(self.pseudo_element)
        return text
|
||||
|
||||
|
||||
class RelativeSelector:
    """A selector paired with the combinator that precedes it.

    Specificity and pseudo-element are read from the wrapped selector.

    """

    def __init__(self, combinator, selector):
        self.combinator = combinator
        self.selector = selector

    @property
    def specificity(self):
        return self.selector.specificity

    @property
    def pseudo_element(self):
        return self.selector.pseudo_element

    def __repr__(self):
        rendered = repr(self.selector)
        if self.combinator == ' ':
            # The single-space combinator is not shown.
            return rendered
        return '{} {}'.format(self.combinator, rendered)
|
||||
|
||||
|
||||
class CombinedSelector:
    """Two selectors joined by a combinator."""

    def __init__(self, left, combinator, right):
        #: Combined or compound selector
        self.left = left
        # One of `` `` (a single space), ``>``, ``+`` or ``~``.
        self.combinator = combinator
        #: compound selector
        self.right = right

    @property
    def specificity(self):
        """Component-wise sum of the specificities of both sides."""
        left_a, left_b, left_c = self.left.specificity
        right_a, right_b, right_c = self.right.specificity
        return (left_a + right_a, left_b + right_b, left_c + right_c)

    def __repr__(self):
        return '{!r}{}{!r}'.format(self.left, self.combinator, self.right)
|
||||
|
||||
|
||||
class CompoundSelector:
    """A sequence of simple selectors with no combinator between them."""

    def __init__(self, simple_selectors):
        self.simple_selectors = simple_selectors

    @property
    def specificity(self):
        """Component-wise sum of the simple selectors' specificities."""
        if not self.simple_selectors:
            return 0, 0, 0
        # Transpose [(a1, b1, c1), (a2, b2, c2), ...] into
        # [(a1, a2, ...), (b1, b2, ...), (c1, c2, ...)], then sum each column.
        columns = zip(*[part.specificity for part in self.simple_selectors])
        return tuple(sum(column) for column in columns)

    def __repr__(self):
        return ''.join(repr(part) for part in self.simple_selectors)
|
||||
|
||||
|
||||
class LocalNameSelector:
    """Selector on an element's local name."""

    specificity = 0, 0, 1

    def __init__(self, local_name):
        # ``local_name`` is a pair: the name as written and its lowered form.
        name, lowered = local_name
        self.local_name = name
        self.lower_local_name = lowered

    def __repr__(self):
        return self.local_name
|
||||
|
||||
|
||||
class NamespaceSelector:
    """Selector on an element's namespace."""

    specificity = 0, 0, 0

    def __init__(self, namespace):
        #: The namespace URL as a string,
        #: or the empty string for elements not in any namespace.
        self.namespace = namespace

    def __repr__(self):
        if self.namespace == '':
            return '|'
        return '{{{}}}|'.format(self.namespace)
|
||||
|
||||
|
||||
class IDSelector:
    """ID selector, displayed as ``#ident``."""

    specificity = 1, 0, 0

    def __init__(self, ident):
        self.ident = ident

    def __repr__(self):
        return '#{}'.format(self.ident)
|
||||
|
||||
|
||||
class ClassSelector:
    """Class selector, displayed as ``.class_name``."""

    specificity = 0, 1, 0

    def __init__(self, class_name):
        self.class_name = class_name

    def __repr__(self):
        return '.{}'.format(self.class_name)
|
||||
|
||||
|
||||
class AttributeSelector:
    """Attribute selector such as ``[attr]`` or ``[attr=value]``."""

    specificity = 0, 1, 0

    def __init__(self, namespace, name, operator, value, case_sensitive):
        #: The namespace given to the selector; ``None`` is displayed as
        #: ``*|`` by :meth:`__repr__`.
        self.namespace = namespace
        # ``name`` is a pair: the name as written and its lowered form.
        self.name, self.lower_name = name
        #: A string like ``=`` or ``~=``, or None for ``[attr]`` selectors
        self.operator = operator
        #: A string, or None for ``[attr]`` selectors
        self.value = value
        #: ``True`` if case-sensitive, ``False`` if case-insensitive, ``None``
        #: if depends on the document language
        self.case_sensitive = case_sensitive

    def __repr__(self):
        namespace = '*|' if self.namespace is None else f'{{{self.namespace}}}'
        if self.operator is None:
            # ``[attr]`` has neither operator nor value: show nothing rather
            # than the misleading text ``NoneNone``.
            condition = ''
        else:
            condition = f'{self.operator}{self.value!r}'
        case_sensitive = (
            '' if self.case_sensitive is None else
            f' {"s" if self.case_sensitive else "i"}')
        return f'[{namespace}{self.name}{condition}{case_sensitive}]'
|
||||
|
||||
|
||||
class PseudoClassSelector:
    """Non-functional pseudo-class, displayed as ``:name``."""

    specificity = 0, 1, 0

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        prefix = ':'
        return prefix + self.name
|
||||
|
||||
|
||||
class FunctionalPseudoClassSelector:
    """Functional pseudo-class: a name together with its arguments."""

    specificity = 0, 1, 0

    def __init__(self, name, arguments):
        self.name = name
        self.arguments = arguments

    def __repr__(self):
        # Arguments are displayed as the repr of a tuple.
        shown_arguments = tuple(self.arguments)
        return ':{}{!r}'.format(self.name, shown_arguments)
|
||||
|
||||
|
||||
class NegationSelector:
    """``:not()`` pseudo-class holding a list of selectors."""

    def __init__(self, selector_list):
        self.selector_list = selector_list

    @property
    def specificity(self):
        """Highest specificity in the list, or ``(0, 0, 0)`` if empty."""
        if not self.selector_list:
            return (0, 0, 0)
        return max([item.specificity for item in self.selector_list])

    def __repr__(self):
        inner = ', '.join(map(repr, self.selector_list))
        return ':not({})'.format(inner)
|
||||
|
||||
|
||||
class RelationalSelector:
    """``:has()`` pseudo-class holding a list of selectors."""

    def __init__(self, selector_list):
        self.selector_list = selector_list

    @property
    def specificity(self):
        """Highest specificity in the list, or ``(0, 0, 0)`` if empty."""
        if not self.selector_list:
            return (0, 0, 0)
        return max([item.specificity for item in self.selector_list])

    def __repr__(self):
        inner = ', '.join(map(repr, self.selector_list))
        return ':has({})'.format(inner)
|
||||
|
||||
|
||||
class MatchesAnySelector:
    """``:is()`` pseudo-class holding a list of selectors."""

    def __init__(self, selector_list):
        self.selector_list = selector_list

    @property
    def specificity(self):
        """Highest specificity in the list, or ``(0, 0, 0)`` if empty."""
        if not self.selector_list:
            return (0, 0, 0)
        return max([item.specificity for item in self.selector_list])

    def __repr__(self):
        inner = ', '.join(map(repr, self.selector_list))
        return ':is({})'.format(inner)
|
||||
|
||||
|
||||
class SpecificityAdjustmentSelector:
    """``:where()`` pseudo-class holding a list of selectors."""

    def __init__(self, selector_list):
        self.selector_list = selector_list

    @property
    def specificity(self):
        """Always ``(0, 0, 0)``, whatever the list contains."""
        zero = (0, 0, 0)
        return zero

    def __repr__(self):
        inner = ', '.join(map(repr, self.selector_list))
        return ':where({})'.format(inner)
|
||||
385
app/.venv/Lib/site-packages/cssselect2/tree.py
Normal file
385
app/.venv/Lib/site-packages/cssselect2/tree.py
Normal file
@@ -0,0 +1,385 @@
|
||||
from functools import cached_property
|
||||
from warnings import warn
|
||||
|
||||
from webencodings import ascii_lower
|
||||
|
||||
from .compiler import compile_selector_list, split_whitespace
|
||||
|
||||
|
||||
class ElementWrapper:
    """Wrapper of :class:`xml.etree.ElementTree.Element` for Selector matching.

    This class should not be instantiated directly. :meth:`from_xml_root` or
    :meth:`from_html_root` should be used for the root element of a document,
    and other elements should be accessed (and wrappers generated) using
    methods such as :meth:`iter_children` and :meth:`iter_subtree`.

    :class:`ElementWrapper` objects compare equal if their underlying
    :class:`xml.etree.ElementTree.Element` do.

    """
    @classmethod
    def from_xml_root(cls, root, content_language=None):
        """Wrap for selector matching the root of an XML or XHTML document.

        :param root:
            An ElementTree :class:`xml.etree.ElementTree.Element`
            for the root element of a document.
            If the given element is not the root,
            selector matching will behave as if it were.
            In other words, selectors will not be `scoped`_
            to the subtree rooted at that element.
        :param content_language:
            Language used by :attr:`lang` as a last resort, when the document
            itself does not declare one.
        :returns:
            A new :class:`ElementWrapper`

        .. _scoped: https://drafts.csswg.org/selectors-4/#scoping

        """
        return cls._from_root(root, content_language, in_html_document=False)

    @classmethod
    def from_html_root(cls, root, content_language=None):
        """Same as :meth:`from_xml_root` with case-insensitive attribute names.

        Useful for documents parsed with an HTML parser like html5lib, which
        should be the case of documents with the ``text/html`` MIME type.

        """
        return cls._from_root(root, content_language, in_html_document=True)

    @classmethod
    def _from_root(cls, root, content_language, in_html_document=True):
        """Build the root wrapper, accepting either a tree or an element."""
        if hasattr(root, 'getroot'):
            # An ElementTree-like object was given: use its root element.
            root = root.getroot()
        return cls(
            root, parent=None, index=0, previous=None,
            in_html_document=in_html_document, content_language=content_language)

    def __init__(self, etree_element, parent, index, previous,
                 in_html_document, content_language=None):
        #: The underlying ElementTree :class:`xml.etree.ElementTree.Element`
        self.etree_element = etree_element
        #: The parent :class:`ElementWrapper`,
        #: or :obj:`None` for the root element.
        self.parent = parent
        #: The previous sibling :class:`ElementWrapper`,
        #: or :obj:`None` for the root element.
        self.previous = previous
        if parent is not None:
            #: The :attr:`parent`'s children
            #: as a list of
            #: ElementTree :class:`xml.etree.ElementTree.Element`\ s.
            #: For the root (which has no parent),
            #: this is a list containing only the root element itself.
            self.etree_siblings = parent.etree_children
        else:
            self.etree_siblings = [etree_element]
        #: The position within the :attr:`parent`'s children, counting from 0.
        #: ``e.etree_siblings[e.index]`` is always ``e.etree_element``.
        self.index = index
        self.in_html_document = in_html_document
        self.transport_content_language = content_language

        # Cache
        self._ancestors = None
        self._previous_siblings = None

    def __eq__(self, other):
        return (
            type(self) is type(other) and
            self.etree_element == other.etree_element)

    def __ne__(self, other):
        return not (self == other)

    def __hash__(self):
        return hash((type(self), self.etree_element))

    def __iter__(self):
        yield from self.iter_children()

    @property
    def ancestors(self):
        """Tuple of existing ancestors.

        Tuple of existing :class:`ElementWrapper` objects for this element's
        ancestors, in reversed tree order, from :attr:`parent` to the root.

        """
        if self._ancestors is None:
            # Built iteratively (no recursion limit on deep trees), nearest
            # ancestor first as documented.
            chain = []
            element = self.parent
            while element is not None:
                chain.append(element)
                if element._ancestors is not None:
                    # Reuse the tuple already cached on that ancestor.
                    chain.extend(element._ancestors)
                    break
                element = element.parent
            self._ancestors = tuple(chain)
        return self._ancestors

    @property
    def previous_siblings(self):
        """Tuple of previous siblings.

        Tuple of existing :class:`ElementWrapper` objects for this element's
        previous siblings, in reversed tree order.

        """
        if self._previous_siblings is None:
            # Built iteratively: a recursive build fails with RecursionError
            # when an element has more previous siblings than the
            # interpreter's recursion limit.
            chain = []
            element = self.previous
            while element is not None:
                chain.append(element)
                if element._previous_siblings is not None:
                    # Reuse the tuple already cached on that sibling.
                    chain.extend(element._previous_siblings)
                    break
                element = element.previous
            self._previous_siblings = tuple(chain)
        return self._previous_siblings

    def iter_ancestors(self):
        """Iterate over ancestors.

        Return an iterator of existing :class:`ElementWrapper` objects for this
        element's ancestors, in reversed tree order (from :attr:`parent` to the
        root).

        The element itself is not included, this is an empty sequence for the
        root element.

        This method is deprecated and will be removed in version 0.7.0. Use
        :attr:`ancestors` instead.

        """
        # stacklevel=2 attributes the warning to the code consuming the
        # iterator instead of this module.
        warn(
            'This method is deprecated and will be removed in version 0.7.0. '
            'Use the "ancestors" attribute instead.',
            DeprecationWarning, stacklevel=2)
        yield from self.ancestors

    def iter_previous_siblings(self):
        """Iterate over previous siblings.

        Return an iterator of existing :class:`ElementWrapper` objects for this
        element's previous siblings, in reversed tree order.

        The element itself is not included, this is an empty sequence for a
        first child or the root element.

        This method is deprecated and will be removed in version 0.7.0. Use
        :attr:`previous_siblings` instead.

        """
        # stacklevel=2 attributes the warning to the code consuming the
        # iterator instead of this module.
        warn(
            'This method is deprecated and will be removed in version 0.7.0. '
            'Use the "previous_siblings" attribute instead.',
            DeprecationWarning, stacklevel=2)
        yield from self.previous_siblings

    def iter_siblings(self):
        """Iterate over siblings.

        Return an iterator of newly-created :class:`ElementWrapper` objects for
        this element's siblings, in tree order. The element itself is included;
        for the root element, the only item is the root wrapper itself.

        """
        if self.parent is None:
            yield self
        else:
            yield from self.parent.iter_children()

    def iter_next_siblings(self):
        """Iterate over next siblings.

        Return an iterator of newly-created :class:`ElementWrapper` objects for
        this element's next siblings, in tree order.

        """
        found = False
        for sibling in self.iter_siblings():
            if found:
                yield sibling
            if sibling == self:
                found = True

    def iter_children(self):
        """Iterate over children.

        Return an iterator of newly-created :class:`ElementWrapper` objects for
        this element's child elements, in tree order.

        """
        child = None
        for i, etree_child in enumerate(self.etree_children):
            # Each new wrapper is linked to the wrapper created just before it.
            child = type(self)(
                etree_child, parent=self, index=i, previous=child,
                in_html_document=self.in_html_document)
            yield child

    def iter_subtree(self):
        """Iterate over subtree.

        Return an iterator of newly-created :class:`ElementWrapper` objects for
        the entire subtree rooted at this element, in tree order.

        Unlike in other methods, the element itself *is* included.

        This loops over an entire document:

        .. code-block:: python

            for element in ElementWrapper.from_xml_root(root_etree).iter_subtree():
                ...

        """
        # Depth-first traversal using an explicit stack of child iterators.
        stack = [iter([self])]
        while stack:
            element = next(stack[-1], None)
            if element is None:
                stack.pop()
            else:
                yield element
                stack.append(element.iter_children())

    @staticmethod
    def _compile(selectors):
        """Return the test functions of the given selectors.

        Selectors that target a pseudo-element or that never match are left
        out.

        """
        return [
            compiled_selector.test
            for selector in selectors
            for compiled_selector in (
                [selector] if hasattr(selector, 'test')
                else compile_selector_list(selector))
            if compiled_selector.pseudo_element is None and
            not compiled_selector.never_matches]

    def matches(self, *selectors):
        """Return whether this element matches any of the given selectors.

        :param selectors:
            Each given selector is either a :class:`compiler.CompiledSelector`,
            or an argument to :func:`compile_selector_list`.

        """
        return any(test(self) for test in self._compile(selectors))

    def query_all(self, *selectors):
        """Return elements, in tree order, that match any of given selectors.

        Selectors are `scoped`_ to the subtree rooted at this element.

        .. _scoped: https://drafts.csswg.org/selectors-4/#scoping

        :param selectors:
            Each given selector is either a :class:`compiler.CompiledSelector`,
            or an argument to :func:`compile_selector_list`.
        :returns:
            An iterator of newly-created :class:`ElementWrapper` objects.

        """
        tests = self._compile(selectors)
        if len(tests) == 1:
            return filter(tests[0], self.iter_subtree())
        elif tests:
            return (
                element for element in self.iter_subtree()
                if any(test(element) for test in tests))
        else:
            # No usable test: nothing can match, so skip the tree walk.
            return iter(())

    def query(self, *selectors):
        """Return first element that matches any of given selectors.

        :param selectors:
            Each given selector is either a :class:`compiler.CompiledSelector`,
            or an argument to :func:`compile_selector_list`.
        :returns:
            A newly-created :class:`ElementWrapper` object,
            or :obj:`None` if there is no match.

        """
        return next(self.query_all(*selectors), None)

    @cached_property
    def etree_children(self):
        """Children as a list of :class:`xml.etree.ElementTree.Element`.

        Other ElementTree nodes such as
        :func:`comments <xml.etree.ElementTree.Comment>` and
        :func:`processing instructions
        <xml.etree.ElementTree.ProcessingInstruction>`
        are not included.

        """
        return [
            element for element in self.etree_element
            if isinstance(element.tag, str)]

    @cached_property
    def local_name(self):
        """The local name of this element, as a string."""
        namespace_url, local_name = _split_etree_tag(self.etree_element.tag)
        # The tag is split once: also fill the ``namespace_url`` cache.
        self.__dict__['namespace_url'] = namespace_url
        return local_name

    @cached_property
    def namespace_url(self):
        """The namespace URL of this element, as a string."""
        namespace_url, local_name = _split_etree_tag(self.etree_element.tag)
        # The tag is split once: also fill the ``local_name`` cache.
        self.__dict__['local_name'] = local_name
        return namespace_url

    @cached_property
    def id(self):
        """The ID of this element, as a string."""
        return self.etree_element.get('id')

    @cached_property
    def classes(self):
        """The classes of this element, as a :class:`set` of strings."""
        return set(split_whitespace(self.etree_element.get('class', '')))

    @cached_property
    def lang(self):
        """The language of this element, as a string.

        Looked up in this order: the ``xml:lang`` attribute, the ``lang``
        attribute (HTML only), the parent's language, then for an HTML root
        element a ``content-language`` ``meta`` element, and finally the
        ``content_language`` given when wrapping the root. The empty string
        means that the language is unknown.

        """
        # http://whatwg.org/C#language
        xml_lang = self.etree_element.get('{http://www.w3.org/XML/1998/namespace}lang')
        if xml_lang is not None:
            return ascii_lower(xml_lang)
        is_html = (
            self.in_html_document or
            self.namespace_url == 'http://www.w3.org/1999/xhtml')
        if is_html:
            lang = self.etree_element.get('lang')
            if lang is not None:
                return ascii_lower(lang)
        if self.parent is not None:
            return self.parent.lang
        # Root element
        if is_html:
            content_language = None
            iterator = self.etree_element.iter('{http://www.w3.org/1999/xhtml}meta')
            for meta in iterator:
                http_equiv = meta.get('http-equiv', '')
                if ascii_lower(http_equiv) == 'content-language':
                    content_language = _parse_content_language(meta.get('content'))
            if content_language is not None:
                return ascii_lower(content_language)
        # Empty string means unknown
        return _parse_content_language(self.transport_content_language) or ''

    @cached_property
    def in_disabled_fieldset(self):
        """Whether this element is inside a disabled XHTML ``fieldset``.

        A ``legend`` child of a disabled ``fieldset`` only counts as disabled
        when another ``legend`` precedes it among its siblings.

        """
        if self.parent is None:
            return False
        fieldset = '{http://www.w3.org/1999/xhtml}fieldset'
        legend = '{http://www.w3.org/1999/xhtml}legend'
        # Use the ``previous_siblings`` attribute: the ``iter_*`` equivalent
        # is deprecated and would emit a DeprecationWarning from library code.
        disabled_fieldset = (
            self.parent.etree_element.tag == fieldset and
            self.parent.etree_element.get('disabled') is not None and (
                self.etree_element.tag != legend or any(
                    sibling.etree_element.tag == legend
                    for sibling in self.previous_siblings)))
        return disabled_fieldset or self.parent.in_disabled_fieldset
|
||||
|
||||
|
||||
def _split_etree_tag(tag):
|
||||
position = tag.rfind('}')
|
||||
if position == -1:
|
||||
return '', tag
|
||||
else:
|
||||
assert tag[0] == '{'
|
||||
return tag[1:position], tag[position+1:]
|
||||
|
||||
|
||||
def _parse_content_language(value):
|
||||
if value is not None and ',' not in value:
|
||||
parts = split_whitespace(value)
|
||||
if len(parts) == 1:
|
||||
return parts[0]
|
||||
Reference in New Issue
Block a user