feat: add comprehensive GitHub workflow and development tools
This commit is contained in:
385
app/.venv/Lib/site-packages/cssselect2/tree.py
Normal file
385
app/.venv/Lib/site-packages/cssselect2/tree.py
Normal file
@@ -0,0 +1,385 @@
|
||||
from functools import cached_property
|
||||
from warnings import warn
|
||||
|
||||
from webencodings import ascii_lower
|
||||
|
||||
from .compiler import compile_selector_list, split_whitespace
|
||||
|
||||
|
||||
class ElementWrapper:
|
||||
"""Wrapper of :class:`xml.etree.ElementTree.Element` for Selector matching.
|
||||
|
||||
This class should not be instanciated directly. :meth:`from_xml_root` or
|
||||
:meth:`from_html_root` should be used for the root element of a document,
|
||||
and other elements should be accessed (and wrappers generated) using
|
||||
methods such as :meth:`iter_children` and :meth:`iter_subtree`.
|
||||
|
||||
:class:`ElementWrapper` objects compare equal if their underlying
|
||||
:class:`xml.etree.ElementTree.Element` do.
|
||||
|
||||
"""
|
||||
@classmethod
|
||||
def from_xml_root(cls, root, content_language=None):
|
||||
"""Wrap for selector matching the root of an XML or XHTML document.
|
||||
|
||||
:param root:
|
||||
An ElementTree :class:`xml.etree.ElementTree.Element`
|
||||
for the root element of a document.
|
||||
If the given element is not the root,
|
||||
selector matching will behave is if it were.
|
||||
In other words, selectors will be not be `scoped`_
|
||||
to the subtree rooted at that element.
|
||||
:returns:
|
||||
A new :class:`ElementWrapper`
|
||||
|
||||
.. _scoped: https://drafts.csswg.org/selectors-4/#scoping
|
||||
|
||||
"""
|
||||
return cls._from_root(root, content_language, in_html_document=False)
|
||||
|
||||
@classmethod
|
||||
def from_html_root(cls, root, content_language=None):
|
||||
"""Same as :meth:`from_xml_root` with case-insensitive attribute names.
|
||||
|
||||
Useful for documents parsed with an HTML parser like html5lib, which
|
||||
should be the case of documents with the ``text/html`` MIME type.
|
||||
|
||||
"""
|
||||
return cls._from_root(root, content_language, in_html_document=True)
|
||||
|
||||
@classmethod
|
||||
def _from_root(cls, root, content_language, in_html_document=True):
|
||||
if hasattr(root, 'getroot'):
|
||||
root = root.getroot()
|
||||
return cls(
|
||||
root, parent=None, index=0, previous=None,
|
||||
in_html_document=in_html_document, content_language=content_language)
|
||||
|
||||
def __init__(self, etree_element, parent, index, previous,
|
||||
in_html_document, content_language=None):
|
||||
#: The underlying ElementTree :class:`xml.etree.ElementTree.Element`
|
||||
self.etree_element = etree_element
|
||||
#: The parent :class:`ElementWrapper`,
|
||||
#: or :obj:`None` for the root element.
|
||||
self.parent = parent
|
||||
#: The previous sibling :class:`ElementWrapper`,
|
||||
#: or :obj:`None` for the root element.
|
||||
self.previous = previous
|
||||
if parent is not None:
|
||||
#: The :attr:`parent`’s children
|
||||
#: as a list of
|
||||
#: ElementTree :class:`xml.etree.ElementTree.Element`\ s.
|
||||
#: For the root (which has no parent)
|
||||
self.etree_siblings = parent.etree_children
|
||||
else:
|
||||
self.etree_siblings = [etree_element]
|
||||
#: The position within the :attr:`parent`’s children, counting from 0.
|
||||
#: ``e.etree_siblings[e.index]`` is always ``e.etree_element``.
|
||||
self.index = index
|
||||
self.in_html_document = in_html_document
|
||||
self.transport_content_language = content_language
|
||||
|
||||
# Cache
|
||||
self._ancestors = None
|
||||
self._previous_siblings = None
|
||||
|
||||
def __eq__(self, other):
|
||||
return (
|
||||
type(self) is type(other) and
|
||||
self.etree_element == other.etree_element)
|
||||
|
||||
def __ne__(self, other):
|
||||
return not (self == other)
|
||||
|
||||
def __hash__(self):
|
||||
return hash((type(self), self.etree_element))
|
||||
|
||||
def __iter__(self):
|
||||
yield from self.iter_children()
|
||||
|
||||
@property
|
||||
def ancestors(self):
|
||||
"""Tuple of existing ancestors.
|
||||
|
||||
Tuple of existing :class:`ElementWrapper` objects for this element’s
|
||||
ancestors, in reversed tree order, from :attr:`parent` to the root.
|
||||
|
||||
"""
|
||||
if self._ancestors is None:
|
||||
self._ancestors = (
|
||||
() if self.parent is None else (*self.parent.ancestors, self.parent))
|
||||
return self._ancestors
|
||||
|
||||
@property
|
||||
def previous_siblings(self):
|
||||
"""Tuple of previous siblings.
|
||||
|
||||
Tuple of existing :class:`ElementWrapper` objects for this element’s
|
||||
previous siblings, in reversed tree order.
|
||||
|
||||
"""
|
||||
if self._previous_siblings is None:
|
||||
self._previous_siblings = (
|
||||
() if self.previous is None else
|
||||
(*self.previous.previous_siblings, self.previous))
|
||||
return self._previous_siblings
|
||||
|
||||
def iter_ancestors(self):
|
||||
"""Iterate over ancestors.
|
||||
|
||||
Return an iterator of existing :class:`ElementWrapper` objects for this
|
||||
element’s ancestors, in reversed tree order (from :attr:`parent` to the
|
||||
root).
|
||||
|
||||
The element itself is not included, this is an empty sequence for the
|
||||
root element.
|
||||
|
||||
This method is deprecated and will be removed in version 0.7.0. Use
|
||||
:attr:`ancestors` instead.
|
||||
|
||||
"""
|
||||
warn(
|
||||
'This method is deprecated and will be removed in version 0.7.0. '
|
||||
'Use the "ancestors" attribute instead.',
|
||||
DeprecationWarning)
|
||||
yield from self.ancestors
|
||||
|
||||
def iter_previous_siblings(self):
|
||||
"""Iterate over previous siblings.
|
||||
|
||||
Return an iterator of existing :class:`ElementWrapper` objects for this
|
||||
element’s previous siblings, in reversed tree order.
|
||||
|
||||
The element itself is not included, this is an empty sequence for a
|
||||
first child or the root element.
|
||||
|
||||
This method is deprecated and will be removed in version 0.7.0. Use
|
||||
:attr:`previous_siblings` instead.
|
||||
|
||||
"""
|
||||
warn(
|
||||
'This method is deprecated and will be removed in version 0.7.0. '
|
||||
'Use the "previous_siblings" attribute instead.',
|
||||
DeprecationWarning)
|
||||
yield from self.previous_siblings
|
||||
|
||||
def iter_siblings(self):
|
||||
"""Iterate over siblings.
|
||||
|
||||
Return an iterator of newly-created :class:`ElementWrapper` objects for
|
||||
this element’s siblings, in tree order.
|
||||
|
||||
"""
|
||||
if self.parent is None:
|
||||
yield self
|
||||
else:
|
||||
yield from self.parent.iter_children()
|
||||
|
||||
def iter_next_siblings(self):
|
||||
"""Iterate over next siblings.
|
||||
|
||||
Return an iterator of newly-created :class:`ElementWrapper` objects for
|
||||
this element’s next siblings, in tree order.
|
||||
|
||||
"""
|
||||
found = False
|
||||
for sibling in self.iter_siblings():
|
||||
if found:
|
||||
yield sibling
|
||||
if sibling == self:
|
||||
found = True
|
||||
|
||||
def iter_children(self):
|
||||
"""Iterate over children.
|
||||
|
||||
Return an iterator of newly-created :class:`ElementWrapper` objects for
|
||||
this element’s child elements, in tree order.
|
||||
|
||||
"""
|
||||
child = None
|
||||
for i, etree_child in enumerate(self.etree_children):
|
||||
child = type(self)(
|
||||
etree_child, parent=self, index=i, previous=child,
|
||||
in_html_document=self.in_html_document)
|
||||
yield child
|
||||
|
||||
def iter_subtree(self):
|
||||
"""Iterate over subtree.
|
||||
|
||||
Return an iterator of newly-created :class:`ElementWrapper` objects for
|
||||
the entire subtree rooted at this element, in tree order.
|
||||
|
||||
Unlike in other methods, the element itself *is* included.
|
||||
|
||||
This loops over an entire document:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
for element in ElementWrapper.from_root(root_etree).iter_subtree():
|
||||
...
|
||||
|
||||
"""
|
||||
stack = [iter([self])]
|
||||
while stack:
|
||||
element = next(stack[-1], None)
|
||||
if element is None:
|
||||
stack.pop()
|
||||
else:
|
||||
yield element
|
||||
stack.append(element.iter_children())
|
||||
|
||||
@staticmethod
|
||||
def _compile(selectors):
|
||||
return [
|
||||
compiled_selector.test
|
||||
for selector in selectors
|
||||
for compiled_selector in (
|
||||
[selector] if hasattr(selector, 'test')
|
||||
else compile_selector_list(selector))
|
||||
if compiled_selector.pseudo_element is None and
|
||||
not compiled_selector.never_matches]
|
||||
|
||||
def matches(self, *selectors):
|
||||
"""Return wether this elememt matches any of the given selectors.
|
||||
|
||||
:param selectors:
|
||||
Each given selector is either a :class:`compiler.CompiledSelector`,
|
||||
or an argument to :func:`compile_selector_list`.
|
||||
|
||||
"""
|
||||
return any(test(self) for test in self._compile(selectors))
|
||||
|
||||
def query_all(self, *selectors):
|
||||
"""Return elements, in tree order, that match any of given selectors.
|
||||
|
||||
Selectors are `scoped`_ to the subtree rooted at this element.
|
||||
|
||||
.. _scoped: https://drafts.csswg.org/selectors-4/#scoping
|
||||
|
||||
:param selectors:
|
||||
Each given selector is either a :class:`compiler.CompiledSelector`,
|
||||
or an argument to :func:`compile_selector_list`.
|
||||
:returns:
|
||||
An iterator of newly-created :class:`ElementWrapper` objects.
|
||||
|
||||
"""
|
||||
tests = self._compile(selectors)
|
||||
if len(tests) == 1:
|
||||
return filter(tests[0], self.iter_subtree())
|
||||
elif selectors:
|
||||
return (
|
||||
element for element in self.iter_subtree()
|
||||
if any(test(element) for test in tests))
|
||||
else:
|
||||
return iter(())
|
||||
|
||||
def query(self, *selectors):
|
||||
"""Return first element that matches any of given selectors.
|
||||
|
||||
:param selectors:
|
||||
Each given selector is either a :class:`compiler.CompiledSelector`,
|
||||
or an argument to :func:`compile_selector_list`.
|
||||
:returns:
|
||||
A newly-created :class:`ElementWrapper` object,
|
||||
or :obj:`None` if there is no match.
|
||||
|
||||
"""
|
||||
return next(self.query_all(*selectors), None)
|
||||
|
||||
@cached_property
|
||||
def etree_children(self):
|
||||
"""Children as a list of :class:`xml.etree.ElementTree.Element`.
|
||||
|
||||
Other ElementTree nodes such as
|
||||
:func:`comments <xml.etree.ElementTree.Comment>` and
|
||||
:func:`processing instructions
|
||||
<xml.etree.ElementTree.ProcessingInstruction>`
|
||||
are not included.
|
||||
|
||||
"""
|
||||
return [
|
||||
element for element in self.etree_element
|
||||
if isinstance(element.tag, str)]
|
||||
|
||||
@cached_property
|
||||
def local_name(self):
|
||||
"""The local name of this element, as a string."""
|
||||
namespace_url, local_name = _split_etree_tag(self.etree_element.tag)
|
||||
self.__dict__[str('namespace_url')] = namespace_url
|
||||
return local_name
|
||||
|
||||
@cached_property
|
||||
def namespace_url(self):
|
||||
"""The namespace URL of this element, as a string."""
|
||||
namespace_url, local_name = _split_etree_tag(self.etree_element.tag)
|
||||
self.__dict__[str('local_name')] = local_name
|
||||
return namespace_url
|
||||
|
||||
@cached_property
|
||||
def id(self):
|
||||
"""The ID of this element, as a string."""
|
||||
return self.etree_element.get('id')
|
||||
|
||||
@cached_property
|
||||
def classes(self):
|
||||
"""The classes of this element, as a :class:`set` of strings."""
|
||||
return set(split_whitespace(self.etree_element.get('class', '')))
|
||||
|
||||
@cached_property
|
||||
def lang(self):
|
||||
"""The language of this element, as a string."""
|
||||
# http://whatwg.org/C#language
|
||||
xml_lang = self.etree_element.get('{http://www.w3.org/XML/1998/namespace}lang')
|
||||
if xml_lang is not None:
|
||||
return ascii_lower(xml_lang)
|
||||
is_html = (
|
||||
self.in_html_document or
|
||||
self.namespace_url == 'http://www.w3.org/1999/xhtml')
|
||||
if is_html:
|
||||
lang = self.etree_element.get('lang')
|
||||
if lang is not None:
|
||||
return ascii_lower(lang)
|
||||
if self.parent is not None:
|
||||
return self.parent.lang
|
||||
# Root elememnt
|
||||
if is_html:
|
||||
content_language = None
|
||||
iterator = self.etree_element.iter('{http://www.w3.org/1999/xhtml}meta')
|
||||
for meta in iterator:
|
||||
http_equiv = meta.get('http-equiv', '')
|
||||
if ascii_lower(http_equiv) == 'content-language':
|
||||
content_language = _parse_content_language(meta.get('content'))
|
||||
if content_language is not None:
|
||||
return ascii_lower(content_language)
|
||||
# Empty string means unknown
|
||||
return _parse_content_language(self.transport_content_language) or ''
|
||||
|
||||
@cached_property
|
||||
def in_disabled_fieldset(self):
|
||||
if self.parent is None:
|
||||
return False
|
||||
fieldset = '{http://www.w3.org/1999/xhtml}fieldset'
|
||||
legend = '{http://www.w3.org/1999/xhtml}legend'
|
||||
disabled_fieldset = (
|
||||
self.parent.etree_element.tag == fieldset and
|
||||
self.parent.etree_element.get('disabled') is not None and (
|
||||
self.etree_element.tag != legend or any(
|
||||
sibling.etree_element.tag == legend
|
||||
for sibling in self.iter_previous_siblings())))
|
||||
return disabled_fieldset or self.parent.in_disabled_fieldset
|
||||
|
||||
|
||||
def _split_etree_tag(tag):
|
||||
position = tag.rfind('}')
|
||||
if position == -1:
|
||||
return '', tag
|
||||
else:
|
||||
assert tag[0] == '{'
|
||||
return tag[1:position], tag[position+1:]
|
||||
|
||||
|
||||
def _parse_content_language(value):
|
||||
if value is not None and ',' not in value:
|
||||
parts = split_whitespace(value)
|
||||
if len(parts) == 1:
|
||||
return parts[0]
|
||||
Reference in New Issue
Block a user