Source code for ocx_schema_parser.xelement

#  Copyright (c) 2022-2025. OCX Consortium https://3docx.org. See the LICENSE

# Sys imports
import re
from typing import Any, Dict, List, Union

# Third party imports
from loguru import logger
from lxml import etree
from lxml.etree import Element, ElementTextIterator, QName


[docs]class LxmlElement: """A wrapper class for the lxml etree.Element class main functions."""
[docs] @staticmethod def items(element: Element) -> List: """Gets element attributes, as a sequence. The attributes are returned in an arbitrary order. Args: element: The current etree.Element node Returns: A List of all attributes of ``element`` """ return element.items()
[docs] @staticmethod def get_root(element: Element) -> Element: """Return Element root node of the document that contains this element. Args: element: The current etree.Element node Returns: The root Element node of the document for this element """ et = element.getroottree() return et.getroot()
[docs] @staticmethod def get_base(element: Element) -> str: """The base URI of the document holding the Element (the location of the document) Args: element: The current etree.Element node Returns: The base URI of the document or empty string if unknown """ base = element.base if base is None: return "" else: return base
[docs] @staticmethod def get_source_line(element: Element) -> int: """Original line number as found by the parser or None if unknown. Args: element: The current etree.Element node Returns: The line number of the ``element`` in the source """ return element.sourceline
[docs] @staticmethod def get_parent(element: Element) -> Element: """Returns the parent of this element or None for the root element. Args: element: The current ``etree.Element`` node Returns: The parent Element node of the document for this element or none if ``element`` is the root """ return element.getparent()
[docs] @staticmethod def get_children(element: Element) -> List[Element]: """Returns all direct children of the xml element Args: element: The XML parent node Returns: A List of all children of 'element' excluding the parent itself. """ return element.findall(".//")
[docs] @staticmethod def get_xml_attrib(element: Element) -> Dict: """The XML attributes of an element Args: element: The XML parent node Returns: A dictionary of ``(key,value)`` pairs of the element attributes """ return element.attrib
[docs] @staticmethod def get_localname(element: Element) -> str: """The local name (type) of an XML element Args: element: The XML parent node Returns: The element local name """ qn = QName(element) return qn.localname
[docs] @classmethod def unique_tag(cls, element: Element) -> Any: """The unique tag of an XML element: ``{namespace}name`` Args: element: The XML parent node Returns: The element unique tag """ name = element.get("name") ns = cls.get_namespace(element) return f"{LxmlElement.namespaces_decorate(ns)}{name}"
[docs] @staticmethod def get_name(element: Element) -> Any: """The name of an XML element defined by the attribute ``name`` Args: element: The XML parent node Returns: The element local name or None if no name """ name = element.get("name") if name is None: name = LxmlElement.strip_namespace_prefix(element.get("ref")) return name
[docs] @staticmethod def get_use(element: Element) -> Any: """Whether XML element is required or optional given by the attribute value ``use`` Args: element: The XML parent node Returns: The element use as a string, either ``required`` or ``optional`` """ use = element.get("use") if use is None: return "opt." if use == "required": use = "req." return use
[docs] @staticmethod def get_reference(element: Element) -> Any: """The referenced XML element defined by the attribute ``ref`` Args: element: The XML parent node Returns: The referenced element including namespace prefix if any. None if there is no reference """ return element.get("ref")
[docs] @staticmethod def is_enumeration(element: Element) -> bool: """Whether the attribute is an enumeration or not Args: element: The XML parent node Returns: true if the attribute is an enumeratos, false otherwise """ return LxmlElement.has_child_with_name(element, "enumeration")
[docs] @staticmethod def is_reference(element: Element) -> bool: """Whether the element is a reference or not Args: element: The XML parent node Returns: true if the element is a reference, false otherwise """ return element.get("use") is not None
[docs] @staticmethod def is_mandatory(element: Element) -> bool: """The element use. True if required, False otherwise Args: element: The element node Returns: True if the element is mandatory, false otherwise """ lower, upper = LxmlElement.cardinality(element) if lower == 0: return False else: return True
[docs] @classmethod def cardinality_string(cls, element) -> str: """Return the element cardinality formatted string.""" lower, upper = cls.cardinality(element) if upper == "unbounded": upper = "\u221e" # UTF-8 Infinity symbol return f"[{lower}, {upper}]"
[docs] @staticmethod def cardinality(element) -> tuple: """Establish the cardinality of the ``Element`` Args: element: the ``etree.Element`` instance Returns: The tuple of the element lower and upper bounds (lower, upper) """ attributes = LxmlElement.get_xml_attrib(element) if "minOccurs" in attributes: lower = int(attributes["minOccurs"]) else: use = attributes.get("use") if use is not None: if use == "req" or use == "required": lower = 1 else: lower = 0 else: lower = 1 if "maxOccurs" in attributes: upper = attributes["maxOccurs"] else: upper = 1 # Find the closest xs:sequence or xs:choice ancestor which overrules mandatory use for item in element.iterancestors("{*}sequence", "{*}choice"): attributes = item.attrib if "minOccurs" in attributes: lower = int(attributes["minOccurs"]) if "maxOccurs" in attributes: upper = attributes["maxOccurs"] break return lower, upper
[docs] @staticmethod def is_choice(element) -> bool: """Return True if the element ancestor is an xs:choice, False otherwise Args: element: the etree.Element instance Returns: True if the element node is a choice, false otherwise """ choice = False # Find the closest sequence or choice ancestor which overrules mandatory use for item in element.iterancestors("{*}sequence", "{*}choice"): qn = QName(item) if qn.localname == "choice": choice = True break return choice
[docs] @staticmethod def is_substitution_group(element) -> bool: """Return True if the element is a substitution Args: element: the etree.Element instance Returns: True if the element is a substitutionGroup, false otherwise """ attributes = LxmlElement.get_xml_attrib(element) if "substitutionGroup" in attributes: return True else: return False
[docs] @staticmethod def is_abstract(element) -> bool: """Return True if the element is abstract Args: element: the ``etree.Element`` instance Returns: True if the element abstract, false otherwise """ attributes = LxmlElement.get_xml_attrib(element) if "abstract" in attributes: return True else: return False
[docs] @staticmethod def get_substitution_group(element: Element) -> str: """Return the name of the element's substitutionGroup Args: element: the ``etree.Element`` instance Returns: name of substitutionGroup, None if no substitutionGroup """ attributes = LxmlElement.get_xml_attrib(element) return attributes.get("substitutionGroup")
[docs] @staticmethod def get_restriction(element: Element) -> str: """Return the element restriction Args: element: the ``etree.Element`` instance Returns: restriction type """ restriction = "" for item in LxmlElement.iter(element, "{*}restriction"): restriction = item.get("base") return restriction
[docs] @staticmethod def get_element_text(element: Element) -> str: """The text between the element's start and end tags without any tail text. Args: element: the etree.Element instance Returns: The element text stripped of any special characters """ text = "" description = "" for text in ElementTextIterator(element, with_tail=False): description = description + text break text = re.sub("[\n\t\r]", "", description) # Strip off special characters return text
[docs] @staticmethod def get_namespace(element: Element) -> str: """The namespace of an XML element Args: element: The XML parent node Returns: The element namespace """ qn = QName(element) return qn.namespace
[docs] @staticmethod def iter(element: Element, tag=None, *tags) -> etree.ElementDepthFirstIterator: """Iterate over all elements in the subtree in document order (depth first pre-order), starting with this element. Can be restricted to find only elements with specific tags: pass ``{ns}localname`` as tag. Either or both of ns and localname can be * for a wildcard; ns can be empty for no namespace. ``localname`` is equivalent to ``{}localname`` (i.e. no namespace) but ``*`` is ``{*}*`` (any or no namespace), not ``{}*``. Passing multiple tags (or a sequence of tags) instead of a single tag will let the iterator return all elements matching any of these tags, in document order. Args: element: The etree.Element node to search from tag: The name of the child Returns: An iterator filtered by tags if specified. Example: .. highlight:: python .. code-block:: python for type in LxmlElement.iter(root, {*}complexType) print(type.tag) will iterate over all ``complexType`` tags and print the tag starting from the document root . """ return element.iter(tag, *tags)
[docs] @staticmethod def find_all_children_with_name( element: Element, child_name: str, namespace: str = "*" ) -> List: """Find all the XML element's children with name ``child_name`` Args: element: The XML parent node to search from child_name: The name of the child namespace: The search namespace. Default is the wildcard ``*`` matching any namespace Returns: A list of elements. Empty list if no children can be found """ xpath = f".//{LxmlElement.namespaces_decorate(namespace)}{child_name}" return element.findall(xpath)
[docs] @staticmethod def find_child_with_name( element: Element, child_name: str, namespace: str = "*" ) -> Element: """Find the first direct child of the XML element's children with name ``child_name`` Args: element: The XML parent node to search from child_name: The name of the child namespace: The search namespace. Default is the wildcard '*' matching any namespace Returns: The child element as etree.Element. None if no child can be found """ xpath = f".//{LxmlElement.namespaces_decorate(namespace)}{child_name}" return element.find(xpath)
[docs] @staticmethod def find_attributes(element: Element, namespace: str = "*") -> List[Element]: """Find all sub elements of type xs:attribute Args: element: The XML parent node to search from namespace: The search namespace. Default is the wildcard ``*`` matching any namespace Returns: The list of the xs:attribute type found """ xpath = f".//{LxmlElement.namespaces_decorate(namespace)}attribute" return element.findall(xpath)
[docs] @staticmethod def find_attribute_groups(element: Element, namespace: str = "*") -> List[Element]: """Find all sub elements of type ``xs:attributeGroup`` Args: element: The XML parent node to search from namespace: The search namespace. Default is the wildcard ``*`` matching any namespace Returns: Attribute groups """ xpath = f".//{LxmlElement.namespaces_decorate(namespace)}attributeGroup" return element.findall(xpath)
[docs] @staticmethod def has_child_with_name( element: Element, child_name: str, namespace: str = "*" ) -> bool: """Check if the element has a child with name 'child_name' Args: element: The XML parent node to search from child_name: The name of the child namespace: The search namespace. Default is the wildcard ``*`` matching any namespace Returns: True if the element has a child with name ``child_name`` False otherwise """ xpath = f".//{LxmlElement.namespaces_decorate(namespace)}{child_name}" return len(element.findall(xpath)) > 0
[docs] @staticmethod def find_all_children_with_attribute_value( element: Element, name: str, attrib_name: str, attrib_value: str, namespace: str = "*", ) -> List: """Find all the XML elements with the attribute name 'attrib_name' having a given value 'attrib_value' Args: element: The XML parent node to search from name: The name of the element with attrib_name and attrib_value attrib_name: The name of the attribute attrib_value: The value of the attribute namespace: The search namespace. Default is the wildcard ``*`` matching any namespace Returns: All children having attributes with name ``attrib_name`` and value ``attrib_value``. Empty list if no children can be found """ xpath = f'.//{LxmlElement.namespaces_decorate(namespace)}{name}[@{attrib_name}="{attrib_value}"]' return element.findall(xpath)
[docs] @staticmethod def find_all_children_with_name_and_attribute( element: Element, child_name: str, attrib_name: str, namespace: str = "*" ) -> List: """Find all the XML elements with name ``name`` and attribute name ``attrib_name`` Args: element: The XML parent node to search from child_name: The name of the children elements attrib_name: The name of the attribute namespace: The search namespace. Default is the wildcard matching any namespace Returns: All elements having attributes with name ``attrib_name``. An empty list if no children can be found """ xpath = f".//{LxmlElement.namespaces_decorate(namespace)}{child_name}[@{attrib_name}]" return element.findall(xpath)
[docs] @staticmethod # Finding the assertion in the code. def find_assertion(element: Element, namespace="*") -> Union[str, None]: """Find any assertions under the ``element`` Args: element: The XML parent node to search from namespace: The search namespace. Default is the wildcard ``*`` matching any namespace Returns: The assertion test as a string. None if no assertion tag is found """ test = None # Assertions xpath = f'.//{LxmlElement.namespaces_decorate(namespace)}{"assert"}' asserts = element.findall(xpath) if len(asserts) > 0: attrib = asserts[0].attrib if "test" in attrib: test = attrib["test"] return test
[docs] @staticmethod def namespace_prefix(element: str) -> Union[str, None]: """Returns the namespace prefix of an element if any Args: element: The element name with or without prefix as a string Returns: The element prefix string or None if no prefix """ i = element.find(":") if not i == -1: element = element[0:i] return element return None
[docs] @staticmethod def replace_ns_tag_with_ns_prefix(element: str, namespaces: Dict) -> str: """Replace the namespace tag with a mapped namespace prefix. Args: element: The element name with or without prefix as a string namespaces: the namespace tag to prefix mapping Returns: the element with prefix """ nsprefix = list(iter(namespaces)) nstags = list(iter(namespaces.values())) qn = QName(element) prefix = "" try: index = nstags.index(qn.namespace) prefix = nsprefix[index] if prefix is None: nstags.pop(index) nsprefix.pop(index) index = nstags.index(qn.namespace) prefix = nsprefix.index(index) except ValueError as e: logger.error(f"{qn.namespace} is not in the namespace list: {e}") if prefix == "": logger.debug(f"Empty namespace prefix in element {qn.localname}") return f"{prefix}:{qn.localname}"
[docs] @staticmethod def namespaces_decorate(ns: str) -> str: """Decorate a string with curly brackets to form a valid XML namespace Args: ns: The namespace Returns: The curly decorated namespace """ return "{" + ns + "}"
[docs] @staticmethod def strip_namespace_prefix(element: str) -> str: """Returns the element name without the namespace prefix Args: element: The element name with or without prefix as a string Returns: The element without namespace prefix """ i = element.find(":") if not i == -1: element = element[i + 1 : len(element)] return element
[docs] @staticmethod def strip_namespace_tag(element: str) -> str: """Returns the element name without the namespace tag Args: element: The element name with or without namespace as a string Returns: The element without namespace tag """ i = element.find("}") if not i == -1: element = element[i + 1 : len(element)] return element