ok
Direktori : /opt/cloudlinux/venv/lib/python3.11/site-packages/lxml/ |
Current File : //opt/cloudlinux/venv/lib/python3.11/site-packages/lxml/objectify.pyx |
# cython: binding=True # cython: auto_pickle=False # cython: language_level=2 """ The ``lxml.objectify`` module implements a Python object API for XML. It is based on `lxml.etree`. """ from __future__ import absolute_import cimport cython from lxml.includes.etreepublic cimport _Document, _Element, ElementBase, ElementClassLookup from lxml.includes.etreepublic cimport elementFactory, import_lxml__etree, textOf, pyunicode from lxml.includes.tree cimport const_xmlChar, _xcstr from lxml cimport python from lxml.includes cimport tree cimport lxml.includes.etreepublic as cetree cimport libc.string as cstring_h # not to be confused with stdlib 'string' from libc.string cimport const_char __all__ = [u'BoolElement', u'DataElement', u'E', u'Element', u'ElementMaker', u'FloatElement', u'IntElement', u'LongElement', u'NoneElement', u'NumberElement', u'ObjectPath', u'ObjectifiedDataElement', u'ObjectifiedElement', u'ObjectifyElementClassLookup', u'PYTYPE_ATTRIBUTE', u'PyType', u'StringElement', u'SubElement', u'XML', u'annotate', u'deannotate', u'dump', u'enable_recursive_str', u'fromstring', u'getRegisteredTypes', u'makeparser', u'parse', u'pyannotate', u'pytypename', u'set_default_parser', u'set_pytype_attribute_tag', u'xsiannotate'] cdef object etree from lxml import etree # initialize C-API of lxml.etree import_lxml__etree() __version__ = etree.__version__ cdef object _float_is_inf, _float_is_nan from math import isinf as _float_is_inf, isnan as _float_is_nan cdef object re import re cdef tuple IGNORABLE_ERRORS = (ValueError, TypeError) cdef object is_special_method = re.compile(u'__.*__$').match # Duplicated from apihelpers.pxi, since dependencies obstruct # including apihelpers.pxi. cdef strrepr(s): """Build a representation of strings which we can use in __repr__ methods, e.g. _Element.__repr__(). """ return s.encode('unicode-escape') if python.IS_PYTHON2 else s cdef object _typename(object t): cdef const_char* c_name c_name = python._fqtypename(t) s = cstring_h.strrchr(c_name, c'.') if s is not NULL: c_name = s + 1 return pyunicode(<const_xmlChar*>c_name) # namespace/name for "pytype" hint attribute cdef object PYTYPE_NAMESPACE cdef bytes PYTYPE_NAMESPACE_UTF8 cdef const_xmlChar* _PYTYPE_NAMESPACE cdef object PYTYPE_ATTRIBUTE_NAME cdef bytes PYTYPE_ATTRIBUTE_NAME_UTF8 cdef const_xmlChar* _PYTYPE_ATTRIBUTE_NAME PYTYPE_ATTRIBUTE = None cdef unicode TREE_PYTYPE_NAME = u"TREE" cdef tuple _unicodeAndUtf8(s): return s, python.PyUnicode_AsUTF8String(s) def set_pytype_attribute_tag(attribute_tag=None): u"""set_pytype_attribute_tag(attribute_tag=None) Change name and namespace of the XML attribute that holds Python type information. Do not use this unless you know what you are doing. Reset by calling without argument. Default: "{http://codespeak.net/lxml/objectify/pytype}pytype" """ global PYTYPE_ATTRIBUTE, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME global PYTYPE_NAMESPACE, PYTYPE_NAMESPACE_UTF8 global PYTYPE_ATTRIBUTE_NAME, PYTYPE_ATTRIBUTE_NAME_UTF8 if attribute_tag is None: PYTYPE_NAMESPACE, PYTYPE_NAMESPACE_UTF8 = \ _unicodeAndUtf8(u"http://codespeak.net/lxml/objectify/pytype") PYTYPE_ATTRIBUTE_NAME, PYTYPE_ATTRIBUTE_NAME_UTF8 = \ _unicodeAndUtf8(u"pytype") else: PYTYPE_NAMESPACE_UTF8, PYTYPE_ATTRIBUTE_NAME_UTF8 = \ cetree.getNsTag(attribute_tag) PYTYPE_NAMESPACE = PYTYPE_NAMESPACE_UTF8.decode('utf8') PYTYPE_ATTRIBUTE_NAME = PYTYPE_ATTRIBUTE_NAME_UTF8.decode('utf8') _PYTYPE_NAMESPACE = PYTYPE_NAMESPACE_UTF8 _PYTYPE_ATTRIBUTE_NAME = PYTYPE_ATTRIBUTE_NAME_UTF8 PYTYPE_ATTRIBUTE = cetree.namespacedNameFromNsName( _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) set_pytype_attribute_tag() # namespaces for XML Schema cdef object XML_SCHEMA_NS, XML_SCHEMA_NS_UTF8 XML_SCHEMA_NS, XML_SCHEMA_NS_UTF8 = \ _unicodeAndUtf8(u"http://www.w3.org/2001/XMLSchema") cdef const_xmlChar* _XML_SCHEMA_NS = _xcstr(XML_SCHEMA_NS_UTF8) cdef object XML_SCHEMA_INSTANCE_NS, XML_SCHEMA_INSTANCE_NS_UTF8 XML_SCHEMA_INSTANCE_NS, XML_SCHEMA_INSTANCE_NS_UTF8 = \ _unicodeAndUtf8(u"http://www.w3.org/2001/XMLSchema-instance") cdef const_xmlChar* _XML_SCHEMA_INSTANCE_NS = _xcstr(XML_SCHEMA_INSTANCE_NS_UTF8) cdef object XML_SCHEMA_INSTANCE_NIL_ATTR = u"{%s}nil" % XML_SCHEMA_INSTANCE_NS cdef object XML_SCHEMA_INSTANCE_TYPE_ATTR = u"{%s}type" % XML_SCHEMA_INSTANCE_NS ################################################################################ # Element class for the main API cdef class ObjectifiedElement(ElementBase): u"""Main XML Element class. Element children are accessed as object attributes. Multiple children with the same name are available through a list index. Example:: >>> root = XML("<root><c1><c2>0</c2><c2>1</c2></c1></root>") >>> second_c2 = root.c1.c2[1] >>> print(second_c2.text) 1 Note that you cannot (and must not) instantiate this class or its subclasses. """ def __iter__(self): u"""Iterate over self and all siblings with the same tag. """ parent = self.getparent() if parent is None: return iter([self]) return etree.ElementChildIterator(parent, tag=self.tag) def __str__(self): if __RECURSIVE_STR: return _dump(self, 0) else: return textOf(self._c_node) or u'' # pickle support for objectified Element def __reduce__(self): return fromstring, (etree.tostring(self),) @property def text(self): return textOf(self._c_node) @property def __dict__(self): """A fake implementation for __dict__ to support dir() etc. Note that this only considers the first child with a given name. """ cdef _Element child cdef dict children c_ns = tree._getNs(self._c_node) tag = u"{%s}*" % pyunicode(c_ns) if c_ns is not NULL else None children = {} for child in etree.ElementChildIterator(self, tag=tag): if c_ns is NULL and tree._getNs(child._c_node) is not NULL: continue name = pyunicode(child._c_node.name) if name not in children: children[name] = child return children def __len__(self): u"""Count self and siblings with the same tag. """ return _countSiblings(self._c_node) def countchildren(self): u"""countchildren(self) Return the number of children of this element, regardless of their name. """ # copied from etree cdef Py_ssize_t c cdef tree.xmlNode* c_node c = 0 c_node = self._c_node.children while c_node is not NULL: if tree._isElement(c_node): c += 1 c_node = c_node.next return c def getchildren(self): u"""getchildren(self) Returns a sequence of all direct children. The elements are returned in document order. """ cdef tree.xmlNode* c_node result = [] c_node = self._c_node.children while c_node is not NULL: if tree._isElement(c_node): result.append(cetree.elementFactory(self._doc, c_node)) c_node = c_node.next return result def __getattr__(self, tag): u"""Return the (first) child with the given tag name. If no namespace is provided, the child will be looked up in the same one as self. """ if is_special_method(tag): return object.__getattr__(self, tag) return _lookupChildOrRaise(self, tag) def __setattr__(self, tag, value): u"""Set the value of the (first) child with the given tag name. If no namespace is provided, the child will be looked up in the same one as self. """ cdef _Element element # properties are looked up /after/ __setattr__, so we must emulate them if tag == u'text' or tag == u'pyval': # read-only ! raise TypeError, f"attribute '{tag}' of '{_typename(self)}' objects is not writable" elif tag == u'tail': cetree.setTailText(self._c_node, value) return elif tag == u'tag': ElementBase.tag.__set__(self, value) return elif tag == u'base': ElementBase.base.__set__(self, value) return tag = _buildChildTag(self, tag) element = _lookupChild(self, tag) if element is None: _appendValue(self, tag, value) else: _replaceElement(element, value) def __delattr__(self, tag): child = _lookupChildOrRaise(self, tag) self.remove(child) def addattr(self, tag, value): u"""addattr(self, tag, value) Add a child value to the element. As opposed to append(), it sets a data value, not an element. """ _appendValue(self, _buildChildTag(self, tag), value) def __getitem__(self, key): u"""Return a sibling, counting from the first child of the parent. The method behaves like both a dict and a sequence. * If argument is an integer, returns the sibling at that position. * If argument is a string, does the same as getattr(). This can be used to provide namespaces for element lookup, or to look up children with special names (``text`` etc.). * If argument is a slice object, returns the matching slice. """ cdef tree.xmlNode* c_self_node cdef tree.xmlNode* c_parent cdef tree.xmlNode* c_node cdef Py_ssize_t c_index if python._isString(key): return _lookupChildOrRaise(self, key) elif isinstance(key, slice): return list(self)[key] # normal item access c_index = key # raises TypeError if necessary c_self_node = self._c_node c_parent = c_self_node.parent if c_parent is NULL: if c_index == 0 or c_index == -1: return self raise IndexError, unicode(key) if c_index < 0: c_node = c_parent.last else: c_node = c_parent.children c_node = _findFollowingSibling( c_node, tree._getNs(c_self_node), c_self_node.name, c_index) if c_node is NULL: raise IndexError, unicode(key) return elementFactory(self._doc, c_node) def __setitem__(self, key, value): u"""Set the value of a sibling, counting from the first child of the parent. Implements key assignment, item assignment and slice assignment. * If argument is an integer, sets the sibling at that position. * If argument is a string, does the same as setattr(). This is used to provide namespaces for element lookup. * If argument is a sequence (list, tuple, etc.), assign the contained items to the siblings. """ cdef _Element element cdef tree.xmlNode* c_node if python._isString(key): key = _buildChildTag(self, key) element = _lookupChild(self, key) if element is None: _appendValue(self, key, value) else: _replaceElement(element, value) return if self._c_node.parent is NULL: # the 'root[i] = ...' case raise TypeError, u"assignment to root element is invalid" if isinstance(key, slice): # slice assignment _setSlice(key, self, value) else: # normal index assignment if key < 0: c_node = self._c_node.parent.last else: c_node = self._c_node.parent.children c_node = _findFollowingSibling( c_node, tree._getNs(self._c_node), self._c_node.name, key) if c_node is NULL: raise IndexError, unicode(key) element = elementFactory(self._doc, c_node) _replaceElement(element, value) def __delitem__(self, key): parent = self.getparent() if parent is None: raise TypeError, u"deleting items not supported by root element" if isinstance(key, slice): # slice deletion del_items = list(self)[key] remove = parent.remove for el in del_items: remove(el) else: # normal index deletion sibling = self.__getitem__(key) parent.remove(sibling) def descendantpaths(self, prefix=None): u"""descendantpaths(self, prefix=None) Returns a list of object path expressions for all descendants. """ if prefix is not None and not python._isString(prefix): prefix = u'.'.join(prefix) return _build_descendant_paths(self._c_node, prefix) cdef inline bint _tagMatches(tree.xmlNode* c_node, const_xmlChar* c_href, const_xmlChar* c_name): if c_node.name != c_name: return 0 if c_href == NULL: return 1 c_node_href = tree._getNs(c_node) if c_node_href == NULL: return c_href[0] == c'\0' return tree.xmlStrcmp(c_node_href, c_href) == 0 cdef Py_ssize_t _countSiblings(tree.xmlNode* c_start_node): cdef tree.xmlNode* c_node cdef Py_ssize_t count c_tag = c_start_node.name c_href = tree._getNs(c_start_node) count = 1 c_node = c_start_node.next while c_node is not NULL: if c_node.type == tree.XML_ELEMENT_NODE and \ _tagMatches(c_node, c_href, c_tag): count += 1 c_node = c_node.next c_node = c_start_node.prev while c_node is not NULL: if c_node.type == tree.XML_ELEMENT_NODE and \ _tagMatches(c_node, c_href, c_tag): count += 1 c_node = c_node.prev return count cdef tree.xmlNode* _findFollowingSibling(tree.xmlNode* c_node, const_xmlChar* href, const_xmlChar* name, Py_ssize_t index): cdef tree.xmlNode* (*next)(tree.xmlNode*) if index >= 0: next = cetree.nextElement else: index = -1 - index next = cetree.previousElement while c_node is not NULL: if c_node.type == tree.XML_ELEMENT_NODE and \ _tagMatches(c_node, href, name): index = index - 1 if index < 0: return c_node c_node = next(c_node) return NULL cdef object _lookupChild(_Element parent, tag): cdef tree.xmlNode* c_result cdef tree.xmlNode* c_node c_node = parent._c_node ns, tag = cetree.getNsTagWithEmptyNs(tag) c_tag = tree.xmlDictExists( c_node.doc.dict, _xcstr(tag), python.PyBytes_GET_SIZE(tag)) if c_tag is NULL: return None # not in the hash map => not in the tree if ns is None: # either inherit ns from parent or use empty (i.e. no) namespace c_href = tree._getNs(c_node) or <const_xmlChar*>'' else: c_href = _xcstr(ns) c_result = _findFollowingSibling(c_node.children, c_href, c_tag, 0) if c_result is NULL: return None return elementFactory(parent._doc, c_result) cdef object _lookupChildOrRaise(_Element parent, tag): element = _lookupChild(parent, tag) if element is None: raise AttributeError, u"no such child: " + _buildChildTag(parent, tag) return element cdef object _buildChildTag(_Element parent, tag): ns, tag = cetree.getNsTag(tag) c_tag = _xcstr(tag) c_href = tree._getNs(parent._c_node) if ns is None else _xcstr(ns) return cetree.namespacedNameFromNsName(c_href, c_tag) cdef _replaceElement(_Element element, value): cdef _Element new_element if isinstance(value, _Element): # deep copy the new element new_element = cetree.deepcopyNodeToDocument( element._doc, (<_Element>value)._c_node) new_element.tag = element.tag elif isinstance(value, (list, tuple)): element[:] = value return else: new_element = element.makeelement(element.tag) _setElementValue(new_element, value) element.getparent().replace(element, new_element) cdef _appendValue(_Element parent, tag, value): cdef _Element new_element if isinstance(value, _Element): # deep copy the new element new_element = cetree.deepcopyNodeToDocument( parent._doc, (<_Element>value)._c_node) new_element.tag = tag cetree.appendChildToElement(parent, new_element) elif isinstance(value, (list, tuple)): for item in value: _appendValue(parent, tag, item) else: new_element = cetree.makeElement( tag, parent._doc, None, None, None, None, None) _setElementValue(new_element, value) cetree.appendChildToElement(parent, new_element) cdef _setElementValue(_Element element, value): if value is None: cetree.setAttributeValue( element, XML_SCHEMA_INSTANCE_NIL_ATTR, u"true") elif isinstance(value, _Element): _replaceElement(element, value) return else: cetree.delAttributeFromNsName( element._c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"nil") if python._isString(value): pytype_name = u"str" py_type = <PyType>_PYTYPE_DICT.get(pytype_name) else: pytype_name = _typename(value) py_type = <PyType>_PYTYPE_DICT.get(pytype_name) if py_type is not None: value = py_type.stringify(value) else: value = unicode(value) if py_type is not None: cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, pytype_name) else: cetree.delAttributeFromNsName( element._c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) cetree.setNodeText(element._c_node, value) cdef _setSlice(sliceobject, _Element target, items): cdef _Element parent cdef tree.xmlNode* c_node cdef Py_ssize_t c_step, c_start, pos # collect existing slice if (<slice>sliceobject).step is None: c_step = 1 else: c_step = (<slice>sliceobject).step if c_step == 0: raise ValueError, u"Invalid slice" cdef list del_items = target[sliceobject] # collect new values new_items = [] tag = target.tag for item in items: if isinstance(item, _Element): # deep copy the new element new_element = cetree.deepcopyNodeToDocument( target._doc, (<_Element>item)._c_node) new_element.tag = tag else: new_element = cetree.makeElement( tag, target._doc, None, None, None, None, None) _setElementValue(new_element, item) new_items.append(new_element) # sanity check - raise what a list would raise if c_step != 1 and len(del_items) != len(new_items): raise ValueError, \ f"attempt to assign sequence of size {len(new_items)} to extended slice of size {len(del_items)}" # replace existing items pos = 0 parent = target.getparent() replace = parent.replace while pos < len(new_items) and pos < len(del_items): replace(del_items[pos], new_items[pos]) pos += 1 # remove leftover items if pos < len(del_items): remove = parent.remove while pos < len(del_items): remove(del_items[pos]) pos += 1 # append remaining new items if pos < len(new_items): # the sanity check above guarantees (step == 1) if pos > 0: item = new_items[pos-1] else: if (<slice>sliceobject).start > 0: c_node = parent._c_node.children else: c_node = parent._c_node.last c_node = _findFollowingSibling( c_node, tree._getNs(target._c_node), target._c_node.name, (<slice>sliceobject).start - 1) if c_node is NULL: while pos < len(new_items): cetree.appendChildToElement(parent, new_items[pos]) pos += 1 return item = cetree.elementFactory(parent._doc, c_node) while pos < len(new_items): add = item.addnext item = new_items[pos] add(item) pos += 1 ################################################################################ # Data type support in subclasses cdef class ObjectifiedDataElement(ObjectifiedElement): u"""This is the base class for all data type Elements. Subclasses should override the 'pyval' property and possibly the __str__ method. """ @property def pyval(self): return textOf(self._c_node) def __str__(self): return textOf(self._c_node) or '' def __repr__(self): return strrepr(textOf(self._c_node) or '') def _setText(self, s): u"""For use in subclasses only. Don't use unless you know what you are doing. """ cetree.setNodeText(self._c_node, s) cdef class NumberElement(ObjectifiedDataElement): cdef object _parse_value def _setValueParser(self, function): u"""Set the function that parses the Python value from a string. Do not use this unless you know what you are doing. """ self._parse_value = function @property def pyval(self): return _parseNumber(self) def __int__(self): return int(_parseNumber(self)) def __long__(self): return long(_parseNumber(self)) def __float__(self): return float(_parseNumber(self)) def __complex__(self): return complex(_parseNumber(self)) def __str__(self): return unicode(_parseNumber(self)) def __repr__(self): return repr(_parseNumber(self)) def __oct__(self): return oct(_parseNumber(self)) def __hex__(self): return hex(_parseNumber(self)) def __richcmp__(self, other, int op): return _richcmpPyvals(self, other, op) def __hash__(self): return hash(_parseNumber(self)) def __add__(self, other): return _numericValueOf(self) + _numericValueOf(other) def __radd__(self, other): return _numericValueOf(other) + _numericValueOf(self) def __sub__(self, other): return _numericValueOf(self) - _numericValueOf(other) def __rsub__(self, other): return _numericValueOf(other) - _numericValueOf(self) def __mul__(self, other): return _numericValueOf(self) * _numericValueOf(other) def __rmul__(self, other): return _numericValueOf(other) * _numericValueOf(self) def __div__(self, other): return _numericValueOf(self) / _numericValueOf(other) def __rdiv__(self, other): return _numericValueOf(other) / _numericValueOf(self) def __truediv__(self, other): return _numericValueOf(self) / _numericValueOf(other) def __rtruediv__(self, other): return _numericValueOf(other) / _numericValueOf(self) def __floordiv__(self, other): return _numericValueOf(self) // _numericValueOf(other) def __rfloordiv__(self, other): return _numericValueOf(other) // _numericValueOf(self) def __mod__(self, other): return _numericValueOf(self) % _numericValueOf(other) def __rmod__(self, other): return _numericValueOf(other) % _numericValueOf(self) def __divmod__(self, other): return divmod(_numericValueOf(self), _numericValueOf(other)) def __rdivmod__(self, other): return divmod(_numericValueOf(other), _numericValueOf(self)) def __pow__(self, other, modulo): if modulo is None: return _numericValueOf(self) ** _numericValueOf(other) else: return pow(_numericValueOf(self), _numericValueOf(other), modulo) def __rpow__(self, other, modulo): if modulo is None: return _numericValueOf(other) ** _numericValueOf(self) else: return pow(_numericValueOf(other), _numericValueOf(self), modulo) def __neg__(self): return - _numericValueOf(self) def __pos__(self): return + _numericValueOf(self) def __abs__(self): return abs( _numericValueOf(self) ) def __bool__(self): return bool(_numericValueOf(self)) def __invert__(self): return ~ _numericValueOf(self) def __lshift__(self, other): return _numericValueOf(self) << _numericValueOf(other) def __rlshift__(self, other): return _numericValueOf(other) << _numericValueOf(self) def __rshift__(self, other): return _numericValueOf(self) >> _numericValueOf(other) def __rrshift__(self, other): return _numericValueOf(other) >> _numericValueOf(self) def __and__(self, other): return _numericValueOf(self) & _numericValueOf(other) def __rand__(self, other): return _numericValueOf(other) & _numericValueOf(self) def __or__(self, other): return _numericValueOf(self) | _numericValueOf(other) def __ror__(self, other): return _numericValueOf(other) | _numericValueOf(self) def __xor__(self, other): return _numericValueOf(self) ^ _numericValueOf(other) def __rxor__(self, other): return _numericValueOf(other) ^ _numericValueOf(self) cdef class IntElement(NumberElement): def _init(self): self._parse_value = int def __index__(self): return int(_parseNumber(self)) cdef class LongElement(NumberElement): def _init(self): self._parse_value = long def __index__(self): return int(_parseNumber(self)) cdef class FloatElement(NumberElement): def _init(self): self._parse_value = float cdef class StringElement(ObjectifiedDataElement): u"""String data class. Note that this class does *not* support the sequence protocol of strings: len(), iter(), str_attr[0], str_attr[0:1], etc. are *not* supported. Instead, use the .text attribute to get a 'real' string. """ @property def pyval(self): return textOf(self._c_node) or u'' def __repr__(self): return repr(textOf(self._c_node) or u'') def strlen(self): text = textOf(self._c_node) if text is None: return 0 else: return len(text) def __bool__(self): return bool(textOf(self._c_node)) def __richcmp__(self, other, int op): return _richcmpPyvals(self, other, op) def __hash__(self): return hash(textOf(self._c_node) or u'') def __add__(self, other): text = _strValueOf(self) other = _strValueOf(other) return text + other def __radd__(self, other): text = _strValueOf(self) other = _strValueOf(other) return other + text def __mul__(self, other): if isinstance(self, StringElement): return (textOf((<StringElement>self)._c_node) or '') * _numericValueOf(other) elif isinstance(other, StringElement): return _numericValueOf(self) * (textOf((<StringElement>other)._c_node) or '') else: return NotImplemented def __rmul__(self, other): return _numericValueOf(other) * (textOf((<StringElement>self)._c_node) or '') def __mod__(self, other): return (_strValueOf(self) or '') % other def __int__(self): return int(textOf(self._c_node)) def __long__(self): return long(textOf(self._c_node)) def __float__(self): return float(textOf(self._c_node)) def __complex__(self): return complex(textOf(self._c_node)) cdef class NoneElement(ObjectifiedDataElement): def __str__(self): return u"None" def __repr__(self): return "None" def __bool__(self): return False def __richcmp__(self, other, int op): if other is None or self is None: return python.PyObject_RichCompare(None, None, op) if isinstance(self, NoneElement): return python.PyObject_RichCompare(None, other, op) else: return python.PyObject_RichCompare(self, None, op) def __hash__(self): return hash(None) @property def pyval(self): return None cdef class BoolElement(IntElement): u"""Boolean type base on string values: 'true' or 'false'. Note that this inherits from IntElement to mimic the behaviour of Python's bool type. """ def _init(self): self._parse_value = _parseBool # wraps as Python callable def __bool__(self): return _parseBool(textOf(self._c_node)) def __int__(self): return 0 + _parseBool(textOf(self._c_node)) def __float__(self): return 0.0 + _parseBool(textOf(self._c_node)) def __richcmp__(self, other, int op): return _richcmpPyvals(self, other, op) def __hash__(self): return hash(_parseBool(textOf(self._c_node))) def __str__(self): return unicode(_parseBool(textOf(self._c_node))) def __repr__(self): return repr(_parseBool(textOf(self._c_node))) @property def pyval(self): return _parseBool(textOf(self._c_node)) cdef _checkBool(s): cdef int value = -1 if s is not None: value = __parseBoolAsInt(s) if value == -1: raise ValueError cdef bint _parseBool(s) except -1: cdef int value if s is None: return False value = __parseBoolAsInt(s) if value == -1: raise ValueError, f"Invalid boolean value: '{s}'" return value cdef inline int __parseBoolAsInt(text) except -2: if text == 'false': return 0 elif text == 'true': return 1 elif text == '0': return 0 elif text == '1': return 1 return -1 cdef object _parseNumber(NumberElement element): return element._parse_value(textOf(element._c_node)) cdef enum NumberParserState: NPS_SPACE_PRE = 0 NPS_SIGN = 1 NPS_DIGITS = 2 NPS_POINT_LEAD = 3 NPS_POINT = 4 NPS_FRACTION = 5 NPS_EXP = 6 NPS_EXP_SIGN = 7 NPS_DIGITS_EXP = 8 NPS_SPACE_TAIL = 9 NPS_INF1 = 20 NPS_INF2 = 21 NPS_INF3 = 22 NPS_NAN1 = 23 NPS_NAN2 = 24 NPS_NAN3 = 25 NPS_ERROR = 99 ctypedef fused bytes_unicode: bytes unicode cdef _checkNumber(bytes_unicode s, bint allow_float): cdef Py_UCS4 c cdef NumberParserState state = NPS_SPACE_PRE for c in s: if c.isdigit() if (bytes_unicode is unicode) else c in b'0123456789': if state in (NPS_DIGITS, NPS_FRACTION, NPS_DIGITS_EXP): pass elif state in (NPS_SPACE_PRE, NPS_SIGN): state = NPS_DIGITS elif state in (NPS_POINT_LEAD, NPS_POINT): state = NPS_FRACTION elif state in (NPS_EXP, NPS_EXP_SIGN): state = NPS_DIGITS_EXP else: state = NPS_ERROR else: if c == u'.': if state in (NPS_SPACE_PRE, NPS_SIGN): state = NPS_POINT_LEAD elif state == NPS_DIGITS: state = NPS_POINT else: state = NPS_ERROR if not allow_float: state = NPS_ERROR elif c in u'-+': if state == NPS_SPACE_PRE: state = NPS_SIGN elif state == NPS_EXP: state = NPS_EXP_SIGN else: state = NPS_ERROR elif c == u'E': if state in (NPS_DIGITS, NPS_POINT, NPS_FRACTION): state = NPS_EXP else: state = NPS_ERROR if not allow_float: state = NPS_ERROR # Allow INF and NaN. XMLSchema requires case, we don't, like Python. elif c in u'iI': state = NPS_INF1 if allow_float and state in (NPS_SPACE_PRE, NPS_SIGN) else NPS_ERROR elif c in u'fF': state = NPS_INF3 if state == NPS_INF2 else NPS_ERROR elif c in u'aA': state = NPS_NAN2 if state == NPS_NAN1 else NPS_ERROR elif c in u'nN': # Python also allows [+-]NaN, so let's accept that. if state in (NPS_SPACE_PRE, NPS_SIGN): state = NPS_NAN1 if allow_float else NPS_ERROR elif state == NPS_NAN2: state = NPS_NAN3 elif state == NPS_INF1: state = NPS_INF2 else: state = NPS_ERROR # Allow spaces around text values. else: if c.isspace() if (bytes_unicode is unicode) else c in b'\x09\x0a\x0b\x0c\x0d\x20': if state in (NPS_SPACE_PRE, NPS_SPACE_TAIL): pass elif state in (NPS_DIGITS, NPS_POINT, NPS_FRACTION, NPS_DIGITS_EXP, NPS_INF3, NPS_NAN3): state = NPS_SPACE_TAIL else: state = NPS_ERROR else: state = NPS_ERROR if state == NPS_ERROR: break if state not in (NPS_DIGITS, NPS_FRACTION, NPS_POINT, NPS_DIGITS_EXP, NPS_INF3, NPS_NAN3, NPS_SPACE_TAIL): raise ValueError cdef _checkInt(s): if python.IS_PYTHON2 and type(s) is bytes: return _checkNumber(<bytes>s, allow_float=False) else: return _checkNumber(<unicode>s, allow_float=False) cdef _checkFloat(s): if python.IS_PYTHON2 and type(s) is bytes: return _checkNumber(<bytes>s, allow_float=True) else: return _checkNumber(<unicode>s, allow_float=True) cdef object _strValueOf(obj): if python._isString(obj): return obj if isinstance(obj, _Element): return textOf((<_Element>obj)._c_node) or u'' if obj is None: return u'' return unicode(obj) cdef object _numericValueOf(obj): if isinstance(obj, NumberElement): return _parseNumber(<NumberElement>obj) try: # not always numeric, but Python will raise the right exception return obj.pyval except AttributeError: pass return obj cdef _richcmpPyvals(left, right, int op): left = getattr(left, 'pyval', left) right = getattr(right, 'pyval', right) return python.PyObject_RichCompare(left, right, op) ################################################################################ # Python type registry cdef class PyType: u"""PyType(self, name, type_check, type_class, stringify=None) User defined type. Named type that contains a type check function, a type class that inherits from ObjectifiedDataElement and an optional "stringification" function. The type check must take a string as argument and raise ValueError or TypeError if it cannot handle the string value. It may be None in which case it is not considered for type guessing. For registered named types, the 'stringify' function (or unicode() if None) is used to convert a Python object with type name 'name' to the string representation stored in the XML tree. Example:: PyType('int', int, MyIntClass).register() Note that the order in which types are registered matters. The first matching type will be used. """ cdef readonly object name cdef readonly object type_check cdef readonly object stringify cdef object _type cdef list _schema_types def __init__(self, name, type_check, type_class, stringify=None): if isinstance(name, bytes): name = (<bytes>name).decode('ascii') elif not isinstance(name, unicode): raise TypeError, u"Type name must be a string" if type_check is not None and not callable(type_check): raise TypeError, u"Type check function must be callable (or None)" if name != TREE_PYTYPE_NAME and \ not issubclass(type_class, ObjectifiedDataElement): raise TypeError, \ u"Data classes must inherit from ObjectifiedDataElement" self.name = name self._type = type_class self.type_check = type_check if stringify is None: stringify = unicode self.stringify = stringify self._schema_types = [] def __repr__(self): return "PyType(%s, %s)" % (self.name, self._type.__name__) def register(self, before=None, after=None): u"""register(self, before=None, after=None) Register the type. The additional keyword arguments 'before' and 'after' accept a sequence of type names that must appear before/after the new type in the type list. If any of them is not currently known, it is simply ignored. Raises ValueError if the dependencies cannot be fulfilled. """ if self.name == TREE_PYTYPE_NAME: raise ValueError, u"Cannot register tree type" if self.type_check is not None: for item in _TYPE_CHECKS: if item[0] is self.type_check: _TYPE_CHECKS.remove(item) break entry = (self.type_check, self) first_pos = 0 last_pos = -1 if before or after: if before is None: before = () elif after is None: after = () for i, (check, pytype) in enumerate(_TYPE_CHECKS): if last_pos == -1 and pytype.name in before: last_pos = i if pytype.name in after: first_pos = i+1 if last_pos == -1: _TYPE_CHECKS.append(entry) elif first_pos > last_pos: raise ValueError, u"inconsistent before/after dependencies" else: _TYPE_CHECKS.insert(last_pos, entry) _PYTYPE_DICT[self.name] = self for xs_type in self._schema_types: _SCHEMA_TYPE_DICT[xs_type] = self def unregister(self): u"unregister(self)" if _PYTYPE_DICT.get(self.name) is self: del _PYTYPE_DICT[self.name] for xs_type, pytype in list(_SCHEMA_TYPE_DICT.items()): if pytype is self: del _SCHEMA_TYPE_DICT[xs_type] if self.type_check is None: return try: _TYPE_CHECKS.remove( (self.type_check, self) ) except ValueError: pass property xmlSchemaTypes: u"""The list of XML Schema datatypes this Python type maps to. Note that this must be set before registering the type! """ def __get__(self): return self._schema_types def __set__(self, types): self._schema_types = list(map(unicode, types)) cdef dict _PYTYPE_DICT = {} cdef dict _SCHEMA_TYPE_DICT = {} cdef list _TYPE_CHECKS = [] cdef unicode _xml_bool(value): return u"true" if value else u"false" cdef unicode _xml_float(value): if _float_is_inf(value): if value > 0: return u"INF" return u"-INF" if _float_is_nan(value): return u"NaN" return unicode(repr(value)) cdef _pytypename(obj): return u"str" if python._isString(obj) else _typename(obj) def pytypename(obj): u"""pytypename(obj) Find the name of the corresponding PyType for a Python object. """ return _pytypename(obj) cdef _registerPyTypes(): pytype = PyType(u'int', _checkInt, IntElement) # wraps functions for Python pytype.xmlSchemaTypes = (u"integer", u"int", u"short", u"byte", u"unsignedShort", u"unsignedByte", u"nonPositiveInteger", u"negativeInteger", u"long", u"nonNegativeInteger", u"unsignedLong", u"unsignedInt", u"positiveInteger",) pytype.register() # 'long' type just for backwards compatibility pytype = PyType(u'long', None, IntElement) pytype.register() pytype = PyType(u'float', _checkFloat, FloatElement, _xml_float) # wraps functions for Python pytype.xmlSchemaTypes = (u"double", u"float") pytype.register() pytype = PyType(u'bool', _checkBool, BoolElement, _xml_bool) # wraps functions for Python pytype.xmlSchemaTypes = (u"boolean",) pytype.register() pytype = PyType(u'str', None, StringElement) pytype.xmlSchemaTypes = (u"string", u"normalizedString", u"token", u"language", u"Name", u"NCName", u"ID", u"IDREF", u"ENTITY", u"NMTOKEN", ) pytype.register() # since lxml 2.0 pytype = PyType(u'NoneType', None, NoneElement) pytype.register() # backwards compatibility pytype = PyType(u'none', None, NoneElement) pytype.register() # non-registered PyType for inner tree elements cdef PyType TREE_PYTYPE = PyType(TREE_PYTYPE_NAME, None, ObjectifiedElement) _registerPyTypes() def getRegisteredTypes(): u"""getRegisteredTypes() Returns a list of the currently registered PyType objects. To add a new type, retrieve this list and call unregister() for all entries. Then add the new type at a suitable position (possibly replacing an existing one) and call register() for all entries. This is necessary if the new type interferes with the type check functions of existing ones (normally only int/float/bool) and must the tried before other types. To add a type that is not yet parsable by the current type check functions, you can simply register() it, which will append it to the end of the type list. """ cdef list types = [] cdef set known = set() for check, pytype in _TYPE_CHECKS: name = pytype.name if name not in known: known.add(name) types.append(pytype) for pytype in _PYTYPE_DICT.values(): name = pytype.name if name not in known: known.add(name) types.append(pytype) return types cdef PyType _guessPyType(value, PyType defaulttype): if value is None: return None for type_check, tested_pytype in _TYPE_CHECKS: try: type_check(value) return <PyType>tested_pytype except IGNORABLE_ERRORS: # could not be parsed as the specified type => ignore pass return defaulttype cdef object _guessElementClass(tree.xmlNode* c_node): value = textOf(c_node) if value is None: return None if value == '': return StringElement for type_check, pytype in _TYPE_CHECKS: try: type_check(value) return (<PyType>pytype)._type except IGNORABLE_ERRORS: pass return None ################################################################################ # adapted ElementMaker supports registered PyTypes @cython.final @cython.internal cdef class _ObjectifyElementMakerCaller: cdef object _tag cdef object _nsmap cdef object _element_factory cdef bint _annotate def __call__(self, *children, **attrib): u"__call__(self, *children, **attrib)" cdef _ObjectifyElementMakerCaller elementMaker cdef _Element element cdef _Element childElement cdef bint has_children cdef bint has_string_value if self._element_factory is None: element = _makeElement(self._tag, None, attrib, self._nsmap) else: element = self._element_factory(self._tag, attrib, self._nsmap) pytype_name = None has_children = False has_string_value = False for child in children: if child is None: if len(children) == 1: cetree.setAttributeValue( element, XML_SCHEMA_INSTANCE_NIL_ATTR, u"true") elif python._isString(child): _add_text(element, child) has_string_value = True elif isinstance(child, _Element): cetree.appendChildToElement(element, <_Element>child) has_children = True elif isinstance(child, _ObjectifyElementMakerCaller): elementMaker = <_ObjectifyElementMakerCaller>child if elementMaker._element_factory is None: cetree.makeSubElement(element, elementMaker._tag, None, None, None, None) else: childElement = elementMaker._element_factory( elementMaker._tag) cetree.appendChildToElement(element, childElement) has_children = True elif isinstance(child, dict): for name, value in child.items(): # keyword arguments in attrib take precedence if name in attrib: continue pytype = _PYTYPE_DICT.get(_typename(value)) if pytype is not None: value = (<PyType>pytype).stringify(value) elif not python._isString(value): value = unicode(value) cetree.setAttributeValue(element, name, value) else: if pytype_name is not None: # concatenation always makes the result a string has_string_value = True pytype_name = _typename(child) pytype = _PYTYPE_DICT.get(_typename(child)) if pytype is not None: _add_text(element, (<PyType>pytype).stringify(child)) else: has_string_value = True child = unicode(child) _add_text(element, child) if self._annotate and not has_children: if has_string_value: cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, u"str") elif pytype_name is not None: cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, pytype_name) return element cdef _add_text(_Element elem, text): # add text to the tree in construction, either as element text or # tail text, depending on the current tree state cdef tree.xmlNode* c_child c_child = cetree.findChildBackwards(elem._c_node, 0) if c_child is not NULL: old = cetree.tailOf(c_child) if old is not None: text = old + text cetree.setTailText(c_child, text) else: old = cetree.textOf(elem._c_node) if old is not None: text = old + text cetree.setNodeText(elem._c_node, text) cdef class ElementMaker: u"""ElementMaker(self, namespace=None, nsmap=None, annotate=True, makeelement=None) An ElementMaker that can be used for constructing trees. Example:: >>> M = ElementMaker(annotate=False) >>> attributes = {'class': 'par'} >>> html = M.html( M.body( M.p('hello', attributes, M.br, 'objectify', style="font-weight: bold") ) ) >>> from lxml.etree import tostring >>> print(tostring(html, method='html').decode('ascii')) <html><body><p style="font-weight: bold" class="par">hello<br>objectify</p></body></html> To create tags that are not valid Python identifiers, call the factory directly and pass the tag name as first argument:: >>> root = M('tricky-tag', 'some text') >>> print(root.tag) tricky-tag >>> print(root.text) some text Note that this module has a predefined ElementMaker instance called ``E``. """ cdef object _makeelement cdef object _namespace cdef object _nsmap cdef bint _annotate cdef dict _cache def __init__(self, *, namespace=None, nsmap=None, annotate=True, makeelement=None): if nsmap is None: nsmap = _DEFAULT_NSMAP if annotate else {} self._nsmap = nsmap self._namespace = None if namespace is None else u"{%s}" % namespace self._annotate = annotate if makeelement is not None: if not callable(makeelement): raise TypeError( f"argument of 'makeelement' parameter must be callable, got {type(makeelement)}") self._makeelement = makeelement else: self._makeelement = None self._cache = {} @cython.final cdef _build_element_maker(self, tag, bint caching): cdef _ObjectifyElementMakerCaller element_maker element_maker = _ObjectifyElementMakerCaller.__new__(_ObjectifyElementMakerCaller) if self._namespace is not None and tag[0] != u"{": element_maker._tag = self._namespace + tag else: element_maker._tag = tag element_maker._nsmap = self._nsmap element_maker._annotate = self._annotate element_maker._element_factory = self._makeelement if caching: if len(self._cache) > 200: self._cache.clear() self._cache[tag] = element_maker return element_maker def __getattr__(self, tag): element_maker = self._cache.get(tag) if element_maker is None: if is_special_method(tag): return object.__getattr__(self, tag) return self._build_element_maker(tag, caching=True) return element_maker def __call__(self, tag, *args, **kwargs): element_maker = self._cache.get(tag) if element_maker is None: element_maker = self._build_element_maker( tag, caching=not is_special_method(tag)) return element_maker(*args, **kwargs) ################################################################################ # Recursive element dumping cdef bint __RECURSIVE_STR = 0 # default: off def enable_recursive_str(on=True): u"""enable_recursive_str(on=True) Enable a recursively generated tree representation for str(element), based on objectify.dump(element). """ global __RECURSIVE_STR __RECURSIVE_STR = on def dump(_Element element not None): u"""dump(_Element element not None) Return a recursively generated string representation of an element. """ return _dump(element, 0) cdef object _dump(_Element element, int indent): indentstr = u" " * indent if isinstance(element, ObjectifiedDataElement): value = repr(element) else: value = textOf(element._c_node) if value is not None: if not value.strip(): value = None else: value = repr(value) result = f"{indentstr}{element.tag} = {value} [{_typename(element)}]\n" xsi_ns = u"{%s}" % XML_SCHEMA_INSTANCE_NS pytype_ns = u"{%s}" % PYTYPE_NAMESPACE for name, value in sorted(cetree.iterattributes(element, 3)): if u'{' in name: if name == PYTYPE_ATTRIBUTE: if value == TREE_PYTYPE_NAME: continue else: name = name.replace(pytype_ns, u'py:') name = name.replace(xsi_ns, u'xsi:') result += f"{indentstr} * {name} = {value!r}\n" indent += 1 for child in element.iterchildren(): result += _dump(child, indent) if indent == 1: return result[:-1] # strip last '\n' else: return result ################################################################################ # Pickle support for objectified ElementTree def __unpickleElementTree(data): return etree.ElementTree(fromstring(data)) cdef _setupPickle(elementTreeReduceFunction): if python.IS_PYTHON2: import copy_reg as copyreg else: import copyreg copyreg.pickle(etree._ElementTree, elementTreeReduceFunction, __unpickleElementTree) def pickleReduceElementTree(obj): return __unpickleElementTree, (etree.tostring(obj),) _setupPickle(pickleReduceElementTree) del pickleReduceElementTree ################################################################################ # Element class lookup cdef class ObjectifyElementClassLookup(ElementClassLookup): u"""ObjectifyElementClassLookup(self, tree_class=None, empty_data_class=None) Element class lookup method that uses the objectify classes. """ cdef object empty_data_class cdef object tree_class def __init__(self, tree_class=None, empty_data_class=None): u"""Lookup mechanism for objectify. The default Element classes can be replaced by passing subclasses of ObjectifiedElement and ObjectifiedDataElement as keyword arguments. 'tree_class' defines inner tree classes (defaults to ObjectifiedElement), 'empty_data_class' defines the default class for empty data elements (defaults to StringElement). """ self._lookup_function = _lookupElementClass if tree_class is None: tree_class = ObjectifiedElement self.tree_class = tree_class if empty_data_class is None: empty_data_class = StringElement self.empty_data_class = empty_data_class cdef object _lookupElementClass(state, _Document doc, tree.xmlNode* c_node): cdef ObjectifyElementClassLookup lookup lookup = <ObjectifyElementClassLookup>state # if element has children => no data class if cetree.hasChild(c_node): return lookup.tree_class # if element is defined as xsi:nil, return NoneElement class if u"true" == cetree.attributeValueFromNsName( c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"nil"): return NoneElement # check for Python type hint value = cetree.attributeValueFromNsName( c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) if value is not None: if value == TREE_PYTYPE_NAME: return lookup.tree_class py_type = <PyType>_PYTYPE_DICT.get(value) if py_type is not None: return py_type._type # unknown 'pyval' => try to figure it out ourself, just go on # check for XML Schema type hint value = cetree.attributeValueFromNsName( c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"type") if value is not None: schema_type = <PyType>_SCHEMA_TYPE_DICT.get(value) if schema_type is None and u':' in value: prefix, value = value.split(u':', 1) schema_type = <PyType>_SCHEMA_TYPE_DICT.get(value) if schema_type is not None: return schema_type._type # otherwise determine class based on text content type el_class = _guessElementClass(c_node) if el_class is not None: return el_class # if element is a root node => default to tree node if c_node.parent is NULL or not tree._isElement(c_node.parent): return lookup.tree_class return lookup.empty_data_class ################################################################################ # Type annotations cdef PyType _check_type(tree.xmlNode* c_node, PyType pytype): if pytype is None: return None value = textOf(c_node) try: pytype.type_check(value) return pytype except IGNORABLE_ERRORS: # could not be parsed as the specified type => ignore pass return None def pyannotate(element_or_tree, *, ignore_old=False, ignore_xsi=False, empty_pytype=None): u"""pyannotate(element_or_tree, ignore_old=False, ignore_xsi=False, empty_pytype=None) Recursively annotates the elements of an XML tree with 'pytype' attributes. If the 'ignore_old' keyword argument is True (the default), current 'pytype' attributes will be ignored and replaced. Otherwise, they will be checked and only replaced if they no longer fit the current text value. Setting the keyword argument ``ignore_xsi`` to True makes the function additionally ignore existing ``xsi:type`` annotations. The default is to use them as a type hint. The default annotation of empty elements can be set with the ``empty_pytype`` keyword argument. The default is not to annotate empty elements. Pass 'str', for example, to make string values the default. """ cdef _Element element element = cetree.rootNodeOrRaise(element_or_tree) _annotate(element, 0, 1, ignore_xsi, ignore_old, None, empty_pytype) def xsiannotate(element_or_tree, *, ignore_old=False, ignore_pytype=False, empty_type=None): u"""xsiannotate(element_or_tree, ignore_old=False, ignore_pytype=False, empty_type=None) Recursively annotates the elements of an XML tree with 'xsi:type' attributes. If the 'ignore_old' keyword argument is True (the default), current 'xsi:type' attributes will be ignored and replaced. Otherwise, they will be checked and only replaced if they no longer fit the current text value. Note that the mapping from Python types to XSI types is usually ambiguous. Currently, only the first XSI type name in the corresponding PyType definition will be used for annotation. Thus, you should consider naming the widest type first if you define additional types. Setting the keyword argument ``ignore_pytype`` to True makes the function additionally ignore existing ``pytype`` annotations. The default is to use them as a type hint. The default annotation of empty elements can be set with the ``empty_type`` keyword argument. The default is not to annotate empty elements. Pass 'string', for example, to make string values the default. """ cdef _Element element element = cetree.rootNodeOrRaise(element_or_tree) _annotate(element, 1, 0, ignore_old, ignore_pytype, empty_type, None) def annotate(element_or_tree, *, ignore_old=True, ignore_xsi=False, empty_pytype=None, empty_type=None, annotate_xsi=0, annotate_pytype=1): u"""annotate(element_or_tree, ignore_old=True, ignore_xsi=False, empty_pytype=None, empty_type=None, annotate_xsi=0, annotate_pytype=1) Recursively annotates the elements of an XML tree with 'xsi:type' and/or 'py:pytype' attributes. If the 'ignore_old' keyword argument is True (the default), current 'py:pytype' attributes will be ignored for the type annotation. Set to False if you want reuse existing 'py:pytype' information (iff appropriate for the element text value). If the 'ignore_xsi' keyword argument is False (the default), existing 'xsi:type' attributes will be used for the type annotation, if they fit the element text values. Note that the mapping from Python types to XSI types is usually ambiguous. Currently, only the first XSI type name in the corresponding PyType definition will be used for annotation. Thus, you should consider naming the widest type first if you define additional types. The default 'py:pytype' annotation of empty elements can be set with the ``empty_pytype`` keyword argument. Pass 'str', for example, to make string values the default. The default 'xsi:type' annotation of empty elements can be set with the ``empty_type`` keyword argument. The default is not to annotate empty elements. Pass 'string', for example, to make string values the default. The keyword arguments 'annotate_xsi' (default: 0) and 'annotate_pytype' (default: 1) control which kind(s) of annotation to use. """ cdef _Element element element = cetree.rootNodeOrRaise(element_or_tree) _annotate(element, annotate_xsi, annotate_pytype, ignore_xsi, ignore_old, empty_type, empty_pytype) cdef _annotate(_Element element, bint annotate_xsi, bint annotate_pytype, bint ignore_xsi, bint ignore_pytype, empty_type_name, empty_pytype_name): cdef _Document doc cdef tree.xmlNode* c_node cdef PyType empty_pytype, StrType, NoneType if not annotate_xsi and not annotate_pytype: return if empty_type_name is not None: if isinstance(empty_type_name, bytes): empty_type_name = (<bytes>empty_type_name).decode("ascii") empty_pytype = <PyType>_SCHEMA_TYPE_DICT.get(empty_type_name) elif empty_pytype_name is not None: if isinstance(empty_pytype_name, bytes): empty_pytype_name = (<bytes>empty_pytype_name).decode("ascii") empty_pytype = <PyType>_PYTYPE_DICT.get(empty_pytype_name) else: empty_pytype = None StrType = <PyType>_PYTYPE_DICT.get(u'str') NoneType = <PyType>_PYTYPE_DICT.get(u'NoneType') doc = element._doc c_node = element._c_node tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1) if c_node.type == tree.XML_ELEMENT_NODE: _annotate_element(c_node, doc, annotate_xsi, annotate_pytype, ignore_xsi, ignore_pytype, empty_type_name, empty_pytype, StrType, NoneType) tree.END_FOR_EACH_ELEMENT_FROM(c_node) cdef int _annotate_element(tree.xmlNode* c_node, _Document doc, bint annotate_xsi, bint annotate_pytype, bint ignore_xsi, bint ignore_pytype, empty_type_name, PyType empty_pytype, PyType StrType, PyType NoneType) except -1: cdef tree.xmlNs* c_ns cdef PyType pytype = None typename = None istree = 0 # if element is defined as xsi:nil, represent it as None if cetree.attributeValueFromNsName( c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"nil") == "true": pytype = NoneType if pytype is None and not ignore_xsi: # check that old xsi type value is valid typename = cetree.attributeValueFromNsName( c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"type") if typename is not None: pytype = <PyType>_SCHEMA_TYPE_DICT.get(typename) if pytype is None and u':' in typename: prefix, typename = typename.split(u':', 1) pytype = <PyType>_SCHEMA_TYPE_DICT.get(typename) if pytype is not None and pytype is not StrType: # StrType does not have a typecheck but is the default # anyway, so just accept it if given as type # information pytype = _check_type(c_node, pytype) if pytype is None: typename = None if pytype is None and not ignore_pytype: # check that old pytype value is valid old_pytypename = cetree.attributeValueFromNsName( c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) if old_pytypename is not None: if old_pytypename == TREE_PYTYPE_NAME: if not cetree.hasChild(c_node): # only case where we should keep it, # everything else is clear enough pytype = TREE_PYTYPE else: if old_pytypename == 'none': # transition from lxml 1.x old_pytypename = "NoneType" pytype = <PyType>_PYTYPE_DICT.get(old_pytypename) if pytype is not None and pytype is not StrType: # StrType does not have a typecheck but is the # default anyway, so just accept it if given as # type information pytype = _check_type(c_node, pytype) if pytype is None: # try to guess type if not cetree.hasChild(c_node): # element has no children => data class pytype = _guessPyType(textOf(c_node), StrType) else: istree = 1 if pytype is None: # use default type for empty elements if cetree.hasText(c_node): pytype = StrType else: pytype = empty_pytype if typename is None: typename = empty_type_name if pytype is not None: if typename is None: if not istree: if pytype._schema_types: # pytype->xsi:type is a 1:n mapping # simply take the first typename = pytype._schema_types[0] elif typename not in pytype._schema_types: typename = pytype._schema_types[0] if annotate_xsi: if typename is None or istree: cetree.delAttributeFromNsName( c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"type") else: # update or create attribute typename_utf8 = cetree.utf8(typename) c_ns = cetree.findOrBuildNodeNsPrefix( doc, c_node, _XML_SCHEMA_NS, <unsigned char*>'xsd') if c_ns is not NULL: if b':' in typename_utf8: prefix, name = typename_utf8.split(b':', 1) if c_ns.prefix is NULL or c_ns.prefix[0] == c'\0': typename_utf8 = name elif tree.xmlStrcmp(_xcstr(prefix), c_ns.prefix) != 0: typename_utf8 = (<unsigned char*>c_ns.prefix) + b':' + name elif c_ns.prefix is not NULL and c_ns.prefix[0] != c'\0': typename_utf8 = (<unsigned char*>c_ns.prefix) + b':' + typename_utf8 c_ns = cetree.findOrBuildNodeNsPrefix( doc, c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>'xsi') tree.xmlSetNsProp(c_node, c_ns, <unsigned char*>"type", _xcstr(typename_utf8)) if annotate_pytype: if pytype is None: # delete attribute if it exists cetree.delAttributeFromNsName( c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) else: # update or create attribute c_ns = cetree.findOrBuildNodeNsPrefix( doc, c_node, _PYTYPE_NAMESPACE, <unsigned char*>'py') pytype_name = cetree.utf8(pytype.name) tree.xmlSetNsProp(c_node, c_ns, _PYTYPE_ATTRIBUTE_NAME, _xcstr(pytype_name)) if pytype is NoneType: c_ns = cetree.findOrBuildNodeNsPrefix( doc, c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>'xsi') tree.xmlSetNsProp(c_node, c_ns, <unsigned char*>"nil", <unsigned char*>"true") return 0 cdef object _strip_attributes = etree.strip_attributes cdef object _cleanup_namespaces = etree.cleanup_namespaces def deannotate(element_or_tree, *, bint pytype=True, bint xsi=True, bint xsi_nil=False, bint cleanup_namespaces=False): u"""deannotate(element_or_tree, pytype=True, xsi=True, xsi_nil=False, cleanup_namespaces=False) Recursively de-annotate the elements of an XML tree by removing 'py:pytype' and/or 'xsi:type' attributes and/or 'xsi:nil' attributes. If the 'pytype' keyword argument is True (the default), 'py:pytype' attributes will be removed. If the 'xsi' keyword argument is True (the default), 'xsi:type' attributes will be removed. If the 'xsi_nil' keyword argument is True (default: False), 'xsi:nil' attributes will be removed. Note that this does not touch the namespace declarations by default. If you want to remove unused namespace declarations from the tree, pass the option ``cleanup_namespaces=True``. """ cdef list attribute_names = [] if pytype: attribute_names.append(PYTYPE_ATTRIBUTE) if xsi: attribute_names.append(XML_SCHEMA_INSTANCE_TYPE_ATTR) if xsi_nil: attribute_names.append(XML_SCHEMA_INSTANCE_NIL_ATTR) _strip_attributes(element_or_tree, *attribute_names) if cleanup_namespaces: _cleanup_namespaces(element_or_tree) ################################################################################ # Module level parser setup cdef object __DEFAULT_PARSER __DEFAULT_PARSER = etree.XMLParser(remove_blank_text=True) __DEFAULT_PARSER.set_element_class_lookup( ObjectifyElementClassLookup() ) cdef object objectify_parser objectify_parser = __DEFAULT_PARSER def set_default_parser(new_parser = None): u"""set_default_parser(new_parser = None) Replace the default parser used by objectify's Element() and fromstring() functions. The new parser must be an etree.XMLParser. Call without arguments to reset to the original parser. """ global objectify_parser if new_parser is None: objectify_parser = __DEFAULT_PARSER elif isinstance(new_parser, etree.XMLParser): objectify_parser = new_parser else: raise TypeError, u"parser must inherit from lxml.etree.XMLParser" def makeparser(**kw): u"""makeparser(remove_blank_text=True, **kw) Create a new XML parser for objectify trees. You can pass all keyword arguments that are supported by ``etree.XMLParser()``. Note that this parser defaults to removing blank text. You can disable this by passing the ``remove_blank_text`` boolean keyword option yourself. """ if 'remove_blank_text' not in kw: kw['remove_blank_text'] = True parser = etree.XMLParser(**kw) parser.set_element_class_lookup( ObjectifyElementClassLookup() ) return parser cdef _Element _makeElement(tag, text, attrib, nsmap): return cetree.makeElement(tag, None, objectify_parser, text, None, attrib, nsmap) ################################################################################ # Module level factory functions cdef object _fromstring _fromstring = etree.fromstring SubElement = etree.SubElement def fromstring(xml, parser=None, *, base_url=None): u"""fromstring(xml, parser=None, base_url=None) Objectify specific version of the lxml.etree fromstring() function that uses the objectify parser. You can pass a different parser as second argument. The ``base_url`` keyword argument allows to set the original base URL of the document to support relative Paths when looking up external entities (DTD, XInclude, ...). """ if parser is None: parser = objectify_parser return _fromstring(xml, parser, base_url=base_url) def XML(xml, parser=None, *, base_url=None): u"""XML(xml, parser=None, base_url=None) Objectify specific version of the lxml.etree XML() literal factory that uses the objectify parser. You can pass a different parser as second argument. The ``base_url`` keyword argument allows to set the original base URL of the document to support relative Paths when looking up external entities (DTD, XInclude, ...). """ if parser is None: parser = objectify_parser return _fromstring(xml, parser, base_url=base_url) cdef object _parse _parse = etree.parse def parse(f, parser=None, *, base_url=None): u"""parse(f, parser=None, base_url=None) Parse a file or file-like object with the objectify parser. You can pass a different parser as second argument. The ``base_url`` keyword allows setting a URL for the document when parsing from a file-like object. This is needed when looking up external entities (DTD, XInclude, ...) with relative paths. """ if parser is None: parser = objectify_parser return _parse(f, parser, base_url=base_url) cdef dict _DEFAULT_NSMAP = { "py" : PYTYPE_NAMESPACE, "xsi" : XML_SCHEMA_INSTANCE_NS, "xsd" : XML_SCHEMA_NS } E = ElementMaker() def Element(_tag, attrib=None, nsmap=None, *, _pytype=None, **_attributes): u"""Element(_tag, attrib=None, nsmap=None, _pytype=None, **_attributes) Objectify specific version of the lxml.etree Element() factory that always creates a structural (tree) element. NOTE: requires parser based element class lookup activated in lxml.etree! """ if attrib is not None: if _attributes: attrib = dict(attrib) attrib.update(_attributes) _attributes = attrib if _pytype is None: _pytype = TREE_PYTYPE_NAME if nsmap is None: nsmap = _DEFAULT_NSMAP _attributes[PYTYPE_ATTRIBUTE] = _pytype return _makeElement(_tag, None, _attributes, nsmap) def DataElement(_value, attrib=None, nsmap=None, *, _pytype=None, _xsi=None, **_attributes): u"""DataElement(_value, attrib=None, nsmap=None, _pytype=None, _xsi=None, **_attributes) Create a new element from a Python value and XML attributes taken from keyword arguments or a dictionary passed as second argument. Automatically adds a 'pytype' attribute for the Python type of the value, if the type can be identified. If '_pytype' or '_xsi' are among the keyword arguments, they will be used instead. If the _value argument is an ObjectifiedDataElement instance, its py:pytype, xsi:type and other attributes and nsmap are reused unless they are redefined in attrib and/or keyword arguments. """ if nsmap is None: nsmap = _DEFAULT_NSMAP if attrib is not None and attrib: if _attributes: attrib = dict(attrib) attrib.update(_attributes) _attributes = attrib if isinstance(_value, ObjectifiedElement): if _pytype is None: if _xsi is None and not _attributes and nsmap is _DEFAULT_NSMAP: # special case: no change! return _value.__copy__() if isinstance(_value, ObjectifiedDataElement): # reuse existing nsmap unless redefined in nsmap parameter temp = _value.nsmap if temp is not None and temp: temp = dict(temp) temp.update(nsmap) nsmap = temp # reuse existing attributes unless redefined in attrib/_attributes temp = _value.attrib if temp is not None and temp: temp = dict(temp) temp.update(_attributes) _attributes = temp # reuse existing xsi:type or py:pytype attributes, unless provided as # arguments if _xsi is None and _pytype is None: _xsi = _attributes.get(XML_SCHEMA_INSTANCE_TYPE_ATTR) _pytype = _attributes.get(PYTYPE_ATTRIBUTE) if _xsi is not None: if u':' in _xsi: prefix, name = _xsi.split(u':', 1) ns = nsmap.get(prefix) if ns != XML_SCHEMA_NS: raise ValueError, u"XSD types require the XSD namespace" elif nsmap is _DEFAULT_NSMAP: name = _xsi _xsi = u'xsd:' + _xsi else: name = _xsi for prefix, ns in nsmap.items(): if ns == XML_SCHEMA_NS: if prefix is not None and prefix: _xsi = prefix + u':' + _xsi break else: raise ValueError, u"XSD types require the XSD namespace" _attributes[XML_SCHEMA_INSTANCE_TYPE_ATTR] = _xsi if _pytype is None: # allow using unregistered or even wrong xsi:type names py_type = <PyType>_SCHEMA_TYPE_DICT.get(_xsi) if py_type is None: py_type = <PyType>_SCHEMA_TYPE_DICT.get(name) if py_type is not None: _pytype = py_type.name if _pytype is None: _pytype = _pytypename(_value) if _value is None and _pytype != u"str": _pytype = _pytype or u"NoneType" strval = None elif python._isString(_value): strval = _value elif isinstance(_value, bool): if _value: strval = u"true" else: strval = u"false" else: py_type = <PyType>_PYTYPE_DICT.get(_pytype) stringify = unicode if py_type is None else py_type.stringify strval = stringify(_value) if _pytype is not None: if _pytype == u"NoneType" or _pytype == u"none": strval = None _attributes[XML_SCHEMA_INSTANCE_NIL_ATTR] = u"true" else: # check if type information from arguments is valid py_type = <PyType>_PYTYPE_DICT.get(_pytype) if py_type is not None: if py_type.type_check is not None: py_type.type_check(strval) _attributes[PYTYPE_ATTRIBUTE] = _pytype return _makeElement(u"value", strval, _attributes, nsmap) ################################################################################ # ObjectPath include "objectpath.pxi"