Lib/xml/etree/ElementPath.py

   1 #
   2 # ElementTree
   3 # $Id: ElementPath.py 1858 2004-06-17 21:31:41Z Fredrik $
   4 #
   5 # limited xpath support for element trees
   6 #
   7 # history:
   8 # 2003-05-23 fl   created
   9 # 2003-05-28 fl   added support for // etc
  10 # 2003-08-27 fl   fixed parsing of periods in element names
  11 #
  12 # Copyright (c) 2003-2004 by Fredrik Lundh.  All rights reserved.
  13 #
  14 # fredrik@pythonware.com
  15 # http://www.pythonware.com
  16 #
  17 # --------------------------------------------------------------------
  18 # The ElementTree toolkit is
  19 #
  20 # Copyright (c) 1999-2004 by Fredrik Lundh
  21 #
  22 # By obtaining, using, and/or copying this software and/or its
  23 # associated documentation, you agree that you have read, understood,
  24 # and will comply with the following terms and conditions:
  25 #
  26 # Permission to use, copy, modify, and distribute this software and
  27 # its associated documentation for any purpose and without fee is
  28 # hereby granted, provided that the above copyright notice appears in
  29 # all copies, and that both that copyright notice and this permission
  30 # notice appear in supporting documentation, and that the name of
  31 # Secret Labs AB or the author not be used in advertising or publicity
  32 # pertaining to distribution of the software without specific, written
  33 # prior permission.
  34 #
  35 # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
  36 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
  37 # ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
  38 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
  39 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
  40 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
  41 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
  42 # OF THIS SOFTWARE.
  43 # --------------------------------------------------------------------
  44
  45 # Licensed to PSF under a Contributor Agreement.
  46 # See http://www.python.org/2.4/license for licensing details.
  47
  48 ##
  49 # Implementation module for XPath support.  There's usually no reason
  50 # to import this module directly; the <b>ElementTree</b> does this for
  51 # you, if needed.
  52 ##
  53
  54 import re
  55
  56 xpath_tokenizer = re.compile(
  57     "(::|\.\.|\(\)|[/.*:\[\]\(\)@=])|((?:\{[^}]+\})?[^/:\[\]\(\)@=\s]+)|\s+"
  58     ).findall
  59
  60 class xpath_descendant_or_self:
  61     pass
  62
  63 ##
  64 # Wrapper for a compiled XPath.
  65
  66 class Path:
  67
  68     ##
  69     # Create an Path instance from an XPath expression.
  70
  71     def __init__(self, path):
  72         tokens = xpath_tokenizer(path)
  73         # the current version supports 'path/path'-style expressions only
  74         self.path = []
  75         self.tag = None
  76         if tokens and tokens[0][0] == "/":
  77             raise SyntaxError("cannot use absolute path on element")
  78         while tokens:
  79             op, tag = tokens.pop(0)
  80             if tag or op == "*":
  81                 self.path.append(tag or op)
  82             elif op == ".":
  83                 pass
  84             elif op == "/":
  85                 self.path.append(xpath_descendant_or_self())
  86                 continue
  87             else:
  88                 raise SyntaxError("unsupported path syntax (%s)" % op)
  89             if tokens:
  90                 op, tag = tokens.pop(0)
  91                 if op != "/":
  92                     raise SyntaxError(
  93                         "expected path separator (%s)" % (op or tag)
  94                         )
  95         if self.path and isinstance(self.path[-1], xpath_descendant_or_self):
  96             raise SyntaxError("path cannot end with //")
  97         if len(self.path) == 1 and isinstance(self.path[0], type("")):
  98             self.tag = self.path[0]
  99
 100     ##
 101     # Find first matching object.
 102
 103     def find(self, element):
 104         tag = self.tag
 105         if tag is None:
 106             nodeset = self.findall(element)
 107             if not nodeset:
 108                 return None
 109             return nodeset[0]
 110         for elem in element:
 111             if elem.tag == tag:
 112                 return elem
 113         return None
 114
 115     ##
 116     # Find text for first matching object.
 117
 118     def findtext(self, element, default=None):
 119         tag = self.tag
 120         if tag is None:
 121             nodeset = self.findall(element)
 122             if not nodeset:
 123                 return default
 124             return nodeset[0].text or ""
 125         for elem in element:
 126             if elem.tag == tag:
 127                 return elem.text or ""
 128         return default
 129
 130     ##
 131     # Find all matching objects.
 132
 133     def findall(self, element):
 134         nodeset = [element]
 135         index = 0
 136         while 1:
 137             try:
 138                 path = self.path[index]
 139                 index = index + 1
 140             except IndexError:
 141                 return nodeset
 142             set = []
 143             if isinstance(path, xpath_descendant_or_self):
 144                 try:
 145                     tag = self.path[index]
 146                     if not isinstance(tag, type("")):
 147                         tag = None
 148                     else:
 149                         index = index + 1
 150                 except IndexError:
 151                     tag = None # invalid path
 152                 for node in nodeset:
 153                     new = list(node.getiterator(tag))
 154                     if new and new[0] is node:
 155                         set.extend(new[1:])
 156                     else:
 157                         set.extend(new)
 158             else:
 159                 for node in nodeset:
 160                     for node in node:
 161                         if path == "*" or node.tag == path:
 162                             set.append(node)
 163             if not set:
 164                 return []
 165             nodeset = set
 166
 167 _cache = {}
 168
 169 ##
 170 # (Internal) Compile path.
 171
 172 def _compile(path):
 173     p = _cache.get(path)
 174     if p is not None:
 175         return p
 176     p = Path(path)
 177     if len(_cache) >= 100:
 178         _cache.clear()
 179     _cache[path] = p
 180     return p
 181
 182 ##
 183 # Find first matching object.
 184
 185 def find(element, path):
 186     return _compile(path).find(element)
 187
 188 ##
 189 # Find text for first matching object.
 190
 191 def findtext(element, path, default=None):
 192     return _compile(path).findtext(element, default)
 193
 194 ##
 195 # Find all matching objects.
 196
 197 def findall(element, path):
 198     return _compile(path).findall(element)