3 # $Id: ElementPath.py 1858 2004-06-17 21:31:41Z Fredrik $
5 # limited xpath support for element trees
8 # 2003-05-23 fl created
9 # 2003-05-28 fl added support for // etc
10 # 2003-08-27 fl fixed parsing of periods in element names
12 # Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved.
14 # fredrik@pythonware.com
15 # http://www.pythonware.com
17 # --------------------------------------------------------------------
18 # The ElementTree toolkit is
20 # Copyright (c) 1999-2004 by Fredrik Lundh
22 # By obtaining, using, and/or copying this software and/or its
23 # associated documentation, you agree that you have read, understood,
24 # and will comply with the following terms and conditions:
26 # Permission to use, copy, modify, and distribute this software and
27 # its associated documentation for any purpose and without fee is
28 # hereby granted, provided that the above copyright notice appears in
29 # all copies, and that both that copyright notice and this permission
30 # notice appear in supporting documentation, and that the name of
31 # Secret Labs AB or the author not be used in advertising or publicity
32 # pertaining to distribution of the software without specific, written
35 # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
36 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
37 # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
38 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
39 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
40 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
41 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
43 # --------------------------------------------------------------------
45 # Licensed to PSF under a Contributor Agreement.
46 # See http://www.python.org/2.4/license for licensing details.
49 # Implementation module for XPath support. There's usually no reason
50 # to import this module directly; <b>ElementTree</b> does it for you.
# Tokenizer for the limited XPath dialect handled by this module.
#
# Calling xpath_tokenizer(path) returns a list of (op, tag) pairs, one per
# regex match, in source order:
#   op  -- an operator token: "::", "..", "()", or one of / . * : [ ] ( ) @ =
#   tag -- a name, optionally prefixed with a "{namespace}" part
# For a real token exactly one of the two is non-empty; a run of whitespace
# matches the group-less third alternative and therefore yields ("", "").
#
# The pattern is written as raw strings so the regex escapes reach `re`
# unchanged instead of being parsed as (invalid) string escape sequences.
xpath_tokenizer = re.compile(
    r"(::|\.\.|\(\)|[/.*:\[\]\(\)@=])"      # group 1: operator
    r"|((?:\{[^}]+\})?[^/:\[\]\(\)@=\s]+)"  # group 2: tag name
    r"|\s+"                                  # whitespace, not captured
).findall
class xpath_descendant_or_self:
    """Marker object stored among the steps of a compiled path.

    It carries no state: the path code only creates instances with no
    arguments and recognises them with ``isinstance``.  A compiled path
    whose last step is this marker is rejected with the message
    "path cannot end with //".
    """
64 # Wrapper for a compiled XPath.
69 # Create a Path instance from an XPath expression.
71 def __init__(self
, path
):
72 tokens
= xpath_tokenizer(path
)
73 # the current version supports 'path/path'-style expressions only
76 if tokens
and tokens
[0][0] == "/":
77 raise SyntaxError("cannot use absolute path on element")
79 op
, tag
= tokens
.pop(0)
81 self
.path
.append(tag
or op
)
85 self
.path
.append(xpath_descendant_or_self())
88 raise SyntaxError("unsupported path syntax (%s)" % op
)
90 op
, tag
= tokens
.pop(0)
93 "expected path separator (%s)" % (op
or tag
)
95 if self
.path
and isinstance(self
.path
[-1], xpath_descendant_or_self
):
96 raise SyntaxError("path cannot end with //")
97 if len(self
.path
) == 1 and isinstance(self
.path
[0], type("")):
98 self
.tag
= self
.path
[0]
101 # Find first matching object.
103 def find(self
, element
):
106 nodeset
= self
.findall(element
)
116 # Find text for first matching object.
118 def findtext(self
, element
, default
=None):
121 nodeset
= self
.findall(element
)
124 return nodeset
[0].text
or ""
127 return elem
.text
or ""
131 # Find all matching objects.
133 def findall(self
, element
):
138 path
= self
.path
[index
]
143 if isinstance(path
, xpath_descendant_or_self
):
145 tag
= self
.path
[index
]
146 if not isinstance(tag
, type("")):
151 tag
= None # invalid path
153 new
= list(node
.getiterator(tag
))
154 if new
and new
[0] is node
:
161 if path
== "*" or node
.tag
== path
:
170 # (Internal) Compile path.
177 if len(_cache
) >= 100:
183 # Find first matching object.
def find(element, path):
    """Find the first object matching *path*, starting from *element*.

    The path string is compiled with ``_compile`` and the search itself is
    delegated to the compiled object's ``find`` method; whatever that
    method returns is returned unchanged.
    """
    compiled = _compile(path)
    return compiled.find(element)
189 # Find text for first matching object.
def findtext(element, path, default=None):
    """Find the text for the first object matching *path* under *element*.

    The path string is compiled with ``_compile``; *element* and *default*
    are then handed through unchanged to the compiled object's
    ``findtext`` method, and its result is returned as-is.
    """
    compiled = _compile(path)
    return compiled.findtext(element, default)
195 # Find all matching objects.
def findall(element, path):
    """Find all objects matching *path*, starting from *element*.

    The path string is compiled with ``_compile`` and the search itself is
    delegated to the compiled object's ``findall`` method; whatever that
    method returns is returned unchanged.
    """
    compiled = _compile(path)
    return compiled.findall(element)