1 """Module for dealing with epub -> booki conversions."""
5 from cStringIO
import StringIO
11 from simplejson
import dumps
13 import lxml
, lxml
.html
, lxml
.cssselect
14 from lxml
import etree
16 from objavi
.config
import DC
, XHTML
, XHTMLNS
, FM
17 from booki
.bookizip
import BookiZip
19 #XML namespaces. The *NS variants are in {curly brackets} for Clark's syntax
20 XMLNS
= '{http://www.w3.org/XML/1998/namespace}'
21 DAISYNS
= '{http://www.daisy.org/z3986/2005/ncx/}'
22 OPFNS
= '{http://www.idpf.org/2007/opf}'
23 CONTAINERNS
= '{urn:oasis:names:tc:opendocument:xmlns:container}'
25 MARKUP_TYPES
= ('application/xhtml+xml', 'text/html', "application/x-dtbncx+xml")
26 HTML_TYPES
= ('application/xhtml+xml', 'text/html')
28 def log(*messages
, **kwargs
):
31 print >> sys
.stderr
, m
33 print >> sys
.stderr
, repr(m
)
36 html_parser
= lxml
.html
.HTMLParser(encoding
="utf-8")
37 xhtml_parser
= lxml
.html
.XHTMLParser(encoding
="utf-8")
39 def _xhtml_parse(*args
, **kwargs
):
40 kwargs
['parser'] = xhtml_parser
41 return lxml
.html
.parse(*args
, **kwargs
)
43 def _html_parse(*args
, **kwargs
):
44 kwargs
['parser'] = html_parser
45 return lxml
.html
.parse(*args
, **kwargs
)
47 def new_doc(guts
="", version
="1.1", lang
=None):
48 xmldec
= '<?xml version="1.0" encoding="UTF-8"?>'
50 '1.1': ('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"'
51 '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'),
52 '1.0': ('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"'
53 '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n')
56 if lang
in (None, 'und', 'UND'):
59 langdec
= 'xml:lang="%s" lang="%s"' % (lang
, lang
)
61 doc
= ('<html xmlns="%s" version="XHTML %s" %s>'
62 '<head></head><body>%s</body></html>'
63 % (XHTML
, version
, langdec
, guts
))
65 f
= StringIO(xmldec
+ doctypes
.get(version
, '') + doc
)
66 tree
= lxml
.html
.parse(f
)
71 class EpubError(Exception):
86 Great Expectations.opf
91 <other HTML files for the remaining chapters>
95 # Zip is a variable format, and zipfile is limited. If that
96 # becomes a problem we will have to use an `unzip` subprocess,
97 # but it hasn't been so far.
98 if isinstance(src
, str):
99 # Should end with PK<05><06> + 18 more.
100 # Some zips contain 'comments' after that, which breaks ZipFile
101 zipend
= src
.rfind('PK\x05\x06') + 22
102 if len(src
) != zipend
:
106 self
.zip = zipfile
.ZipFile(src
, 'r', compression
=zipfile
.ZIP_DEFLATED
, allowZip64
=True)
107 self
.names
= self
.zip.namelist()
108 self
.info
= self
.zip.infolist()
112 def gettree(self
, name
=None, id=None, parse
=etree
.parse
):
113 """get an XML tree from the given zip filename or manifest ID"""
115 name
, mimetype
= self
.manifest
[id]
116 #Note: python 2.6 (not 2.5) has zipfile.open
117 s
= self
.zip.read(name
)
123 def parse_meta(self
):
124 '''META-INF/container.xml contains one or more <rootfile>
125 nodes. We want the "application/oebps-package+xml" one.
127 <rootfile full-path="OEBPS/Great Expectations.opf" media-type="application/oebps-package+xml" />
128 <rootfile full-path="PDF/Great Expectations.pdf" media-type="application/pdf" />
130 Other files are allowed in META-INF, but none of them are much
131 use. They are manifest.xml, metadata.xml, signatures.xml,
132 encryption.xml, and rights.xml.
134 tree
= self
.gettree('META-INF/container.xml')
135 for r
in tree
.getiterator(CONTAINERNS
+ 'rootfile'):
136 if r
.get('media-type') == "application/oebps-package+xml":
137 rootfile
= r
.get('full-path')
140 raise EpubError("No OPF rootfile found")
142 self
.opf_file
= rootfile
146 The opf file is arranged like this:
154 Metadata, manifest and spine are parsed in separate helper
157 self
.opfdir
= os
.path
.dirname(self
.opf_file
) #needed for manifest parsing
158 tree
= self
.gettree(self
.opf_file
)
159 root
= tree
.getroot()
160 metadata
= root
.find(OPFNS
+ 'metadata')
161 manifest
= root
.find(OPFNS
+ 'manifest')
162 spine
= root
.find(OPFNS
+ 'spine')
164 self
.metadata
= parse_metadata(metadata
)
165 self
.manifest
= parse_manifest(manifest
, self
.opfdir
)
166 # mapping of filenames to new filenames. This needs to be
167 # done early to detect clashes (e.g. '/images/hello.jpg' and
168 # '/images/big/hello.jpg' would both reduce to
169 # 'static/hello.jpg').
171 for k
, v
in self
.manifest
.items():
173 if mimetype
not in MARKUP_TYPES
:
176 fn
= fn
.rsplit('/', 1)[1]
177 while fn
in self
.media_map
.values():
179 newfn
= 'static/%s' % fn
180 self
.media_map
[oldfn
] = newfn
182 ncxid
, self
.spine
= parse_spine(spine
)
183 self
.ncxfile
= self
.manifest
[ncxid
][0]
185 #there is also an optional guide section, which we ignore
186 guide
= root
.find(OPFNS
+ 'guide')
187 if guide
is not None:
188 self
.guide
= parse_guide(guide
)
194 ncx
= self
.gettree(self
.ncxfile
)
195 self
.ncxdata
= parse_ncx(ncx
)
198 """get all the known metadata and nav data as json."""
200 'metadata': self
.metadata
,
201 'manifest': self
.manifest
,
205 if self
.guide
is not None:
206 data
['guide'] = self
.guide
207 return dumps(data
, indent
=2)
209 def find_language(self
):
210 opflang
= [x
[0].lower() for x
in
211 self
.metadata
.get(DC
, {}).get('language', ())]
213 # XXX Should the ncx language enter into it? Being xml:lang,
214 # it is in theory just the language of the ncx document
215 # itself. But if the metadata lacks language, should it be
216 # used instead? At present, NO.
217 #ncxlang = self.ncxdata['headers'].get('lang', ())
219 # XXX also, for now, ignoring case of badly formed language
220 # codes, conflicting or supplementary languages, etc.
221 opflang
= [x
for x
in opflang
if x
not in ('und', '')]
224 if len(set(opflang
)) > 1:
225 log('%s metadata has more than one language: %s -- using first one'
226 % (self
.origin
, opflang
))
229 def find_probable_chapters(self
):
230 """Try to find the real chapters from the NCX file. The
231 problem is that different epubs all use their own level of
233 # the Black Arrow has (book 1 (c1, c2, c3), book2 (c4, c5, c6..))
234 # and FM books have (section 1 (c1, c2,..),..)
235 # i.e super-chapter blocks
236 # some have (((c1, c2, c3))) -- deeply nested chapters
237 # some have no real chapters, but stupid structure
238 points
= self
.ncxdata
['navmap']['points']
239 pwd
= os
.path
.dirname(self
.ncxfile
)
240 serial_points
, splits
= get_chapter_breaks(points
, pwd
)
241 return serial_points
, splits
243 def concat_document(self
):
244 """Join all the xhtml files together, putting in markers
245 indicating where the splits should be.
247 lang
= self
.find_language()
248 points
= self
.ncxdata
['navmap']['points']
249 pwd
= os
.path
.dirname(self
.ncxfile
)
250 serial_points
, chapter_markers
= get_chapter_breaks(points
, pwd
)
251 doc
= new_doc(lang
=lang
)
252 #log(chapter_markers)
253 for ID
in self
.spine
:
254 fn
, mimetype
= self
.manifest
[ID
]
255 if mimetype
.startswith('image'):
256 root
= lxml
.html
.Element('html')
257 body
= etree
.SubElement(root
, 'body')
258 first_el
= etree
.SubElement(body
, 'img', src
=self
.media_map
.get(fn
, fn
), alt
='')
260 tree
= self
.gettree(fn
, parse
=_html_parse
)
261 root
= tree
.getroot()
262 first_el
= _find_tag(root
, 'body')[0]
263 #point the links to the new names. XXX probably fragile
264 root
.rewrite_links(lambda x
: self
.media_map
.get(os
.path
.join(self
.opfdir
, x
), x
))
266 for depth
, fragment
, point
in chapter_markers
.get(fn
, ()):
268 start
= root
.xpath("//*[@id='%s']" % fragment
)[0]
271 labels
= point
['labels']
272 add_marker(start
, 'espri-chapter-%(id)s' % point
,
273 title
=find_good_label(labels
, lang
),
274 subsections
=str(bool(point
['points'])))
276 add_marker(first_el
, 'espri-new-file-%s' % ID
, title
=fn
)
281 def make_bookizip(self
, zfn
):
282 """Split up the document and construct a booki-toc for it."""
283 doc
= self
.concat_document()
286 chapters
= split_document(doc
)
287 real_chapters
= drop_empty_chapters(chapters
)
288 rightsholders
= [c
for c
, extra
in self
.metadata
[DC
].get('creator', ())]
289 contributors
= rightsholders
+ [c
for c
, extra
in self
.metadata
[DC
].get('contributor', ())]
292 for id, title
, tree
in real_chapters
:
294 root
= tree
.getroot()
298 del root
.attrib
['xmlns']
299 del root
.attrib
['version']
300 del root
.attrib
['xml:lang']
304 head
= root
.makeelement('head')
305 _title
= etree
.SubElement(head
, 'title')
308 #blob = etree.tostring(tree)
309 blob
= lxml
.html
.tostring(tree
)
310 bz
.add_to_package(id, '%s.html' % id, blob
, mediatype
='text/html',
311 contributors
=contributors
,
312 rightsholders
=rightsholders
)
315 #add the images and other non-html data unchanged.
316 for id, data
in self
.manifest
.iteritems():
318 if mimetype
not in MARKUP_TYPES
:
319 blob
= self
.zip.read(fn
)
320 bz
.add_to_package(id, self
.media_map
[fn
], blob
, mimetype
,
321 contributors
=contributors
,
322 rightsholders
=rightsholders
325 #now to construct a table of contents
326 lang
= self
.find_language()
329 def write_toc(point
, section
):
331 title
= find_good_label(point
['labels'], lang
),
332 if title
and title
[0]:
333 tocpoint
['title'] = title
[0]
336 tocpoint
['url'] = self
.manifest
.get(ID
, ID
+ '.html')
338 tp
= deferred_urls
.pop()
339 tp
['url'] = tocpoint
['url']
340 log('%r has deferred url: %r' % (tp
['title'], tp
['url']))
342 deferred_urls
.append(tocpoint
)
344 tocpoint
['children'] = []
345 for child
in point
['points']:
346 write_toc(child
, tocpoint
['children'])
348 section
.append(tocpoint
)
351 points
= self
.ncxdata
['navmap']['points']
355 metadata
= {FM
: {'book':{},
360 for namespace
, keys
in self
.metadata
.items():
361 if 'namespace' not in metadata
:
362 metadata
[namespace
] = {}
364 for key
, values
in keys
.items():
365 dest
= metadata
[namespace
].setdefault(key
, {})
366 for value
, extra
in values
:
369 for x
in ('scheme', 'role'):
373 dest
.setdefault(scheme
, []).append(value
)
375 if not metadata
[FM
]['book']:
376 metadata
[FM
]['book'][''] = [''.join(x
for x
in str(metadata
[DC
]['identifier'][''][0]) if x
.isalnum())]
377 if not metadata
[FM
]['server']:
378 metadata
[FM
]['server'][''] = ['booki.flossmanuals.net']
385 'metadata': metadata
,
392 def find_good_label(labels
, lang
=None):
393 """Try to find a suitable label from a dictionary mapping
394 languages to labels, resorting to a random label if need be."""
395 #XXX not taking into account language sub-tags ("en_GB")
396 for x
in [lang
, None]:
400 #return random.choice(labels.values())
401 return ' | '.join(labels
.values())
405 #labels.get(lang, '\n'.join(labels.values())),
407 def drop_empty_chapters(chapters
):
408 """If the chapter has no content, ignore it. Content is defined
409 as images or text."""
413 for e
in c
[2].iter():
414 if ((e
.text
and e
.text
.strip()) or
415 (e
.tail
and e
.tail
.strip()) or
420 good_chapters
.append(c
)
424 def copy_element(src
, create
):
425 """Return a copy of the src element, with all its attributes and
426 tail, using create to make the copy. create is probably an
427 Element._makeelement method, to associate the copy with the right
428 tree, but it could be etree.HTMLElement."""
429 if isinstance(src
.tag
, basestring
):
430 dest
= create(src
.tag
)
432 dest
= copy
.copy(src
)
434 for k
, v
in src
.items():
439 def split_document(doc
):
440 """Split the document along chapter boundaries."""
443 except AttributeError:
446 front_matter
= copy_element(root
, lxml
.html
.Element
)
447 chapters
= [('espri-unindexed-front-matter',
448 'Unindexed Front Matter',
451 _climb_and_split(root
, front_matter
, chapters
)
454 def _climb_and_split(src
, dest
, chapters
):
455 for child
in src
.iterchildren():
456 if child
.tag
== 'hr' and child
.get('class') == MARKER_CLASS
:
458 if ID
.startswith('espri-chapter-'):
459 title
= child
.get('title') or ID
460 new
= copy_element(src
, lxml
.html
.Element
)
463 for a
in src
.iterancestors():
464 a2
= copy_element(a
, root
.makeelement
)
468 chapters
.append((ID
[14:], title
, root
))
471 for a
in dest
.iterancestors():
476 log("skipping %s" % etree
.tostring(child
))
479 new
= copy_element(child
, dest
.makeelement
)
480 new
.text
= child
.text
482 _climb_and_split(child
, new
, chapters
)
485 def save_chapters(chapters
):
486 for id, tree
in chapters
.items():
487 string
= lxml
.html
.tostring(tree
, method
='html')
488 f
= open('/tmp/x%s.html' % id, 'w')
493 def add_guts(src
, dest
):
494 """Append the contents of the <body> of one tree onto that of
495 another. The source tree will be emptied."""
496 #print lxml.etree.tostring(src)
497 sbody
= _find_tag(src
, 'body')
498 dbody
= _find_tag(dest
, 'body')
500 dbody
[-1].tail
= ((dbody
[-1].tail
or '') +
501 (sbody
.text
or '')) or None
503 dbody
.text
= sbody
.text
508 dbody
.tail
= ((dbody
.tail
or '') +
509 (sbody
.tail
or '')) or None
513 def _find_tag(doc
, tag
):
514 #log(lxml.etree.tostring(doc, encoding='utf-8', method='html').replace(' ', ''))
517 except AttributeError:
521 return doc
.iter(XHTMLNS
+ tag
).next()
522 except StopIteration:
523 log('doc had nsmap %s, but did not seem to be xhtml (looking for %s)' % (doc
.nsmap
, tag
))
524 return doc
.iter(tag
).next()
526 MARKER_CLASS
="espri-marker"
528 def add_marker(el
, ID
, **kwargs
):
529 """Add a marker before the element"""
530 marker
= el
.makeelement('hr')
532 marker
.set('class', MARKER_CLASS
)
533 for k
, v
in kwargs
.items():
535 parent
= el
.getparent()
536 index
= parent
.index(el
)
537 parent
.insert(index
, marker
)
541 def get_chapter_breaks(points
, pwd
):
542 # First go was overly complex, trying to guess which sections were
543 # really chapters. Now, every ncx navpoint is a chapter break.
545 def serialise(p
, depth
):
546 serial_points
.append((depth
, p
))
548 # log("found class=='%s' at depth %s" % (p['class'], depth))
549 if not p
.get('points'):
551 for child
in p
['points']:
552 bottom
= serialise(child
, depth
+ 1)
558 for depth
, p
in serial_points
:
559 url
, ID
= p
['content_src'], None
560 url
= os
.path
.join(pwd
, url
)
562 log("GOT a fragment! %s" % url
)
563 url
, ID
= url
.split('#', 1)
564 s
= splits
.setdefault(url
, [])
565 s
.append((depth
, ID
, p
))
567 return serial_points
, splits
570 def parse_metadata(metadata
):
571 """metadata is an OPF metadata node, as defined at
572 http://www.idpf.org/2007/opf/OPF_2.0_final_spec.html#Section2.2
573 (or a dc-metadata or x-metadata child thereof).
576 # the node probably has at least 'dc', 'opf', and None namespace
577 # prefixes. None and opf probably map to the same thing. 'dc' is
579 nsmap
= metadata
.nsmap
580 nstags
= dict((k
, '{%s}' % v
) for k
, v
in nsmap
.iteritems())
581 default_ns
= nstags
[None]
583 # Collect element data in namespace-bins, and map prefixes to
584 # those bins for convenience
585 nsdict
= dict((v
, {}) for v
in nsmap
.values())
587 def add_item(ns
, tag
, value
, extra
):
588 #any key can be duplicate, so store in a list
591 values
= nsdict
[ns
].setdefault(tag
, [])
592 values
.append((value
, extra
))
594 for t
in metadata
.iterdescendants():
595 #look for special OPF tags
596 if t
.tag
== default_ns
+ 'meta':
597 #meta tags <meta name="" content="" />
599 content
= t
.get('content')
600 others
= dict((k
, v
) for k
, v
in t
.items() if k
not in ('name', 'content'))
602 # the meta tag is using xml namespaces in attribute values.
603 prefix
, name
= name
.split(':', 1)
606 add_item(t
.nsmap
[prefix
], name
, content
, others
)
609 if t
.tag
in (default_ns
+ 'dc-metadata', default_ns
+ 'x-metadata'):
610 # Subelements of these deprecated elements are in either
611 # DC or non-DC namespace (respectively). Of course, this
612 # is true of any element anyway, so it is sufficient to
613 # ignore this (unless we want to cause pedantic errors).
614 log("found a live %s tag; descending into but otherwise ignoring it"
615 % t
.tag
[len(default_ns
):])
618 tag
= t
.tag
[t
.tag
.rfind('}') + 1:]
619 add_item(t
.nsmap
[t
.prefix
], tag
, t
.text
,
620 tuple((k
.replace(default_ns
, ''), v
) for k
, v
in t
.items()))
625 def parse_manifest(manifest
, pwd
):
627 Only contains <item>s; each <item> has id, href, and media-type.
629 It includes 'toc.ncx', but not 'META-INF/container.xml' or the pbf
630 file (i.e., the files needed to get this far).
632 The manifest can specify fallbacks for unrecognised documents, but
633 Espri does not use that (nor do any of the test epub files).
636 <item id="ncx" href="toc.ncx" media-type="application/x-dtbncx+xml" />
637 <item id="WHume_NatureC01" href="Hume_NatureC01.html" media-type="application/xhtml+xml" />
638 <item id="cover" href="cover.jpg" media-type="image/jpeg" />
642 ns
= '{%s}' % manifest
.nsmap
[None]
644 for t
in manifest
.iterchildren(ns
+ 'item'):
646 href
= os
.path
.join(pwd
, t
.get('href'))
647 media_type
= t
.get('media-type')
648 items
[id] = (href
, media_type
) #XXX does media-type matter?
652 def parse_spine(spine
):
653 """The spine is an ordered list of xhtml documents (or dtbook, but
654 Booki can't edit that, or manifest items that 'fallback' to xhtml,
655 which Espri doesn't yet handle). Also, anything in the manifest
656 that can be in the spine, must be.
658 Spine itemrefs can have a 'linear' attribute, with a value of
659 'yes' or 'no' (defaulting to 'yes'). If an item is linear, it is
660 in the main stream of the book. Reader software is allowed to
661 ignore this distinction, as Espri does.
663 The toc attribute points to the ncx file (via manifest id).
666 ns
= '{%s}' % spine
.nsmap
[None]
667 for t
in spine
.iterchildren(ns
+ 'itemref'):
668 items
.append(t
.get('idref'))
670 toc
= spine
.get('toc')
674 def parse_guide(guide
):
675 """Parse the guide from the opf file."""
677 ns
= '{%s}' % guide
.nsmap
[None]
678 for r
in guide
.iterchildren(ns
+ 'reference'):
679 items
.append((r
.get('href'), r
.get('type'), r
.get('title'),))
685 """get text content from an <xx><text>...</text></xx> construct,
686 as is common in NCX files."""
687 # there will only be one <text>, but for...iter is still easiest
688 for t
in e
.iter(DAISYNS
+ 'text'):
690 return '' # or leave it at None?
692 def get_labels(e
, tag
='{http://www.daisy.org/z3986/2005/ncx/}navLabel'):
693 """Make a mapping of languages to labels."""
694 # This reads navInfo or navLabel tags. navInfo is unlikely, but
695 # navLabel is ubiquitous. There can be one for each language, so
698 for label
in e
.findall(tag
):
699 lang
= label
.get(XMLNS
+ 'lang')
700 labels
[lang
] = get_ncxtext(e
)
705 The NCX file is the closest thing to FLOSS Manuals TOC.txt. It
706 describes the hierarchical structure of the document (whereas the
707 spine describes its 'physical' structure).
709 #<!ELEMENT ncx (head, docTitle, docAuthor*, navMap, pageList?, navList*)>
712 #if a header is set multiple times, keep all
713 def setheader(name
, content
, scheme
=None):
714 values
= headers
.setdefault(name
, [])
715 values
.append((content
, scheme
))
717 head
= ncx
.find(DAISYNS
+ 'head')
718 #<!ELEMENT head (meta+)>
719 for meta
in head
.findall(DAISYNS
+ 'meta'):
720 #whatever 'scheme' is
721 setheader(meta
.get('name'), meta
.get('content'), meta
.get('scheme'))
723 for t
in ('docTitle', 'docAuthor'):
724 for e
in ncx
.findall(DAISYNS
+ t
):
726 setheader(t
, get_ncxtext(e
))
729 for attr
, header
in (('dir', 'dir'),
730 (XMLNS
+ 'lang', 'lang')):
731 value
= root
.get(attr
)
732 if value
is not None:
733 setheader(header
, value
)
735 navmap
= root
.find(DAISYNS
+ 'navMap')
738 'navmap': parse_navmap(navmap
),
741 #Try adding these bits, even though no-one has them and they are no use.
742 pagelist
= ncx
.find(DAISYNS
+ 'pageList')
743 navlist
= ncx
.find(DAISYNS
+ 'navList')
744 if pagelist
is not None:
745 ret
['pagelist'] = parse_pagelist(pagelist
)
746 if navlist
is not None:
747 ret
['navlist'] = parse_navlist(navlist
)
753 #<!ELEMENT navMap (navInfo*, navLabel*, navPoint+)>
754 #XXX move info and labels out of navmap, and into headers?
756 'info': get_labels(e
, DAISYNS
+ 'navInfo'),
757 'labels': get_labels(e
),
758 'points': tuple(parse_navpoint(x
) for x
in e
.findall(DAISYNS
+ 'navPoint')),
761 def parse_navpoint(e
):
762 #<!ELEMENT navPoint (navLabel+, content, navPoint*)>
763 c
= e
.find(DAISYNS
+ 'content')
764 subpoints
= tuple(parse_navpoint(x
) for x
in e
.findall(DAISYNS
+ 'navPoint'))
767 'class': e
.get('class'),
768 'play_order': int(e
.get('playOrder')),
769 #'content_id': c.get('id'),
770 'content_src': c
.get('src'),
771 'labels': get_labels(e
),
776 def parse_pagelist(e
):
777 # <!ELEMENT pageList (navInfo*, navLabel*, pageTarget+)>
779 'info': get_labels(e
, DAISYNS
+ 'navInfo'),
780 'labels': get_labels(e
),
781 'targets': tuple(parse_pagetarget(x
) for x
in e
.findall(DAISYNS
+ 'pageTarget')),
784 def parse_pagetarget(e
):
785 #<!ELEMENT pageTarget (navLabel+, content)>
786 labels
= get_labels(e
)
787 c
= e
.find(DAISYNS
+ 'content')
790 'type': e
.get('type'),
791 'play_order': int(e
.get('playOrder')),
792 'content_src': c
.get('src'),
793 'labels': get_labels(e
),
795 value
= e
.get('value')
796 if value
is not None:
800 def parse_navlist(e
):
801 #<!ELEMENT navList (navInfo*, navLabel+, navTarget+)>
803 'info': get_labels(e
, DAISYNS
+ 'navInfo'),
804 'labels': get_labels(e
),
805 'targets': tuple(parse_navtarget(x
) for x
in e
.findall(DAISYNS
+ 'navTarget')),
808 def parse_navtarget(e
):
809 #<!ELEMENT navTarget (navLabel+, content)>
810 labels
= get_labels(e
)
811 c
= e
.find(DAISYNS
+ 'content')
814 'play_order': int(e
.get('playOrder')),
815 'content_src': c
.get('src'),
816 'labels': get_labels(e
),
818 value
= e
.get('value')
819 if value
is not None: