2 # Author: David Goodger, Ueli Schlaepfer <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
"""
Transforms related to the front matter of a document or a section
(information found before the main text):

- `DocTitle`: Used to transform a lone top level section's title to
  the document title, promote a remaining lone top-level section's
  title to the document subtitle, and determine the document's title
  metadata (document['title']) based on the document title and/or the
  "title" setting.

- `SectionSubTitle`: Used to transform a lone subsection into a
  subtitle.

- `DocInfo`: Used to transform a bibliographic field list into docinfo
  elements.
"""
22 __docformat__
= 'reStructuredText'
25 from docutils
import nodes
, utils
26 from docutils
.transforms
import TransformError
, Transform
class TitlePromoter(Transform):

    """
    Abstract base class for DocTitle and SectionSubTitle transforms.
    """

    def promote_title(self, node):
        """
        Transform the following tree::

            <node>
                <section>
                    <title>
                    ...

        into ::

            <node>
                <title>
                ...

        `node` is normally a document.

        Return a true value if a title was promoted, or None if
        `candidate_index` found no valid candidate (in which case `node`
        is left untouched).

        Raise TypeError if `node` is not a `nodes.Element`.
        """
        # Type check.  The call form of ``raise`` is valid on both
        # Python 2 and Python 3 (``raise E, msg`` is Python 2 only).
        if not isinstance(node, nodes.Element):
            raise TypeError('node must be of Element-derived type.')

        # `node` must not have a title yet.
        assert not (len(node) and isinstance(node[0], nodes.title))
        section, index = self.candidate_index(node)
        if index is None:
            return None

        # Transfer the section's attributes to the node:
        # NOTE: Change second parameter to False to NOT replace
        #       attributes that already exist in node with those in
        #       section
        # NOTE: Remove third parameter to NOT copy the 'source'
        #       attribute from section
        node.update_all_atts_concatenating(section, True, True)

        # setup_child is called automatically for all nodes.
        node[:] = (section[:1]        # section title
                   + node[:index]     # everything that was in the
                                      # node before the section
                   + section[1:])     # everything that was in the section
        assert isinstance(node[0], nodes.title)
        return True

    def promote_subtitle(self, node):
        """
        Transform the following node tree::

            <node>
                <title>
                <section>
                    <title>
                    ...

        into ::

            <node>
                <title>
                <subtitle>
                ...

        Return a true value if a subtitle was promoted, or None if
        `candidate_index` found no valid candidate (in which case `node`
        is left untouched).

        Raise TypeError if `node` is not a `nodes.Element`.
        """
        # Type check (call form of ``raise`` works on Python 2 and 3).
        if not isinstance(node, nodes.Element):
            raise TypeError('node must be of Element-derived type.')

        subsection, index = self.candidate_index(node)
        if index is None:
            return None
        subtitle = nodes.subtitle()

        # Transfer the subsection's attributes to the new subtitle
        # NOTE: Change second parameter to False to NOT replace
        #       attributes that already exist in node with those in
        #       section
        # NOTE: Remove third parameter to NOT copy the 'source'
        #       attribute from section
        subtitle.update_all_atts_concatenating(subsection, True, True)

        # Transfer the contents of the subsection's title to the
        # subtitle:
        subtitle[:] = subsection[0][:]
        node[:] = (node[:1]       # title
                   + [subtitle]
                   # everything that was before the section:
                   + node[1:index]
                   # everything that was in the subsection:
                   + subsection[1:])
        return True

    def candidate_index(self, node):
        """
        Find and return the promotion candidate and its index.

        The candidate is the first child of `node` that is not a
        `nodes.PreBibliographic` instance; it is only valid if it is
        also the last child of `node` and is a `nodes.section`.

        Return (None, None) if no valid candidate was found.
        """
        index = node.first_child_not_matching_class(
            nodes.PreBibliographic)
        if (index is None or len(node) > (index + 1)
                or not isinstance(node[index], nodes.section)):
            return None, None
        return node[index], index
class DocTitle(TitlePromoter):

    """
    In reStructuredText_, there is no way to specify a document title
    and subtitle explicitly. Instead, we can supply the document title
    (and possibly the subtitle as well) implicitly, and use this
    two-step transform to "raise" or "promote" the title(s) (and their
    corresponding section contents) to the document level.

    1. If the document contains a single top-level section as its
       first non-comment element, the top-level section's title
       becomes the document's title, and the top-level section's
       contents become the document's immediate contents. The lone
       top-level section header must be the first non-comment element
       in the document.

       For example, take this input text::

           =================
            Top-Level Title
           =================

           A paragraph.

       Once parsed, it looks like this::

           <document>
               <section names="top-level title">
                   <title>
                       Top-Level Title
                   <paragraph>
                       A paragraph.

       After running the DocTitle transform, we have::

           <document names="top-level title">
               <title>
                   Top-Level Title
               <paragraph>
                   A paragraph.

    2. If step 1 successfully determines the document title, we
       continue by checking for a subtitle.

       If the lone top-level section itself contains a single
       second-level section as its first non-comment element, that
       section's title is promoted to the document's subtitle, and
       that section's contents become the document's immediate
       contents. Given this input text::

           =================
            Top-Level Title
           =================

           Second-Level Title
           ~~~~~~~~~~~~~~~~~~

           A paragraph.

       After parsing and running the Section Promotion transform, the
       result is::

           <document names="top-level title">
               <title>
                   Top-Level Title
               <subtitle names="second-level title">
                   Second-Level Title
               <paragraph>
                   A paragraph.

       (Note that the implicit hyperlink target generated by the
       "Second-Level Title" is preserved on the "subtitle" element
       itself.)

    Any comment elements occurring before the document title or
    subtitle are accumulated and inserted as the first body elements
    after the title(s).

    This transform also sets the document's metadata title
    (document['title']).

    .. _reStructuredText: http://docutils.sf.net/rst.html
    """

    default_priority = 320

    def set_metadata(self):
        """
        Set document['title'] metadata title from the following
        sources, listed in order of priority:

        * Existing document['title'] attribute.
        * "title" setting.
        * Document title node (as promoted by promote_title).
        """
        document = self.document
        if document.hasattr('title'):
            # An explicit attribute always wins; nothing to do.
            return
        configured_title = document.settings.title
        if configured_title is not None:
            document['title'] = configured_title
        elif len(document) and isinstance(document[0], nodes.title):
            document['title'] = document[0].astext()

    def apply(self):
        """
        Promote the title (and then, if that succeeded, the subtitle)
        unless the "doctitle_xform" setting is false, and always set the
        document's metadata title afterwards.
        """
        # promote_(sub)title defined in TitlePromoter base class.
        # A subtitle is only looked for once a title has been promoted.
        if (getattr(self.document.settings, 'doctitle_xform', 1)
                and self.promote_title(self.document)):
            self.promote_subtitle(self.document)
        # Set document['title'].
        self.set_metadata()
class SectionSubTitle(TitlePromoter):

    """
    This works like document subtitles, but for sections.  For example, ::

        <section>
            <title>
                Title
            <section>
                <title>
                    Subtitle
                ...

    is transformed into ::

        <section>
            <title>
                Title
            <subtitle>
                Subtitle
            ...

    For details refer to the docstring of DocTitle.
    """

    default_priority = 350

    def apply(self):
        """
        Try to promote a subtitle in every section of the document,
        unless the "sectsubtitle_xform" setting is false.
        """
        if getattr(self.document.settings, 'sectsubtitle_xform', 1):
            for section in self.document.traverse(nodes.section):
                # On our way through the node tree, we are deleting
                # sections, but we call self.promote_subtitle for those
                # sections nonetheless.  To do: Write a test case which
                # shows the problem and discuss on Docutils-develop.
                self.promote_subtitle(section)
class DocInfo(Transform):

    """
    This transform is specific to the reStructuredText_ markup syntax;
    see "Bibliographic Fields" in the `reStructuredText Markup
    Specification`_ for a high-level description. This transform
    should be run *after* the `DocTitle` transform.

    Given a field list as the first non-comment element after the
    document title and subtitle (if present), registered bibliographic
    field names are transformed to the corresponding DTD elements,
    becoming child elements of the "docinfo" element (except for a
    dedication and/or an abstract, which become "topic" elements after
    "docinfo").

    For example, given this document fragment after parsing::

        <document>
            <title>
                Document Title
            <field_list>
                <field>
                    <field_name>
                        Author
                    <field_body>
                        <paragraph>
                            A. Name
                <field>
                    <field_name>
                        Status
                    <field_body>
                        <paragraph>
                            $RCSfile$
            ...

    After running the bibliographic field list transform, the
    resulting document tree would look like this::

        <document>
            <title>
                Document Title
            <docinfo>
                <author>
                    A. Name
                <status>
                    frontmatter.py
            ...

    The "Status" field contained an expanded RCS keyword, which is
    normally (but optionally) cleaned up by the transform. The sole
    contents of the field body must be a paragraph containing an
    expanded RCS keyword of the form "$keyword: expansion text $". Any
    RCS keyword can be processed in any bibliographic field. The
    dollar signs and leading RCS keyword name are removed. Extra
    processing is done for the following RCS keywords:

    - "RCSfile" expands to the name of the file in the RCS or CVS
      repository, which is the name of the source file with a ",v"
      suffix appended. The transform will remove the ",v" suffix.

    - "Date" expands to the format "YYYY/MM/DD hh:mm:ss" (in the UTC
      time zone). The RCS Keywords transform will extract just the
      date itself and transform it to an ISO 8601 format date, as in
      "2000-12-31".

      (Since the source file for this text is itself stored under CVS,
      we can't show an example of the "Date" RCS keyword because we
      can't prevent any RCS keywords used in this explanation from
      being expanded. Only the "RCSfile" keyword is stable; its
      expansion text changes only if the file name changes.)

    .. _reStructuredText: http://docutils.sf.net/rst.html
    .. _reStructuredText Markup Specification:
       http://docutils.sf.net/docs/ref/rst/restructuredtext.html
    """

    default_priority = 340

    biblio_nodes = {
        'author': nodes.author,
        'authors': nodes.authors,
        'organization': nodes.organization,
        'address': nodes.address,
        'contact': nodes.contact,
        'version': nodes.version,
        'revision': nodes.revision,
        'status': nodes.status,
        'date': nodes.date,
        'copyright': nodes.copyright,
        'dedication': nodes.topic,
        'abstract': nodes.topic}
    """Canonical field name (lowcased) to node class name mapping for
    bibliographic fields (field_list)."""

    def apply(self):
        """
        Replace a leading bibliographic field list with the nodes
        produced by `extract_bibliographic`, unless the "docinfo_xform"
        setting is false.
        """
        if not getattr(self.document.settings, 'docinfo_xform', 1):
            return
        document = self.document
        index = document.first_child_not_matching_class(
            nodes.PreBibliographic)
        if index is None:
            return
        candidate = document[index]
        if not isinstance(candidate, nodes.field_list):
            return
        # The insertion point is computed before the field list is
        # removed: the first child that is neither titular nor decorative.
        biblioindex = document.first_child_not_matching_class(
            (nodes.Titular, nodes.Decorative))
        nodelist = self.extract_bibliographic(candidate)
        del document[index]         # untransformed field list (candidate)
        document[biblioindex:biblioindex] = nodelist

    def extract_bibliographic(self, field_list):
        """
        Convert the fields of `field_list` into bibliographic nodes.

        Return a list holding the "docinfo" element (if it received any
        children) followed by the dedication and abstract topics (if
        present).  Fields that cannot be converted are appended to the
        "docinfo" element unchanged.
        """
        docinfo = nodes.docinfo()
        known_fields = self.language.bibliographic_fields
        labels = self.language.labels
        topics = {'dedication': None, 'abstract': None}
        for field in field_list:
            try:
                name = field[0][0].astext()
                normedname = nodes.fully_normalize_name(name)
                if (len(field) != 2 or normedname not in known_fields
                        or not self.check_empty_biblio_field(field, name)):
                    raise TransformError
                canonical = known_fields[normedname]
                node_class = self.biblio_nodes[canonical]
                if issubclass(node_class, nodes.TextElement):
                    if not self.check_compound_biblio_field(field, name):
                        raise TransformError
                    paragraph = field[1][0]
                    utils.clean_rcs_keywords(
                        paragraph, self.rcs_keyword_substitutions)
                    docinfo.append(node_class('', '', *paragraph))
                elif issubclass(node_class, nodes.authors):
                    self.extract_authors(field, name, docinfo)
                elif issubclass(node_class, nodes.topic):
                    if topics[canonical]:
                        field[-1] += self.document.reporter.warning(
                            'There can only be one "%s" field.' % name,
                            base_node=field)
                        raise TransformError
                    title = nodes.title(name, labels[canonical])
                    topics[canonical] = node_class(
                        '', title, classes=[canonical], *field[1].children)
                else:
                    docinfo.append(node_class('', *field[1].children))
            except TransformError:
                # Keep the field as a generic field; RCS keywords are
                # still cleaned when its body is a single paragraph.
                body = field[-1]
                if len(body) == 1 and isinstance(body[0], nodes.paragraph):
                    utils.clean_rcs_keywords(
                        body[0], self.rcs_keyword_substitutions)
                docinfo.append(field)
        nodelist = []
        if len(docinfo) != 0:
            nodelist.append(docinfo)
        for topic_name in ('dedication', 'abstract'):
            topic = topics[topic_name]
            if topic:
                nodelist.append(topic)
        return nodelist

    def check_empty_biblio_field(self, field, name):
        """
        Return a true value if the body of `field` is non-empty;
        otherwise attach a warning to the body and return None.
        """
        if len(field[-1]) >= 1:
            return 1
        field[-1] += self.document.reporter.warning(
            'Cannot extract empty bibliographic field "%s".' % name,
            base_node=field)
        return None

    def check_compound_biblio_field(self, field, name):
        """
        Return a true value if the body of `field` consists of exactly
        one paragraph; otherwise attach a warning to the body and return
        None.
        """
        if len(field[-1]) > 1:
            field[-1] += self.document.reporter.warning(
                'Cannot extract compound bibliographic field "%s".' % name,
                base_node=field)
            return None
        if isinstance(field[-1][0], nodes.paragraph):
            return 1
        field[-1] += self.document.reporter.warning(
            'Cannot extract bibliographic field "%s" containing '
            'anything other than a single paragraph.' % name,
            base_node=field)
        return None

    # (pattern, replacement) pairs handed to utils.clean_rcs_keywords.
    # The leading r'\$' is kept as a separate literal so that the
    # keyword patterns are not themselves expanded by RCS/CVS.
    rcs_keyword_substitutions = [
        (re.compile(r'\$' r'Date: (\d\d\d\d)[-/](\d\d)[-/](\d\d)[ T][\d:]+'
                    r'[^$]* \$', re.IGNORECASE), r'\1-\2-\3'),
        (re.compile(r'\$' r'RCSfile: (.+),v \$', re.IGNORECASE), r'\1'),
        (re.compile(r'\$[a-zA-Z]+: (.+) \$'), r'\1'),]

    def extract_authors(self, field, name, docinfo):
        """
        Append an "authors" element built from `field` to `docinfo`.

        If the field body cannot be interpreted as a list of authors, a
        warning is attached to the body and TransformError is re-raised.
        """
        try:
            body = field[1]
            if len(body) != 1:
                authors = self.authors_from_paragraphs(field)
            elif isinstance(body[0], nodes.paragraph):
                authors = self.authors_from_one_paragraph(field)
            elif isinstance(body[0], nodes.bullet_list):
                authors = self.authors_from_bullet_list(field)
            else:
                raise TransformError
            authornodes = [nodes.author('', '', *author)
                           for author in authors if author]
            if len(authornodes) < 1:
                raise TransformError
            docinfo.append(nodes.authors('', *authornodes))
        except TransformError:
            field[-1] += self.document.reporter.warning(
                'Bibliographic field "%s" incompatible with extraction: '
                'it must contain either a single paragraph (with authors '
                'separated by one of "%s"), multiple paragraphs (one per '
                'author), or a bullet list with one paragraph (one author) '
                'per item.'
                % (name, ''.join(self.language.author_separators)),
                base_node=field)
            raise

    def authors_from_one_paragraph(self, field):
        """
        Split the text of a single-paragraph field body into authors.

        The first separator in `self.language.author_separators` that
        yields more than one name is used.  Return a list with one
        single-`nodes.Text` list per non-empty name; raise
        TransformError if the paragraph text is empty.
        """
        text = field[1][0].astext().strip()
        if not text:
            raise TransformError
        for authorsep in self.language.author_separators:
            authornames = text.split(authorsep)
            if len(authornames) > 1:
                break
        stripped_names = (piece.strip() for piece in authornames)
        return [[nodes.Text(piece)] for piece in stripped_names if piece]

    def authors_from_bullet_list(self, field):
        """
        Return the children of each list item's paragraph, one entry
        per item.

        Raise TransformError if an item is not exactly one paragraph or
        if the list has no items.
        """
        authors = []
        for item in field[1][0]:
            if not (len(item) == 1 and isinstance(item[0], nodes.paragraph)):
                raise TransformError
            authors.append(item[0].children)
        if not authors:
            raise TransformError
        return authors

    def authors_from_paragraphs(self, field):
        """
        Return the children of each paragraph in the field body, one
        entry per paragraph.

        Raise TransformError if any body element is not a paragraph.
        """
        for item in field[1]:
            if isinstance(item, nodes.paragraph):
                continue
            raise TransformError
        return [paragraph.children for paragraph in field[1]]