Updated release tag for trunk changes 4328 to 4330.
[docutils.git] / src / RST.pm
blobc2ba5b3fb6144f05b19d1ab35239bd71640e790b
1 # $Id$
2 # Copyright (C) 2002-2005 Freescale Semiconductor, Inc.
3 # Distributed under terms of the GNU General Public License (GPL).
5 package RST;
6 # This package does parsing of RST files
8 =pod
9 =begin reST
10 =begin Usage
11 Defines for reStructuredText parser
12 -----------------------------------
13 -D align=<0|1> Allow inferring right/center alignment in
14 single line simple table cells (default is 1).
15 -D entryattr=<text>
16 Specifies attributes to be passed to table entry
17 (default is ''). Note: this option can be
18 changed on the fly within a table by using a perl
19 directive to set $main::opt_D{entryattr}.
20 -D includeext=<text>
21 A colon-separated list of extensions to check for
22 included files. Default is ":.rst:.txt".
23 -D includepath=<text>
24 A colon-separated list of directories for the
25 include directive to search. The special token
26 "<.>" represents the directory of the file
27 containing the directive (which may not be the
28 same as the directory in which trip is invoked, ".".
29 Default is "<.>".
30 -D nestinline[=<0|1>]
31 Specify whether to allow nesting of inline markup.
32 There are some limitations, like strong cannot be
33 nested within emphasis.
34 Default is 1 (1 if specified with no value).
35 -D perlpath=<text>
36 A colon-separated list of directories to search
37 for Perl modules. The special token "<INC>"
38 represents the default Perl include path.
39 Default is "<INC>".
40 -D report=<level> Set verbosity threshold; report system messages
41 at or higher than <level> (by name or number:
42 "info" or "1", warning/2, error/3, severe/4;
43 also, "none" or 5). Default is 2 (warning).
44 -D rowattr=<text> Specifies attributes to be passed to table rows
45 (default is ''). Note: this option can be
46 changed on the fly within a table by using a perl
47 directive to set $main::opt_D{rowattr}.
48 -D source=<text> Overrides the file name as the source.
49 -D tableattr=<text>
50 Specifies attributes to be passed to tables (default
51 is '${RST::DEFAULTS{tableattr}}').
52 Note: this option can be changed on the fly to
53 have tables with different characteristics by
54 using a perl directive to set
55 $main::opt_D{tableattr}.
56 -D tabstops=<num> Specifies that tab characters are assumed to tab
57 out to every <num> characters (default is 8).
58 -D xformoff=<regexp>
59 Turns off default transforms matching regexp.
60 (Used for internal testing.)
61 =end Usage
62 =end reST
63 =cut
65 # Global variables:
66 # Many static (read-only) variables defined in BEGIN blocks (not documented).
68 # ``$RST::NEXT_ID``: The next id to be returned by RST::Id();
69 # ``%RST::SEC_LEVEL``: Hash whose keys are header styles and whose
70 # value (if defined) indicates the level of
71 # sections encoded with that header style.
72 # ``@RST::SEC_DOM``: Array whose index is the section level and
73 # whose value is the section DOM object at that level.
74 # ``@RST::SEC_STYLE``: Array whose index is the section level and
75 # whose value is the section style at that level.
76 # ``@RST::ANONYMOUS_TARGETS``: Array of references to anonymous
77 # target DOMs in file order.
78 # ``%RST::REFERENCE_DOM``: Hash whose keys are tags and whose values
79 # are references to a hash with names or ids as
80 # keys and the associated target DOM object as
81 # value.
82 # ``%RST::TARGET_NAME``: Hash whose keys are namespace ids and whose
83 # values are references to a hash whose keys
84 # are names and whose value is a reference to
85 # an array of all the DOM objects having that
86 # name in that name space.
87 # ``%RST::ALL_TARGET_IDS``: Hash whose keys are ids and whose value
88 # is a reference to an array of all the DOM
89 # objects having that id.
90 # ``%RST::ALL_TARGET_NAMES``: Hash whose keys are names and whose
91 # value is a reference to an array of all the
92 # DOM objects having that name.
93 # ``%RST::IMPLICIT_SCHEME``: Hash whose keys are the names of implicit
94 # URI schemes and whose values are 1.
95 # ``%RST::MY_ROLES``: Hash whose keys are the role names that are
96 # currently defined for the current document
97 # (it gets reset between documents) and whose
98 # values are Role definition hash references.
99 # ``$RST::MY_DEFAULT_ROLE``: The current name of the default role
100 # for the current document. Initially
101 # ``title_reference``.
103 use strict;
105 # Initialized global variables
106 use vars qw($BULLETS $EMAIL $ENUM $ENUM_INDEX $FIELD_LIST $LINE_BLOCK
107 $MARK_END_TRAILER $MARK_START $MIN_SEC_LEN $OPTION
108 $OPTION_LIST $SECTION_HEADER $SEC_CHARS %DIRECTIVES
109 %ERROR_LEVELS %IMPLICIT_SCHEME %LEFT_BRACE %MARK_END
110 %MARK_TAG %MARK_TAG_START %MATCH_BRACE %ROLES
111 $DEFAULT_ROLE %XML_SPACE %DEFAULTS
112 %ALPHA_INDEX %ROMAN_VALS %CITSPACE %NAMESPACE @UNITS);
114 # Run-time global variables
115 use vars qw($NEXT_ID $TOPDOM %ALL_TARGET_IDS %ALL_TARGET_NAMES
116 %REFERENCE_DOM %SEC_LEVEL %TARGET_NAME @ANONYMOUS_TARGETS
117 @ENUM_STRINGS @SEC_DOM @SEC_STYLE %MY_ROLES $MY_DEFAULT_ROLE);
119 BEGIN {
120 use URIre;
121 $SEC_CHARS = '[^a-zA-Z0-9\s]';
122 $SECTION_HEADER = "(((?!$SEC_CHARS+\\n(?:\\n|\\Z))(?!::\\n|(?:(?:\\.\\.|__)\\n(?: |\\.\\.[ \\n]|__[ \\n]|\\n)))($SEC_CHARS)\\3+)\\n(.*\\n)?(($SEC_CHARS)\\6+\\n)?|^(?!(?:\\.\\.|__)(?: .*)?\\n(?:\.\.|__)[ \\n])(\\S.*\\n)(($SEC_CHARS)\\9+)\\n)";
123 $BULLETS = '[-*+]';
124 $LINE_BLOCK = '\|';
125 $MIN_SEC_LEN = 4;
126 my $rst_low_roman = 'm{0,4}(?:dc{0,3}|c[dm]|c{0,3})?(?:lx{0,3}|x[lc]|x{0,3})?(?:vi{0,3}|i[vx]|i{0,3})?';
127 my $rst_upp_roman = $rst_low_roman;
128 $rst_upp_roman =~ tr/a-z/A-Z/;
129 $ENUM_INDEX = "\\d+|[a-zA-Z]|$rst_low_roman|$rst_upp_roman|#";
130 $ENUM = "(\\()?($ENUM_INDEX)([\\).])";
131 $FIELD_LIST = ':(?! )[^:\n]*[^:\n ]:(?!\`[^\`]*?\`)';
132 $OPTION = '[+-][\w](?: [^ ,]+| ?<[^>]+>)?|(?:--?[\w][\w-]*|/[A-Z]+)(?:=[^ ,=]+| [^ ,]+|=<[^>]+>)?';
133 $OPTION_LIST = "(?:$OPTION)(?:, (?:$OPTION))*(?: |\\s*\\n )";
134 %ERROR_LEVELS = (1=>"INFO", 2=>"WARNING", 3=>"ERROR", 4=>"SEVERE");
135 $EMAIL = '[\w.-]+\@[\w.-]*[\w-]';
136 $MARK_START = '\*\*?|\`\`?|\||_\`|\[';
137 %MARK_END = ('*'=>'\*', '**'=>'\*\*', '`'=>'\`_?_?', '``'=>'\`\`',
138 '|'=>'\|_?_?', '_`'=>'\`', '['=>'\]__?',
139 ''=>"__?|$URIre::absoluteURI|$EMAIL");
140 $MARK_END_TRAILER = '[-\'\"\)\]\}\\\\>/:.,;!? ]|\Z';
141 %MARK_TAG = ('**'=>'emphasis', '****'=>'strong', '``'=>'interpreted',
142 '````'=>'literal', '||'=>'substitution_reference',
143 '||_'=>'substitution_reference',
144 '||__'=>'substitution_reference',
145 '_``'=>'target', '[]_'=>'footnote_reference',
146 '``_'=>'reference',
147 '_'=>'reference', '``__'=>'reference',
148 '__'=>'reference');
149 %MARK_TAG_START = ('*'=>'emphasis', '**'=>'strong',
150 '`'=>'interpreted text or phrase reference', '``'=>'literal',
151 '|'=>'substitution_reference', '_`'=>'target',
152 '['=>'footnote', ''=>'reference');
153 %MATCH_BRACE = ('"'=>'"', "'"=>"'", '('=>')', '['=>']', '{'=>'}',
154 '<'=>'>', ''=>'impossible', '_`'=>'`');
155 %LEFT_BRACE = ('>'=>'<', ')'=>'(', ']'=>'[', '}'=>'{');
156 my @implicit_schemes = qw(acap afs cid data dav fax file ftp go
157 gopher h323 http https im imap ipp ldap
158 mailserver mailto mid modem mupdate news
159 nfs nntp opaquelocktoken pop pres
160 prospero rtsp service sip sips soap.beep
161 soap.beeps tel telnet tftp tip tn3270
162 urn vemmi wais xmlrpc.beep xmlrpc.beeps
163 z39.50r z39.50s
165 @IMPLICIT_SCHEME{@implicit_schemes} = (1) x @implicit_schemes;
166 %DIRECTIVES = (admonition=> \&RST::Directive::admonition,
167 attention => \&RST::Directive::admonition,
168 caution => \&RST::Directive::admonition,
169 danger => \&RST::Directive::admonition,
170 error => \&RST::Directive::admonition,
171 hint => \&RST::Directive::admonition,
172 important => \&RST::Directive::admonition,
173 note => \&RST::Directive::admonition,
174 tip => \&RST::Directive::admonition,
175 warning => \&RST::Directive::admonition,
176 footer => \&RST::Directive::decoration,
177 header => \&RST::Directive::decoration,
178 section_numbering
179 => \&RST::Directive::sectnum,
180 section_autonumbering
181 => \&RST::Directive::sectnum,
182 csv_table => \&RST::Directive::table,
183 list_table=> \&RST::Directive::table,
184 restructuredtext_test_directive
185 => \&RST::Directive::test_directive,
187 %ROLES = (emphasis=>{tag=>'emphasis'},
188 strong=>{tag=>'strong'},
189 literal=>{tag=>'literal'},
190 subscript=>{tag=>'subscript'},
191 sub=>{alias=>'subscript'},
192 superscript=>{tag=>'superscript'},
193 sup=>{alias=>'superscript'},
194 ab=>{tag=>'abbreviation'},
195 ac=>{tag=>'acronym'},
196 inline=>{tag=>'inline'},
197 raw=>{tag=>'raw', attr=>{'xml:space'=>'preserve'},
198 check=>\&RST::Role::raw},
200 'raw-formatting'=>{tag=>'inline'},
201 'pep-reference'=>{alias=>'PEP'},
202 PEP=>{tag=>'reference',
203 attr=>{refuri=>"http://www.python.org/peps/pep-%04d.html"},
204 text=>"PEP %s",
205 check=>\&RST::Role::PEP,
207 'rfc-reference'=>{alias=>'RFC'},
208 RFC=>{tag=>'reference',
209 attr=>{refuri=>"http://www.faqs.org/rfcs/rfc%04d.html"},
210 text=>"RFC %s",
211 check=>\&RST::Role::RFC,
213 'title-reference'=>{tag=>'title_reference'},
214 title=>{alias=>'title-reference'},
215 t=>{alias=>'title-reference'},
217 $DEFAULT_ROLE = 'title-reference';
218 @UNITS = (qw(em ex px in cm mm pt pc), '');
219 %XML_SPACE = ('xml:space'=>'preserve');
220 %DEFAULTS = (align=>1, report=>2, includeext=>':.rst:.txt',
221 includepath=>'<.>', nestinline=>1,
222 perlpath=>'<INC>',
223 # tableattr=>'class="table" frame="border" rules="all"',
224 tableattr=>'border="1" class="docutils"',
225 tabstops=>8);
228 # Processes defaults for -D defines and resets global variables
229 # between documents.
230 # Arguments: document DOM object
231 # Returns: None
232 # Sets globals: $RST::NEXT_ID, %RST::SEC_LEVEL, @RST::SEC_DOM,
233 # @RST::SEC_STYLE, @RST::ANONYMOUS_TARGETS,
234 # %RST::REFERENCE_DOM, %RST::TARGET_NAME,
235 # %RST::ALL_TARGET_IDS, %RST::ALL_TARGET_NAMES, $RST::TOPDOM
236 # %RST::MY_ROLES
237 sub init {
238 my ($doc) = @_;
240 foreach (keys %DEFAULTS) {
241 $main::opt_D{$_} = $DEFAULTS{$_} unless defined $main::opt_D{$_};
243 foreach (keys %main::opt_D) {
244 # Force any defines with no values specified to be 1
245 $main::opt_D{$_} = 1
246 if defined $main::opt_D{$_} && $main::opt_D{$_} eq '';
249 undef $NEXT_ID;
250 undef %SEC_LEVEL;
251 @SEC_DOM = ($doc);
252 @SEC_STYLE = ("");
253 undef @ANONYMOUS_TARGETS;
254 undef %REFERENCE_DOM;
255 undef %TARGET_NAME;
256 undef %ALL_TARGET_IDS;
257 undef %ALL_TARGET_NAMES;
259 $TOPDOM = $doc;
261 # Handle the Perl include path
262 my $perl_inc = join(':', @INC);
263 my $new_inc = $main::opt_D{perlpath};
264 $new_inc =~ s/<inc>/$perl_inc/gi;
265 @INC = split(/:/, $new_inc);
266 delete $main::opt_D{perlpath};
267 $MY_DEFAULT_ROLE = $DEFAULT_ROLE;
268 %MY_ROLES = %ROLES;
271 # Returns a DOM object for a problematic with its ids.
272 # Arguments: message, reference id (optional), id (optional)
273 # Returns: DOM object, reference id, id
274 sub problematic {
275 my ($text, $refid, $id) = @_;
277 $refid = Id() unless defined $refid;
278 $id = Id() unless defined $id;
279 my $dom = new DOM('problematic', refid=>$refid, ids=>[ $id ]);
280 $dom->append(newPCDATA DOM($text));
281 return ($dom, $refid, $id);
284 # Returns a DOM object for a system message.
285 # Arguments: severity level, source, line number, message, literal text,
286 # key/value pairs for additional attributes
287 sub system_message {
288 my ($level, $source, $lineno, $msg, $lit, %attr) = @_;
289 my $dom = new DOM("system_message", level=>$level, line=>$lineno,
290 source=>$source,
291 type=>$ERROR_LEVELS{$level}, %attr);
292 my $para = new DOM('paragraph');
293 $para->append(newPCDATA DOM("$msg\n"));
294 $dom->append($para);
295 if (defined $lit && $lit ne '') {
296 my $lb = new DOM('literal_block', %XML_SPACE);
297 $lb->append(newPCDATA DOM($lit));
298 $dom->append($lb);
300 my $line = $lineno ? ":$lineno" : '';
301 print STDERR "$source$line ($ERROR_LEVELS{$level}/$level) $msg\n"
302 if $level >= $main::opt_D{report} && $source ne 'test data';
303 return $dom;
306 # Processes a bulleted list paragraph.
307 # Arguments: paragraph, source, line number
308 # Returns: processed paragraph, list of DOM objects and unprocessed paragraphs
309 sub BulletList {
310 my($para, $source, $lineno) = @_;
312 my @err;
313 my $lines = 0;
314 my ($processed, @unp);
315 $para =~ /^($BULLETS)(?: |\n)/o;
316 my $dom = new DOM('bullet_list', bullet=>$1);
317 my $bullet = "\\$1";
318 (undef, my @paras) = split /^($bullet(?: +|\n))/m, $para;
319 while (my ($bull, $p) = splice @paras, 0, 2) {
320 $p = '' unless defined $p;
321 my $li = new DOM('list_item');
322 $dom->append($li);
323 my $para = "$bull$p";
324 $para =~ s/^$bullet *//;
325 $para =~ s/^ //mg;
326 Paragraphs($li, $para, $source, $lineno+$lines);
327 $lines += $para =~ tr/\n//;
328 $processed .= $para;
330 return ($processed, @err, $dom, @unp);
333 # Coalesces a series of similar paragraphs and divides initial
334 # paragraph for unexpected indents.
335 # Argument: reference to array of paragraphs
336 # Returns: None (but modifies the paragraphs referenced by the argument)
337 sub Coalesce {
338 my ($paras) = @_;
339 # Note: consecutive paragraphs are two indices apart in the array, with
340 # any blank lines between them in the intermediate index. We also use
341 # the intermediate index to store error sentinels, which begin with a
342 # newline and have a non-blank character in the second line.
343 my $p;
344 my ($enumtype, $enumval, $enumprefix, $enumsuffix) = ('') x 4;
345 for ($p=0; $p <= 2 && $p < @$paras; $p++) {
346 #print STDERR "[",join("][",@{$paras}[0..2]),"]\n";
347 if (defined $paras->[$p]) {
348 # Pull out the part of the paragraph prior to a blank line
349 my @split = split /^(\s*\n)/, $paras->[$p], 2;
350 my ($pre_p, $post_p) = @split > 1 ? @split :
351 ($paras->[$p], '');
352 # May need to split the first paragraph
353 if ($pre_p =~ /^($BULLETS)(?: |\n)/so) {
354 # Bulleted list
355 if ((my @s = split /^(?![$1]| )(.)/m, $pre_p, 2) > 1) {
356 # Bulleted list has unexpected unindent
357 splice(@$paras, $p, 1,($s[0],
358 # This is a sentinel that an error occurred
359 "\n" . q(system_message(2, $source, $lineno, "Bullet list ends without a blank line; unexpected unindent.")),
360 "$s[1]$s[-1]$post_p"));
363 elsif ($pre_p =~ /^($LINE_BLOCK)(?: |\n)/so) {
364 # Line block
365 if ((my @s = split /^(?!$LINE_BLOCK(?:\s+\S|\n)| )(.)/m,
366 $pre_p, 2) > 1) {
367 # Line block has unexpected unindent
368 splice(@$paras, $p, 1,($s[0],
369 # This is a sentinel that an error occurred
370 "\n" . q(system_message(2, $source, $lineno, "Line block ends without a blank line.")),
371 "$s[1]$s[-1]$post_p"));
374 elsif ($pre_p =~ /^$SECTION_HEADER/om) {
376 elsif ($pre_p =~ /^((\.\.|__)( |\n))/) {
377 # A comment or anonymous target
378 $pre_p =~ s/^(.*\n?)//;
379 my $first = $1;
380 if ((my @s = split /^((?:\.\.|__)(?: |\n))/m, $pre_p, 2) > 1){
381 splice(@$paras, $p, 1, "$first$s[0]", "", "$s[1]$s[-1]");
384 elsif ($pre_p =~ /^( |\n)/) {
385 # These get dealt with elsewhere
387 elsif ($pre_p =~ /^$ENUM .*\n(?=\Z|\n| |$ENUM)/o) {
388 # An enumerated list
389 my ($prefix,$index,$suffix) = map(defined $_ ? $_ : '',
390 ($1,$2,$3));
391 my $type = EnumType($index);
392 $type = 'arabic' if $type eq '#';
393 my $val = EnumVal($index, $type);
394 $pre_p =~ s/^(.*\n)//;
395 my $first = $1;
396 my @enum_list;
397 while ($pre_p ne '') {
398 if ((my @s = split /^($ENUM .*\n(?=\Z|\n| |$ENUM))/mo,
399 $pre_p, 2) > 1) {
400 # Check for out-of-sequence enumerated list item
401 my ($pf,$in,$sf) = map(defined $_ ? $_ : '',
402 @s[2..4]);
403 my $v = EnumVal($in, $type);
404 if ($pf ne $prefix || $sf ne $suffix ||
405 ($v ne '#' && $v != $val+1)) {
406 my $enum_list = join('',@enum_list);
407 splice(@$paras, $p, 1, "$enum_list$first$s[0]",
408 "\n" . q(system_message(2, $source, $lineno, "Enumerated list ends without a blank line; unexpected unindent.")),
409 "$s[1]$s[-1]$post_p");
410 last;
412 else {
413 push(@enum_list, "$first$s[0]");
414 $first = $s[1];
415 $pre_p = "$s[-1]";
416 $val++;
419 else {
420 push(@enum_list, "$first$pre_p");
421 $first = "";
422 $pre_p = "";
425 push (@enum_list, $first) if $first ne '';
426 #print "$p: {\n",map("[$_]\n", @enum_list),"}\n";
427 # Check any enumerated lists for unexpected indent
428 my $prev_paras = '';
429 my $enum;
430 while ($enum = shift @enum_list) {
431 my $para = $enum;
432 $para =~ /^($ENUM )/o;
433 my $spaces = " " x length($1);
434 $para =~ s/^(.*\n)//;
435 my $first = $1;
436 if ((my @s = split /^(?!$spaces)(.)/m, $para, 2) > 1) {
437 my $rest = join('',@enum_list);
438 my @items =
440 # This is a sentinel that an error occurred
441 "\n" . q(system_message(2, $source, $lineno, "Enumerated list ends without a blank line; unexpected unindent.")),
442 "$s[1]$s[-1]$rest$post_p");
443 # Enumerated list has unexpected indent
444 splice(@$paras, $p, 1, "$prev_paras$first$s[0]",
445 @items);
446 last;
448 $prev_paras .= "$first$para";
451 elsif ($pre_p =~ /^$FIELD_LIST/) {
452 # A field list
453 if ((my @s = split /^(?! |$FIELD_LIST)(.)/m, $pre_p, 2) > 1) {
454 # Field list has unexpected indent
455 splice(@$paras, $p, 1,($s[0],
456 # This is a sentinel that an error occurred
457 "\n" . q(system_message(2, $source, $lineno, "Field list ends without a blank line; unexpected unindent.")),
458 "$s[1]$s[-1]$post_p"));
461 elsif ($pre_p =~ /^$OPTION_LIST/) {
462 # An option list
463 if ((my @s = split /^(?! |$OPTION_LIST)(.)/m, $pre_p, 2) > 1){
464 # Field list has unexpected indent
465 splice(@$paras, $p, 1,($s[0],
466 # This is a sentinel that an error occurred
467 "\n" . q(system_message(2, $source, $lineno, "Option list ends without a blank line; unexpected unindent.")),
468 "$s[1]$s[-1]$post_p"));
471 elsif (IsTable($pre_p)) {
472 # It's a table
473 if ($pre_p =~ /^[+][+-]+[+] *\n/ &&
474 (my @s = split /^([^|+])/m, $pre_p, 2) > 1) {
475 my $after = "$s[1]$s[-1]$post_p";
476 # Table is missing blank line
477 splice(@$paras, $p, 1, ($s[0],
478 # This is a sentinel that an error occurred
479 "\n" . q(system_message(2, $source, $lineno, "Blank line required after table.")),
480 $after));
481 if ($after =~ /^ /) {
482 splice(@$paras, $p+1, 0,
483 # This is a sentinel that an error occurred
484 "\n" . q(system_message(3, $source, $lineno, "Unexpected indentation.")),
485 "");
490 elsif ($pre_p =~ /^\S.*\n /) {
491 # A definition list
492 if (#$pre_p =~ /^\S.*\n\S/m ||
493 (my @s = split /^(\S.*\n)$/m, $pre_p, 2) > 1) {
494 # Definition list has unexpected indent
495 splice(@$paras, $p, 1,($s[0],
496 # This is a sentinel that an error occurred
497 "\n" . q(system_message(2, $source, $lineno, "Definition list ends without a blank line; unexpected unindent.")),
498 "$s[1]$post_p"));
501 else {
502 # A standard paragraph
503 if ((my @s = split /^( )/m, $pre_p, 2) > 1
504 && $pre_p !~ /:: *$/) {
505 # This "paragraph" has indentation or other problems
506 splice(@$paras, $p, 1,($s[0],
507 # This is a sentinel that an error occurred
508 "\n" . q(system_message(3, $source, $lineno, "Unexpected indentation.")),
509 "$s[1]$s[-1]$post_p"));
513 # Or may need to join consecutive paragraphs
514 if ($p >= 2 &&
515 # Don't consolidate paragraphs with errors in the middle
516 (defined $paras->[$p-1] && $paras->[$p-1] !~ /^\n\S/s &&
518 # Consecutive block quotes
519 (substr($paras->[$p-2],0,1) eq ' ' &&
520 substr($paras->[$p],0,1) eq ' ')
522 # Comments followed by indented text
523 ($paras->[$p-2] =~ /^((\.\. )|(__( |\n)))/ &&
524 $paras->[$p]=~ /^ /)
526 # Consecutive bulleted lists
527 ($paras->[$p-2] =~ /^($BULLETS)(?: |\n)/o &&
528 $paras->[$p] =~ /^(?:[$1]| )/)
530 # Consecutive enumerated lists
531 ($paras->[$p-2] =~ /^$ENUM .*\n(?=\Z|\n| |$ENUM)/o &&
532 #do {print "$p: $paras->[$p-2]~~~~~~$paras->[$p]========"; 1; } &&
533 do {
534 my ($prefix, $index, $suffix) =
535 map defined $_ ? $_ : '', ($1, $2, $3);
536 my $type = EnumType($index);
537 if (($type ne $enumtype && $type ne '#') ||
538 $prefix ne $enumprefix ||
539 $suffix ne $enumsuffix) {
540 $enumtype = $type;
541 $enumtype = 'arabic' if $enumtype eq '#';
542 $enumprefix = $prefix;
543 $enumsuffix = $suffix;
544 $enumval = EnumVal($index, $type);
545 $enumval = 1 if $enumval eq '#';
547 ($paras->[$p] =~ /^ / ||
548 $paras->[$p] =~ /^$ENUM .*\n(?=\Z|\n| |$ENUM)/o &&
549 #do { print "$prefix-$enumtype-$enumval-$suffix vs $1-$enumtype-$2-$3\n"; 1; } &&
550 do {
551 my $val = EnumVal($2, $enumtype);
552 $val = $enumval + 1 if $val eq '#';
553 my $oldval = $enumval;
554 $enumval = $val;
555 $val == $oldval+1;
557 && ($1 || '') eq $enumprefix && ($3 || '') eq $enumsuffix)
560 # Incomplete simple table
561 ($paras->[$p-2] =~ /^=+( +=+)+ *\n/ &&
562 $paras->[$p] =~ /^\001/)
564 # Consecutive field lists
565 ($paras->[$p-2] =~ /^$FIELD_LIST/o &&
566 $paras->[$p]=~ /^($FIELD_LIST| )/o)
568 # Consecutive definition lists
569 ($paras->[$p-2] =~ /^(?!\.\.|__( |\n)|$OPTION_LIST)\S.*\n /o &&
570 $paras->[$p]=~ /^(?!\.\.|__( |\n)|$OPTION_LIST|$FIELD_LIST|$BULLETS( |\n)|$ENUM )(\S.*\n)? /o)
572 # Consecutive option lists
573 ($paras->[$p-2] =~ /^$OPTION_LIST/o &&
574 $paras->[$p]=~ /^(($OPTION_LIST)| )/o)
575 ))) {
576 #print STDERR "Coalescing: [$paras->[$p-2]]\n[$paras->[$p-1]]\n[$paras->[$p]]\n";
577 splice(@$paras, $p-2, 3, "$paras->[$p-2]$paras->[$p-1]$paras->[$p]");
578 $p--;
583 # Defines a new role, optionally based upon an existing role
584 # Arguments: new role name, optional old role name, optional option key/values
585 # Returns: possible error message
586 sub DefineRole {
587 my ($role, $tag, %options) = @_;
589 $tag = 'inline' unless defined $tag;
590 return qq(cannot make "$role" into a class name.)
591 unless $role =~ /[a-z][-\w\.]*/i;
592 my $class = defined $options{class} ? $options{class} : $role;
593 return qq(invalid option value: (option: "class"; value: '$class')\ncannot make "$class" into a class name.)
594 unless $class =~ /[a-z][-\w\.]*/i;
595 # Default all options, etc. from the base tag
596 $RST::MY_ROLES{$role} = Util::DeepCopy($RST::MY_ROLES{$tag});
597 $RST::MY_ROLES{$role}{tag} = $RST::MY_ROLES{$tag}{tag};
598 $RST::MY_ROLES{$role}{attr}{classes} = [ $class ];
599 # Process format, prefix and suffix options
600 if (defined $options{format}) {
601 $RST::MY_ROLES{$role}{attr}{format} = $options{format};
602 delete $options{format};
604 $options{prefix} = HashifyFieldList($options{prefix}) if $options{prefix};
605 $options{suffix} = HashifyFieldList($options{suffix}) if $options{suffix};
606 # Merge any local options with the options of the underlying class
607 @{$RST::MY_ROLES{$role}{options}}{keys %options} =
608 values %options if %options;
609 return;
612 # Processes a definition list paragraph.
613 # Arguments: paragraph, source, line number
614 # Returns: processed paragraph, list of DOM objects and unprocessed paragraphs
615 sub DefinitionList {
616 my($para, $source, $lineno) = @_;
618 my $dom = new DOM('definition_list');
619 my ($processed, @unp);
621 (undef, my @paras) = split /^((?:\S.*)\n(?: +))/m, $para;
622 while (@paras > 1 && (my ($item, $def) = splice @paras, 0, 2)) {
623 my $para = "$item$def";
624 $para =~ /^(\S.*)\n( +)/;
625 my ($term, $spaces) = ($1, $2);
626 my $dli = new DOM('definition_list_item');
627 $dom->append($dli);
628 my $class = '';
629 my @errs;
630 if ($term =~ /:: *$/) {
631 push (@errs, system_message(1, $source, $lineno+1,
632 qq(Blank line missing before literal block (after the "::")? Interpreted as a definition list item.)));
634 # We have to handle the case where the ' : ' is
635 # within a literal quote.
636 # Get rid of all literal quotes
637 my %strings;
638 $term =~ s/(\A| )(``((?!``).)*``)(?=[ \n\]\):;])/
639 my $v = $2; my $s = \$v; bless $s,"STR"; $strings{$s}=$2; "$1$s"/ge;
640 if ($term !~ /^``((?!``).)*``/ && $term =~ /(.*?) : (.*)/) {
641 $term = $1;
642 $class = $2;
644 # Put the literal quotes back
645 $term =~ s/(STR=SCALAR\(0x[0-9a-f]+\))/$strings{$1} || $1/ge;
646 $class =~ s/(STR=SCALAR\(0x[0-9a-f]+\))/$strings{$1} || $1/ge;
647 my $def = new DOM('definition');
648 my $t = new DOM('term');
649 push(@errs, Inline($t, $term, $source, $lineno));
650 $dli->append($t);
651 if ($class ne '') {
652 my @classifiers = split / +: +/, $class;
653 foreach (@classifiers) {
654 my $classifier = new DOM('classifier');
655 push(@errs, Inline($classifier, $_, $source, $lineno));
656 $dli->append($classifier);
659 $def->append(@errs);
660 $dli->append($def);
661 $para =~ s/^(.*\n)//;
662 my $first = $1;
663 if ((my @s = split /\n(\S)/, $para, 2) > 1) {
664 # Check for unexpected unindents
665 $para = (shift @s) . "\n";
666 @paras = join '', @s, @paras;
668 $para =~ s/^$spaces//mg;
669 Paragraphs($def, $para, $source, $lineno+1);
670 $para = "$first$para";
671 $lineno += $para =~ tr/\n//;
672 $processed .= $para;
674 my $unp = join('',@paras);
675 if ($unp !~ /^$/) {
676 push @unp, system_message(2, $source, $lineno,
677 "Definition list ends without a blank line; unexpected unindent.");
678 push @unp, $unp;
680 return ($processed, $dom, @unp);
683 # Parses a directive and attaches it to a DOM if successful.
684 # Arguments: DOM object, source, line number, error message id,
685 # directive text, paragraph literal
686 # Returns: error flag,
687 # reference to array of DOM objects (possibly including input DOM),
688 # reference to array of unparsed paragraphs.
689 sub Directive {
690 my ($parent, $source, $lineno, $errmsgid, $dtext, $lit) = @_;
691 #print STDERR "Directive(",join(',',@_),")\n";
693 my @dom;
694 my @unprocessed;
695 my $error = 1;
696 $dtext =~ /(\s*)([\w.-]+)\s*:: *(.*)/s;
697 my ($pre, $directive, $body) = map defined $_ ? $_ : '',($1, $2, $3);
698 my $dname = $directive;
699 $directive =~ tr/[A-Z].-/[a-z]__/;
700 #print STDERR "[$pre][$directive][$body]\n";
701 my $subst = $parent->{tag} eq 'substitution_definition' ?
702 $parent->{attr}{names}[0] : '';
704 if ($dtext eq "\n") {
705 push(@dom, system_message(2, $source, $lineno,
706 qq($errmsgid "$subst" missing contents.),
707 $lit));
709 elsif ($directive eq '') {
710 push(@dom, system_message(2, $source, $lineno,
711 qq($errmsgid "$subst" empty or invalid.),
712 $lit))
713 if $subst ne '';
715 else {
716 if (! defined $DIRECTIVES{$directive}) {
717 # First see if there's a routine defined for it
718 my $d = "RST::Directive::$directive";
719 $DIRECTIVES{$directive} = \&$d if defined &$d;
721 if (! defined $DIRECTIVES{$directive}) {
722 push(@dom,system_message(1, $source, $lineno,
723 qq(No directive entry for "$dname" in module "RST::Directive".\nTrying "$dname" as canonical directive name.)));
724 eval("use Directive::$directive");
725 die "Error compiling $directive: $@" if $@ && ! $@ =~ /in \@INC/;
726 return 1, \@dom, [$lit] if defined $DIRECTIVES{$directive};
728 if ( defined $DIRECTIVES{$directive}) {
729 my $mylit = $parent->{tag} eq 'substitution_definition' ? $dtext :
730 $lit;
731 my @dir = eval {
732 &{$DIRECTIVES{$directive}}
733 ($dname, $parent, $source, $lineno, $dtext, $mylit); };
734 push(@dom, system_message(4, $source, $lineno,
735 qq(Error processing directive "$dname": $@),
736 $lit))
737 if $@;
738 my @doms = grep(/^DOM/, @dir);
739 push(@unprocessed,
740 map(split(/^(\s*\n)+/m, $_),grep(!/^DOM/, @dir)));
741 if (@doms >= 1 && $doms[0]{tag} eq 'system_message' || @dir == 0)
743 push(@dom, @doms);
744 push(@dom, system_message(2, $source, $lineno,
745 qq($errmsgid "$subst" empty or invalid.),
746 $lit))
747 if $subst ne '';
749 else {
750 $parent->append(@doms);
751 if ($parent->{tag} eq 'substitution_definition') {
752 my $err = RegisterName($parent, $source, $lineno);
753 push (@dom, $err) if $err;
755 $error = 0;
758 else {
759 push(@dom, system_message(3, $source, $lineno,
760 qq(Unknown directive type "$dname".),
761 $subst eq '' ? $lit : $dtext));
762 push(@dom, system_message(2, $source, $lineno,
763 qq($errmsgid "$subst" empty or invalid.),
764 $lit))
765 if $subst ne '';
768 #print STDERR "Directive -> [",join(',',@dom),"][",join(',',@unprocessed),"]\n";
769 return ($error, \@dom, \@unprocessed);
772 # Processes a enumerated list paragraph.
773 # Arguments: paragraph, source, line number
774 # Returns: processed paragraph, list of DOM objects and unprocessed paragraphs
775 sub EnumList {
776 my($para, $source, $lineno) = @_;
778 my $lines = 0;
779 my ($processed, @unp, @err);
780 $para =~ /^$ENUM .*\n(?=\Z|\n| |$ENUM)/o;
781 my ($prefix, $index, $suffix) = map defined $_ ? $_ : '', ($1, $2, $3);
782 my $type = EnumType($index);
783 $type = 'arabic' if $type eq '#';
784 my $dom = new DOM('enumerated_list', enumtype=>$type,
785 prefix=>"$prefix", suffix=>$suffix);
786 my $val = EnumVal($index, $type);
787 $val = 1 if $val eq '#';
788 if ($val != 1) {
789 $dom->{attr}{start} = $val;
790 push(@err,
791 system_message(1, $source, $lineno,
792 qq(Enumerated list start value not ordinal-1: "$index" (ordinal $val))));
795 while ($para =~ /^$ENUM .*\n(?=\Z|\n| |$ENUM)/o) {
796 my $next = '';
797 my $li = new DOM('list_item');
798 $dom->append($li);
799 $para =~ s/^($ENUM )\s*//o;
800 my $marker = $1;
801 my $spaces = " " x length($marker);
802 # See if there are any subsequent enumerated lists
803 if ((my @s = split /^((?!\A)$ENUM .*\n(?=\Z|\n| |$ENUM))/om, $para, 2)
804 > 1) {
805 $para = $s[0];
806 $next = "$s[1]$s[-1]";
809 $para =~ s/^$spaces//mg;
810 Paragraphs($li, $para, $source, $lineno+$lines);
811 $lines += $para =~ tr/\n//;
812 $processed .= $para;
813 $para = $next;
816 return ($processed, $dom, @err, @unp);
819 # Given the initial index of an enumerated list, returns the enumeration type.
820 # Arguments: Initial index
821 # Returns: one of "arabic", "loweralpha", "upperalpha", "lowerroman",
822 # "upperroman" or "#" (for auto-enumerated)
823 sub EnumType {
824 my ($index) = @_;
825 BEGIN { @ENUM_STRINGS = ('arabic', 'loweralpha', 'upperalpha',
826 'lowerroman', 'upperroman', '#'); }
827 my @matches =
828 $index=~/^(?:([0-9]+)|([a-hj-z])|([A-HJ-Z])|([ivxlcdm]+)|([IVXLCDM]+))|(\#)$/;
829 my @defs = grep(defined $matches[$_], 0 .. 5);
830 my $type = defined $defs[0] ? $ENUM_STRINGS[$defs[0]] : 'error';
831 return $type;
834 # Given an index of an enumerated and the enumeration type, returns the
835 # index value.
836 # Arguments: Index, enumerated type
837 # Returns: number or -1 (for badly formatted Roman numerals/arabic)
838 sub EnumVal {
839 my ($index, $enumtype) = @_;
840 BEGIN {
841 @ALPHA_INDEX{'a' .. 'z'} = (1 .. 26);
842 %ROMAN_VALS = (i=>1, v=>5, x=>10, l=>50, c=>100, d=>500, m=>1000);
844 # Handle autonumber
845 return $index if $index eq '#';
846 # First handle arabic
847 return $index =~ /^\d+$/ ? $index : -1 if $enumtype eq 'arabic';
848 # Deal with alpha types
849 $index =~ tr/A-Z/a-z/;
850 return defined $ALPHA_INDEX{$index} ? $ALPHA_INDEX{$index} : -1
851 if $enumtype =~ /alpha/;
852 # Now left with roman numerals
853 return -1 if $index !~ /^m{0,4}(?:dc{0,3}|c[dm]|c{0,3})?(?:lx{0,3}|x[lc]|x{0,3})?(?:vi{0,3}|i[vx]|i{0,3})?$/;
854 my $val = 0;
855 my @chars = split(//, $index);
856 while (@chars) {
857 my $charval = $ROMAN_VALS{shift @chars};
858 if (@chars == 0 || $charval >= $ROMAN_VALS{$chars[0]}) {
859 $val += $charval;
861 else {
862 $val += $ROMAN_VALS{shift @chars} - $charval;
865 return $val;
868 # Processes an explicit markup paragraph.
869 # Arguments: parent, paragraph, source, line number
870 # Returns: processed paragraph, new parent,
871 # list of DOM objects and unprocessed paragraphs
872 sub Explicit {
873 my($parent, $para, $source, $lineno) = @_;
874 #print "Explicit(",join(',',@_),")\n";
876 my $new_parent = $parent;
877 my $lines = 0;
878 my ($processed, @unp, @err, @dom);
880 # Check for the end of the explicit markup block
881 my $badindent = 0;
882 if ((my @s = split /^(?!\A|\n|\Z| )/m, $para, 2) > 1) {
883 push(@unp, $s[1]);
884 $para = $s[0];
885 $badindent = 1;
887 $processed = $para;
889 $para =~ /^(?:\.\.|(__))(?: (?:\[((?:[\#*])?[\w.-]*)\] *|\|(?! )([^\|]*\S)\| *|(_.*:.*)|([\w\.-]+\s*::.*))?)?(.*)/s;
890 my ($anon, $footnote, $subst, $target, $dir, $next) =
891 ($1, $2, $3, $4, $5, $6);
892 my $btext = $next
893 if defined $footnote || defined $subst || defined $dir;
894 if (substr($para,0,3) eq "..\n") {
895 my $undef;
896 ($anon, $footnote, $target) = ($undef) x 3;
898 if ($anon) {
899 $target = "$anon:$next";
901 #print "[$anon][$footnote][$target]\n";
902 if (defined $footnote) {
903 # It's a footnote or citation
904 my %attr;
905 my $tag = 'footnote';
906 if ($footnote =~ /^([\#*])(.*)/) {
907 my ($auto, $name) = ($1, $2);
908 $attr{auto} = $auto eq '#' ? 1 : $auto;
909 if ($name ne '') {
910 $attr{names} = [ NormalizeName($name) ];
911 $attr{ids} = [ NormalizeId($name) ];
914 elsif ($footnote !~ /^\d+$/) {
915 $tag = 'citation';
916 $attr{names} = [ NormalizeName($footnote) ];
917 $attr{ids} = [ NormalizeId($footnote) ];
919 else {
920 $attr{names} = [ $footnote ];
922 $attr{ids} = [ Id() ] unless defined $attr{ids} ;
923 my $dom = new DOM($tag, %attr);
924 if ($footnote !~ /^[\#*]/) {
925 my $label = new DOM('label');
926 $label->append(newPCDATA DOM($footnote));
927 $dom->append($label);
929 my $err = RegisterName($dom, $source, $lineno);
930 $dom->append($err) if $err;
931 # Get rid of indentation spaces
932 $btext =~ /^(?!\A)( +)/m;
933 my $spaces = $1 || '';
934 $btext =~ s/^$spaces//mg;
935 my @redo;
936 Paragraphs($dom, $btext, $source, $lineno);
937 push(@dom, $dom);
939 elsif (defined $subst) {
940 # It's a substitution definition
941 my $dom = new DOM('substitution_definition',
942 names=>[ NormalizeName($subst, 'keepcase') ]);
943 my ($err, $doms, $unp) =
944 Directive($dom, $source, $lineno, 'Substitution definition',
945 $btext, $para);
946 push(@dom, @$doms);
947 push(@dom, $dom) unless $err;
948 $processed = '' if @$unp && $unp->[0] eq $para;
949 unshift(@unp, @$unp);
951 elsif (defined $target) {
952 # It's a hyperlink target
953 my %attr;
954 my $dom;
955 my %char_class = ('`'=>'.', ''=>"[^:]");
956 $target =~ /^(_((?:\\:|[^:])+): *)(.*)/s
957 unless $target =~ /^(_\`((?:.|\n)+)\`: *)(.*)/s;
958 my ($id, $uri) = ($2 || '', $3);
959 if ($id eq '_') {
960 $attr{anonymous} = 1;
961 $id = Id();
963 my $t = $1;
964 my $indent = $anon ? 3 :
965 $uri =~ /^./ ? length($t)+($anon ? 0 : 3) :
966 do { $uri =~ /\n( +)/; length($1 || '') };
967 my $spaces = ' ' x $indent;
968 if ($uri =~ /^(?:\`((?:.|\n)*)\`|([\w.-]+))_$/) {
969 my $name = main::FirstDefined($1, $2);
970 # Get rid of newline-indents
971 $name =~ s/\n$spaces/ /g;
972 $attr{refname} = NormalizeName($name);
974 else {
975 # Get rid of newline-indents
976 $uri =~ s/\n$spaces//g;
977 $uri =~ s/\n //g;
978 chomp $uri;
979 $uri =~ s/ *//g;
980 $uri =~ s/\\(.)/$1/g;
981 if ($uri ne '') {
982 $uri = "mailto:$uri"
983 if $uri !~ /^$URIre::scheme:/ &&
984 $uri =~ /\@/ && $uri !~ /^\`.*\`$/;
985 $uri = $1 if $uri =~ /^\`(.*)\`$/;
986 $attr{refuri} = $uri;
989 $attr{names} = [ NormalizeName($id) ]
990 unless $attr{anonymous};
991 $dom = new DOM('target', ids=>[ NormalizeId($id) ], %attr);
992 my $err = RegisterName($dom, $source, $lineno);
993 push (@dom, $err) if $err;
994 push (@dom, $dom);
996 elsif (defined $dir) {
997 # It's a directive
998 my ($err, $doms, $unp) =
999 Directive($parent, $source, $lineno, 'Directive', "$dir$btext",
1000 $para);
1001 push(@dom, @$doms);
1002 unshift(@unp, @$unp);
1003 $processed = '' if @$unp && $unp->[0] eq $para;
1004 $new_parent = $SEC_DOM[-1]
1005 if $parent->{tag} =~ /^(document|section)$/;
1007 else {
1008 # It's a comment
1009 $para =~ s/^(\.\.\s*)//;
1010 my $first = $1;
1011 if ($para =~ /^( +)/m) {
1012 my $spaces = $1;
1013 $para =~ s/^$spaces//mg;
1015 my $dom = new DOM('comment', %XML_SPACE);
1016 $dom->append(newPCDATA DOM($para))
1017 if $para ne '';
1018 $para = "$first$para";
1019 push(@dom, $dom);
1022 if ($badindent) {
1023 push(@dom,
1024 system_message(2, $source, $lineno + ($para =~ tr/\n//),
1025 "Explicit markup ends without a blank line; unexpected unindent."))
1026 unless substr($unp[-1], 0, 2) eq "..";
1028 # Annote the dom object with source, lineno, and lit
1029 foreach (@dom) {
1030 if ($_->{tag} ne 'system_message') {
1031 $_->{source} = $source;
1032 $_->{lineno} = $lineno;
1033 $_->{lit} = $processed;
1034 chomp $_->{lit};
1037 return ($processed, $new_parent, @err, @dom, @unp);
1040 # Processes a field list paragraph.
1041 # Arguments: paragraph, source, line number
1042 # Returns: processed paragraph, list of DOM objects and unprocessed paragraphs
1043 sub FieldList {
1044 my($para, $source, $lineno) = @_;
1046 my $dom = new DOM('field_list');
1047 my $lines = 0;
1048 my ($processed, @unp);
1049 (undef, my @paras) = split /^($FIELD_LIST)/om, $para;
1050 while (my ($fl, $b) = splice @paras, 0, 2) {
1051 my ($name, $para) = "$fl$b" =~ /^:([^:\n]+): *(.*)/s;
1052 my $field = new DOM('field');
1053 $field->{source} = $source;
1054 $field->{lineno} = $lineno+$lines;
1055 $dom->append($field);
1056 my $n = new DOM('field_name');
1057 my $body = new DOM('field_body');
1058 $field->append($n, $body);
1059 $body->append(Inline($n, $name, $source, $lineno+$lines));
1060 # Remove initial spaces
1061 my @spaces = $para =~ /^(?!\A)( +)/mg;
1062 my $spaces = defined $spaces[0] ? $spaces[0] : '';
1063 foreach (@spaces) {
1064 $spaces = $_ if length($_) < length($spaces);
1066 $para =~ s/^$spaces//mg;
1067 Paragraphs($body, $para, $source, $lineno+$lines);
1068 $lines += $para =~ tr/\n//;
1069 $processed .= $para;
1072 return ($processed, $dom, @unp);
1075 # Takes a field list and turns it into a hash
1076 # Arguments: text of field list
1077 # Returns: hash reference
1078 sub HashifyFieldList {
1079 my ($text) = @_;
1081 my %hash;
1082 if ($text =~ /^ *($RST::FIELD_LIST)/mo) {
1083 my @fields = split /^(?=:)/m, $text;
1084 foreach my $field (@fields) {
1085 next unless $field =~ /^:([^:\n]*): *(.*)/s;
1086 my ($fname,$val) = ($1, $2);
1087 chomp $val;
1088 $hash{$fname} = $val;
1092 return \%hash;
1095 # Returns the next identifier.
1096 # Arguments: None
1097 # Uses global: $RST::NEXT_ID
1098 sub Id {
1099 return "id" . ++$NEXT_ID;
1102 # Parses inline markup.
1103 # Arguments: DOM object of parent, text to parse, source, line number
1104 # Returns: list of system_message DOMs if errors
1105 sub Inline {
1106 my ($parent, $text, $source, $lineno) = @_;
1107 #print STDERR "Inline($parent,$text)\n";
1109 my @problems;
1111 my ($is_start, $pre, $start, $next, $pending, $processed);
1113 ($is_start, $pre, $start, $next) = InlineStart($text);
1114 while ($is_start) {
1115 $pending .= $pre;
1116 $text = $next;
1118 # Is there an end for this start?
1119 my ($is_end, $mid, $end, $next1) = InlineEnd($text, $start);
1120 if (! $is_end) {
1121 # We don't have an end
1122 if ($start =~ /^(\[|)$/) {
1123 $pending .= "$start$mid";
1124 $text = $next1;
1126 else {
1127 $lineno += $pending =~ tr/\n//;
1128 $pending = RemoveBackslashes($pending);
1129 $parent->append(newPCDATA DOM($pending))
1130 if $pending ne '';
1131 $pending = "";
1132 # We have something problematic here
1133 my ($dom,$refid,$id) = problematic($start);
1134 $parent->append($dom);
1135 my $err = system_message(2, $source, $lineno,
1136 "Inline $MARK_TAG_START{$start} start-string without end-string.",
1137 "", backrefs=>[ $id ],
1138 ids=>[ $refid ]);
1139 push (@problems, $err);
1142 else {
1143 my $lit = "$start$mid$end";
1144 my %attr;
1145 if ($MARK_TAG_START{$start} =~ /interpreted/ &&
1146 $pending =~ s/:([-\w\.]+):$//) {
1147 $attr{role} = $1;
1148 $attr{position} = 'prefix';
1151 $lineno += $pending =~ tr/\n//;
1152 $pending = RemoveBackslashes($pending);
1153 $parent->append(newPCDATA DOM($pending))
1154 if $pending ne '';
1155 $pending = '';
1156 $text = $next1;
1157 my @content;
1158 my @errs;
1159 my $tag = $MARK_TAG{"$start$end"};
1160 my $implicit;
1161 if (! defined $tag && $start eq '') {
1162 # This must be an implicit markup
1163 $mid = "$mid$end";
1164 $end = "_";
1165 $tag = 'reference';
1166 $implicit = 1;
1168 if ($tag eq 'interpreted' && $text =~ s/^:([-\w\.]+)://) {
1169 my $role = $1;
1170 if (defined $attr{role}) {
1171 # We have something problematic here
1172 my ($dom,$refid,$id) = problematic(":$attr{role}:`$mid`:$role:");
1173 $parent->append($dom);
1174 my $err = system_message(2, $source, $lineno,
1175 "Multiple roles in interpreted text (both prefix and suffix present; only one allowed).",
1176 "", backrefs=>[ $id ],
1177 ids=>[ $refid ]);
1178 push (@problems, $err);
1179 last;
1181 elsif ($text =~ s/^(__?)//) {
1182 # We have something problematic here
1183 my ($dom,$refid,$id) = problematic("`$mid`:$role:$1");
1184 $parent->append($dom);
1185 my $err = system_message(2, $source, $lineno,
1186 "Mismatch: both interpreted text role suffix and reference suffix.",
1187 "", backrefs=>[ $id ],
1188 ids=>[ $refid ]);
1189 push (@problems, $err);
1190 last;
1192 else {
1193 $attr{role} = $role;
1194 $attr{position} = 'suffix';
1197 elsif ($tag =~ /reference/) {
1198 if (defined $attr{role}) {
1199 # We have something problematic here
1200 my ($dom,$refid,$id) = problematic(":$attr{role}:`$mid$end");
1201 $parent->append($dom);
1202 my $err = system_message(2, $source, $lineno,
1203 "Mismatch: both interpreted text role prefix and reference suffix.",
1204 "", backrefs=>[ $id ],
1205 ids=>[ $refid ]);
1206 push (@problems, $err);
1207 last;
1210 my $name = $mid;
1211 $name =~ s/\n/ /g;
1212 my $uri;
1214 if ($tag eq 'substitution_reference' && $end =~ /_$/) {
1215 # Need to create a new level of reference
1216 my $dom = new
1217 DOM ($tag, refname=>NormalizeName($name, 'keepcase'));
1218 $dom->append(newPCDATA DOM($mid));
1219 $dom->{source} = $source;
1220 $dom->{lineno} = $lineno;
1221 $dom->{lit} = $lit;
1222 push (@content, $dom);
1223 $tag = 'reference';
1224 $mid = "";
1226 $mid =~ /((?:\s|\A)<([^ <][^<]*[^ ])>)$/;
1227 my $embeduri = $2;
1228 if ((defined $embeduri || $implicit) &&
1229 do {$uri = $implicit ? $mid : $embeduri;
1230 $uri =~ s/\s//g;
1231 $uri =~ /^($URIre::URI_reference|$EMAIL)$/o}) {
1232 # Implicit references may pick up extra punctuation at
1233 # the end.
1234 if ($pre ne '<' && substr($next1,0,1) ne '>' &&
1235 $implicit && $uri =~ /(.*)([\)\]\};:\'\",.>\?])$/) {
1236 my ($newuri, $lastchar) = ($1, $2);
1237 if (defined $LEFT_BRACE{$lastchar}) {
1238 # It's a close brace/paren/bracket/angle bracket
1239 # It's part of the URI if the URI would otherwise
1240 # be unmatched.
1241 my $rb = "\\$lastchar";
1242 my $lb = "\\$LEFT_BRACE{$lastchar}";
1243 my $nleft = $newuri =~ s/($lb)/$1/g;
1244 my $nright = $newuri =~ s/($rb)/$1/g;
1245 if ($nleft <= $nright) {
1246 $mid = $uri = $newuri;
1247 $text = "$lastchar$text";
1250 else {
1251 $mid = $uri = $newuri;
1252 $text = "$lastchar$text";
1255 $uri =~ s/\\(.)/$1/g; # Remove backslash quotes
1256 $uri = "mailto:$uri"
1257 if $uri !~ /^$URIre::scheme:/o && $uri =~ /^$EMAIL$/o;
1258 $attr{refuri} = $uri;
1259 $mid =~ s/^<(.*)>$/$1/ if $embeduri;
1260 $mid =~ s/(\s<[^<]+>)$//;
1261 my $miduri = defined $1 ? $1 : '';
1262 $attr{name} = $mid unless $implicit;
1263 $lineno += $miduri =~ tr/\n//;
1265 elsif ($end =~ /__/) {
1266 $attr{anonymous} = 1;
1267 $attr{name} = NormalizeName($mid, 'keepcase')
1268 if $mid ne '';
1270 else {
1271 $tag = 'citation_reference'
1272 if $start eq '[' && $name !~ /^([\#*].*|\d+)$/;
1273 if ($tag =~ /footnote|citation/ && $name =~ /^[\#*](.*)/) {
1274 $attr{auto} = substr($name,0,1) eq '*' ? '*' : 1;
1275 $mid = "";
1276 $name = $1;
1278 if ($name ne '') {
1279 $attr{refname} = $tag eq 'substitution_reference' ?
1280 NormalizeName($name, 'keepcase') :
1281 NormalizeName($name);
1282 $attr{name} = NormalizeName($name, 'keepcase')
1283 if $tag eq 'reference' && $start ne '|';
1286 if ($tag =~ /footnote|citation/) {
1287 $attr{ids} = [ Id() ];
1290 elsif ($tag eq 'target') {
1291 $attr{ids} = [ NormalizeId($mid) ];
1292 $attr{names} = [ NormalizeName($mid) ];
1294 # Do parse-time interpretations of interpreted text
1295 my $was_interpreted;
1296 my $suffix;
1297 if ($tag eq 'interpreted') {
1298 $lit = $attr{position} eq 'prefix' ?
1299 ":$attr{role}:$lit" : "$lit:$attr{role}:"
1300 if defined $attr{role};
1301 $attr{role} = $MY_DEFAULT_ROLE if ! defined $attr{role};
1302 if (defined $attr{role} && defined $MY_ROLES{$attr{role}}) {
1303 my $role = $MY_ROLES{$attr{role}};
1304 $role = $MY_ROLES{$role->{alias}}
1305 while defined $role->{alias};
1306 my @errs = &{$role->{check}}($mid, $lit, $parent,
1307 $source, $lineno, $attr{role})
1308 if defined $role->{check};
1309 # delete $attr{role};
1310 delete $attr{position};
1311 if (@errs) {
1312 push @problems, @errs;
1313 last;
1315 $tag = $role->{tag};
1316 if (defined $role->{attr}) {
1317 foreach my $attr (keys %{$role->{attr}}) {
1318 $attr{$attr} =
1319 ref($role->{attr}{$attr}) eq 'ARRAY' ?
1320 $role->{attr}{$attr} :
1321 sprintf $role->{attr}{$attr}, $mid;
1324 $mid = sprintf $role->{text}, $mid
1325 if defined $role->{text};
1326 my $options = $role->{options};
1327 if ($options && $options->{prefix} &&
1328 defined (my $pfx = ($options->{prefix}{$main::opt_w} ||
1329 $options->{prefix}{default}))) {
1330 my $raw = new DOM('raw', format=>$main::opt_w);
1331 $raw->append(newPCDATA DOM($pfx));
1332 $parent->append($raw);
1334 if ($options && $options->{suffix} &&
1335 defined (my $sfx = ($options->{suffix}{$main::opt_w} ||
1336 $options->{suffix}{default}))) {
1337 $suffix = new DOM('raw', format=>$main::opt_w);
1338 $suffix->append(newPCDATA DOM($sfx));
1340 $was_interpreted = 1;
1342 elsif (defined $attr{role}) {
1343 # We have something problematic here
1344 my ($dom,$refid,$id) = problematic($lit);
1345 $parent->append($dom);
1346 push @problems, UnknownRole($attr{role}, $source, $lineno, '',
1347 backrefs=>[ $id ],
1348 ids=>[ $refid ]);
1349 last;
1352 my $dom = new DOM($tag, %attr);
1353 $dom->{source} = $source;
1354 $dom->{lineno} = $lineno;
1355 $dom->{lit} = $lit;
1356 if ($attr{role}) {
1357 $dom->{role} = $attr{role};
1358 delete $dom->{attr}{role};
1360 if ($tag eq 'target') {
1361 my $err = RegisterName($dom, $source, $lineno);
1362 push (@problems, $err) if $err;
1364 $parent->append($dom);
1365 $parent->append($suffix) if $suffix;
1366 if ($tag =~ /^(literal)$/ && ! $was_interpreted || $implicit ||
1367 $tag eq 'raw' || ! $main::opt_D{nestinline}) {
1368 $mid = RemoveBackslashes($mid)
1369 if $tag !~ /^(literal|raw)$/;
1370 $dom->append(newPCDATA DOM($mid))
1371 if $mid ne '';
1373 else {
1374 @errs = Inline($dom, $mid, $source, $lineno)
1375 if $mid ne '';
1376 push @problems, @errs;
1378 $dom->append(@content);
1379 if ($tag eq 'reference' && defined $attr{refuri} &&
1380 $end !~ /__/ && ! $implicit && ! $was_interpreted) {
1381 my $dom = new DOM('target',
1382 refuri=>$attr{refuri},
1383 ids=>[ NormalizeId($mid) ],
1384 names=>[ NormalizeName($mid) ]);
1385 my $err = RegisterName($dom, $source, $lineno);
1386 push (@problems, $err) if $err;
1387 $parent->append($dom);
1389 $lineno += $mid =~ tr/\n//;
1392 } continue {
1393 ($is_start, $pre, $start, $next) =
1394 InlineStart($text, substr($pending, -1));
1397 $pending .= $text;
1398 $pending = RemoveBackslashes($pending);
1399 $parent->append(newPCDATA DOM($pending))
1400 if $pending !~ /^(\n|)$/;
1402 return @problems;
1405 # Finds the matching end mark for an inline start mark. Works even if there
1406 # is intervening nested markup.
1407 # Arguments: text, start mark, start mark of outer nesting (may be null)
1408 # Returns: boolean indicating match found, text between marks, end mark,
1409 # text after end mark
1410 sub InlineEnd {
1411 #print STDERR ++$dbg::ienest," InlineEnd(",join(',',@_),")\n";
1412 my ($text, $start, $outer_start) = @_;
1413 my ($is_end, $orig_mid, $end, $orig_next) =
1414 InlineFindEnd($text, $start, $outer_start);
1415 my ($full_mid, $next) = ($orig_mid, $orig_next);
1416 goto do_return
1417 unless $is_end && $main::opt_D{nestinline} && $start ne '``';
1419 # We only need to recurse to get it right if the start symbol is
1420 # "*" or "`" (emphasis or interpreted/target)
1421 my ($is_start, $pre, $nest_start, $next2) = InlineStart($text)
1422 if $start =~ /(\*|\`)$/;
1423 my $full_pre = $pre;
1424 while ($is_start && length($full_pre) < length($full_mid)) {
1425 #print STDERR "[$is_start][$full_pre][$full_mid]\n";
1426 my ($nest_is_end, $nest_mid, $nest_end, $nest_next) =
1427 InlineEnd($next2, $nest_start, $start);
1428 if (! $nest_is_end && $nest_start eq '') {
1429 # Check for a later start
1430 $text = "$nest_start$next2";
1431 $full_pre .= substr($text,0,1);
1432 $text = substr($text,1);
1433 ($is_start, $pre, $nest_start, $next2) =
1434 InlineStart($text, substr($full_pre, -1));
1435 $full_pre .= $pre if $is_start;
1437 else {
1438 # Skip over the nested start
1439 $full_pre .= "$nest_start$nest_mid$nest_end";
1440 $full_mid = $full_pre;
1441 $text = $nest_next;
1443 # Look for the next start/end pair
1444 my ($new_end, $mid);
1445 # What we thought was our end may have been swallowed up by
1446 # the nested start; find the new end.
1447 ($nest_is_end, $mid, $new_end, $next) =
1448 InlineFindEnd($text, $start);
1449 if (! $nest_is_end) {
1450 ($full_mid, $next) = ($orig_mid, $orig_next);
1451 last;
1453 $full_mid .= $mid;
1454 ($is_start, $pre, $nest_start, $next2) =
1455 InlineStart($text, substr($full_pre, -1));
1456 $full_pre .= $pre if $is_start;
1457 ($orig_mid, $orig_next, $end) = ($full_mid, $next, $new_end);
1461 do_return:
1462 #print STDERR $dbg::ienest--," InlineEnd->[$is_end][$full_mid][$end][$next]\n";
1463 return ($is_end, $full_mid, $end, $next);
1466 # Finds the first possible matching end mark for an inline start mark. Does
1467 # not take into account intervening nested markup.
1468 # Arguments: text, start mark, start mark of outer nesting (may be null)
1469 # Returns: boolean indicating match found, text between marks, end mark,
1470 # text after end mark
1471 sub InlineFindEnd {
1472 #print STDERR "InlineFindEnd(",join(',',@_),")\n";
1473 my ($text, $start, $nest_start, $null_string_ok) = @_;
1474 my $match = 0;
1475 my ($mid, $end, $after, @problems) = ('', '', '');
1476 my $nest_trailer = defined $nest_start && defined $MARK_END{$nest_start} ?
1477 $MARK_END{$nest_start} : "\001";
1478 while (! $match &&
1479 (my @s = split /((\S|\A)($MARK_END{$start})(?=$MARK_END_TRAILER|$nest_trailer|\n))/, $text, 2) > 1) {
1480 my ($next, $after);
1481 ($next, $end,$after) = ("$s[0]$s[2]", $s[3], $s[-1]);
1482 #print STDERR "$lineno: #$start#$next#$end#$s[-1]\n";
1483 if (("$start$end" =~ /^_/ && $start ne '_`' &&
1484 ($next =~ /([\._-])\1/ || $next =~ /[\`\]]$/)) ||
1485 ($start eq '[' && $next !~ /^(?=.)[\#\*]?[\w\.-]*$/)) {
1486 $mid = "$next$end";
1487 $text = $after;
1488 last;
1490 $match = 1;
1491 my @content;
1492 if ($MARK_TAG_START{$start} ne 'literal') {
1493 if (substr($next,-1, 1) eq "\\") {
1494 # It's backslash quoted; not a real end mark
1495 substr($next,-1, 1) = "\\$end";
1496 $match = 0;
1499 $mid = "$mid$next";
1500 $text = $after;
1502 #print STDERR "InlineFindEnd -> [$match][$mid][$end][$text]\n";
1503 return ($match, $mid, $end, $text);
1506 # Finds the place where inline markup starts.
1507 # Arguments: text, character preceding text
1508 # Returns: Boolean indicating markup was found, characters preceding markup
1509 # start, start markup string, characters after markup start
1510 sub InlineStart {
1511 #print STDERR "InlineStart(",join(',',@_),")\n";
1512 my ($text, $previous) = @_;
1513 my $arg = $text;
1514 my $pre = '';
1516 while (#do{print STDERR "[$text]\n";1} &&
1517 $text =~ m(^(.*?)(^|[-\'\"\(\[\{</: ])(?:($MARK_START)(\S)|(?=(?=[^-_\'\"\(\[\{</: \\\n])(\S*[^\\\s])__?($MARK_END_TRAILER|\n))|(?=$URIre::absoluteURI|$EMAIL))(.*))mos) {
1518 my ($processed, $prechar,$start,$postchar,$after) =
1519 map defined $_ ? $_ : '', ($1,$2,$3,$4,$7);
1520 my $pending = $start if $start eq '[';
1521 $previous = substr($pre, -1) if $pre ne '';
1522 my $pchar = $prechar eq '' ? $previous : $prechar;
1523 #print STDERR "<$pchar><$start><$postchar><$after>\n";
1524 my $validstart = 1;
1525 if (defined $pchar && defined $MATCH_BRACE{$pchar} &&
1526 $postchar eq $MATCH_BRACE{$pchar} ||
1527 $postchar eq $start && $start ne '') {
1528 # It's within quotes
1529 $processed .= "$prechar$start$postchar";
1530 $text = $after;
1531 $validstart = 0;
1533 elsif ($start eq '' && defined $pchar &&
1534 $pchar !~ m!(\A|[-\'\"\(\[\{</: \\])$!) {
1535 # It seems to be a reference, but prechar isn't allowed
1536 my @s = split m!(([-\'\"\(\[\{</: \\])|(__?)(?=$MARK_END_TRAILER|\n))!, $text,2 ;
1537 $pchar = $s[2] if defined $s[2];
1538 my $anon = defined $s[3] ? $s[3] : '';
1539 ($processed,$after) = ("$processed$s[0]$pchar$anon","$s[-1]");
1540 $validstart = 0;
1541 $text = $after;
1543 elsif ($start eq '' &&
1544 (my @s = split /(($URIre::scheme):(?:$URIre::hier_part|$URIre::opaque_part))/o,
1545 $text, 2) > 1) {
1546 # It seems to be implicit markup, but it's not a recognized scheme
1547 my $scheme = $s[2];
1548 if (! $IMPLICIT_SCHEME{$scheme}) {
1549 ($processed, $after) = ("$s[0]$s[1]", "$s[-1]");
1550 $validstart = 0;
1551 $text = $after;
1553 else {
1554 $processed = "$processed$prechar";
1557 else {
1558 $processed = "$processed$prechar";
1559 $after = "$postchar$after";
1561 $pre .= $processed;
1562 #print STDERR "InlineStart -> [$validstart][$pre][$start][$after]\n" if $validstart;
1563 return ($validstart, $pre, $start, $after) if $validstart;
1566 #print STDERR "InlineStart -> 0\n";
1567 return 0;
1570 # Returns whether a reStructuredText string represents a valid table object.
1571 # Arguments: string
1572 # Returns: true or false
1573 sub IsTable {
1574 my ($text) = @_;
1576 chomp $text;
1577 my @lines = split(/\n/, $text);
1578 my $first = $lines[0];
1579 return 0 unless defined $first;
1580 # Check for a validly constructed grid table
1581 if ($first =~ /^[+]([-=]+[+])+ *$/) {
1582 my $l;
1583 for ($l=1; $l < @lines; $l++) {
1584 $_ = $lines[$l];
1585 return 0 unless /^[|+].*[|+] *$/;
1586 return 1 if $l > 1 && /^[+][+-]+[+] *$/;
1588 return 0;
1590 # Check for a validly constructed simple table
1591 elsif ($first =~ /^=+( +=+)+ *$/) {
1592 return 1;
1594 return 0;
1597 # Processes a line block paragraph.
1598 # Arguments: paragraph, source, line number
1599 # Returns: processed paragraph, list of DOM objects and unprocessed paragraphs
1600 sub LineBlock {
1601 my($para, $source, $lineno) = @_;
1603 my @err;
1604 my $lines = 0;
1605 my ($processed, @unp);
1606 my $dom = new DOM('line_block');
1607 # Calculate our minimum indentation
1608 my @indents = map length $_, $para =~ /^(?:$LINE_BLOCK)( +)\S/gm;
1609 my $indent = $indents[0];
1610 grep do { $indent = $_ if $_ < $indent }, @indents;
1611 my $spaces = ' ' x $indent;
1612 my @paras = split /^$LINE_BLOCK(?:$spaces(\S))/m, $para;
1613 my $prev = shift @paras || '';
1614 if ($prev) {
1615 if ($prev =~ /^$LINE_BLOCK *$/) {
1616 my $li = new DOM('line');
1617 $dom->append($li);
1619 else {
1620 # Start with another line block
1621 $prev =~ s/^$LINE_BLOCK$spaces/|/gm;
1622 my ($prevproc, @prevdom) =
1623 LineBlock($prev, $source, $lineno+$lines);
1624 $dom->append(grep ref $_ eq 'DOM', @prevdom);
1626 $lines += $prev =~ tr/\n//;
1627 $processed .= $prev;
1629 while (my ($pfx, $para) = splice @paras, 0, 2) {
1630 my $li = new DOM('line');
1631 $dom->append($li);
1632 $para = "$pfx$para";
1633 # Check for nested line blocks
1634 my $nest;
1635 if ((my @s = split /^($LINE_BLOCK(?: +\S))/mo, $para, 2) > 1) {
1636 ($para, $nest) = ($s[0], "$s[1]$s[-1]");
1638 # Check for immediately following blank lines
1639 ($para, my @s) = split /^$LINE_BLOCK *$/m, $para;
1640 $para =~ s/^ +//mg;
1641 push @err, Inline($li, $para, $source, $lineno+$lines);
1642 $lines += $para =~ tr/\n//;
1643 $processed .= $para;
1644 foreach (@s) {
1645 my $li = new DOM('line');
1646 $dom->append($li);
1647 $lines++;
1648 $processed .= "$LINE_BLOCK\n";
1650 if ($nest) {
1651 # Process nested line block
1652 $nest =~ s/^$LINE_BLOCK$spaces/|/gm;
1653 my ($nestproc, @nestdom) =
1654 LineBlock($nest, $source, $lineno+$lines);
1655 $dom->append(grep(ref $_ eq 'DOM', @nestdom));
1656 $lines += $nest =~ tr/\n//;
1657 $processed .= $nest;
1660 return ($processed, $dom, @err, @unp);
1663 # Normalizes an attribute by putting it in lower case and replacing sequences
1664 # of special characters with hyphens.
1665 # Arguments: string, implicit
1666 # Returns: normalized string
1667 sub NormalizeId {
1668 my ($s, $implicit) = @_;
1669 $s = '' unless defined $s;
1670 chomp $s;
1671 # Get rid of any initial numbering of implicit targets
1672 $s =~ s/^(\d+\.)+\s+// if $implicit;
1673 $s =~ s/\n/ /g;
1674 $s = NormalizeName($s);
1675 # Get rid of special characters
1676 $s =~ s/[^\w\s\'\.-]//g;
1677 # Translate sequences of spaces to a single hyphen
1678 $s =~ s/[\s\'\._]+/-/g;
1679 $s =~ s/^-|-$//g;
1680 $s = Id() if $s eq '';
1681 return $s;
1684 # Normalizes an attribute by putting it in lower case and replacing sequences
1685 # of spaces with a single space.
1686 # Arguments: string, flag to keep case
1687 # Returns: normalized string
1688 sub NormalizeName {
1689 my ($s, $keepcase) = @_;
1690 return unless defined $s;
1691 chomp $s;
1692 # Remove backslash-space combos
1693 $s =~ s/\\ //g;
1694 # Remove initial spaces
1695 $s =~ s/^\s+//;
1696 # Remove trailing spaces
1697 $s =~ s/\s+$//;
1698 # Translate to lower case
1699 $s = lc $s unless $keepcase;
1700 # Convert strings of spaces to a single space
1701 $s =~ s/[\s]+/ /g;
1702 # Remove inline markup characters
1703 $s =~ s/(^|(?!\\).)([*\`|])/$1 eq "\\" ? "$1$2" : $1/ge;
1704 $s =~ s/(^|(?!\\).)([*\`|])/$1 eq "\\" ? "$1$2" : $1/ge;
1705 # Handle backslashes
1706 $s =~ s/\\(.)/$1/g;
1707 return $s;
1710 # Processes an option list paragraph.
1711 # Arguments: paragraph, source, line number
1712 # Returns: processed paragraph, list of DOM objects and unprocessed paragraphs
1713 sub OptionList {
1714 my($para, $source, $lineno) = @_;
1716 my $dom = new DOM('option_list');
1717 my ($processed, @unp);
1718 (undef, my @paras) =
1719 split /^((?:$OPTION)(?:, (?:$OPTION))*(?: |\s*\n ))/om, $para;
1720 while (my($pfx, $para) = splice @paras, 0, 2) {
1721 $pfx =~ /^((?:$OPTION)(?:, (?:$OPTION))*)( |\s*\n )/o;
1722 my ($options, $sep) = ($1, $2);
1723 $para = "$sep$para";
1724 my @options = split(/, /, $options);
1725 my $oli = new DOM('option_list_item');
1726 $dom->append($oli);
1727 my $og = new DOM('option_group');
1728 $oli->append($og);
1729 my $option;
1730 foreach $option (@options) {
1731 $option =~ /^(-.)()(<[^>]+>)/ ||
1732 $option =~ /^([^ =]+)(?:([= ])(.*))?/;
1733 my ($string, $del, $argument) = ($1, $2, $3);
1734 my $opt = new DOM('option');
1735 $og->append($opt);
1736 my $os = new DOM('option_string');
1737 $os->append(newPCDATA DOM($string));
1738 $opt->append($os);
1739 if (defined $argument) {
1740 my $oa = new DOM('option_argument', delimiter=>$del);
1741 $oa->append(newPCDATA DOM($argument));
1742 $opt->append($oa);
1745 my $desc = new DOM('description');
1746 $oli->append($desc);
1747 my $proc = "$options$para";
1748 # Remove initial spaces
1749 $para =~ s/^ +//;
1750 my @spaces = $para =~ /^(?!\A)( +)/mg;
1751 my $spaces = defined $spaces[0] ? $spaces[0] : '';
1752 foreach (@spaces) {
1753 $spaces = $_ if length($_) < length($spaces);
1755 $para =~ s/^$spaces//mg;
1756 Paragraphs($desc, $para, $source, $lineno);
1757 $lineno += $proc =~ tr/\n//;
1758 $processed .= $proc;
1761 return ($processed, $dom, @unp);
1764 # Recursively parses a reStructuredText string that does not contain sections.
1765 # Arguments: DOM object of parent, text to parse, source, start line number
1766 # Returns: None, but appends parsed objects to the parent
1767 sub Paragraphs {
1768 my ($parent, $text, $source, $lineno) = @_;
1769 #$INDENT .= " ";
1771 return unless defined $text;
1772 # Convert any tabs to spaces
1773 while ($text =~ s/^([^\t\n]*)\t/
1774 my $l = length($1);
1775 my $ts = $main::opt_D{tabstops};
1776 my $s = " " x ($ts - ($l % $ts));
1777 "$1$s"/gem) {
1779 # Convert form feeds and vertical tabs to spaces
1780 $text =~ s/[\013\014]/ /g;
1781 # To aid keeping simple tables together, we do an initial pass
1782 # over the file quoting all the lines in simple tables to begin
1783 # with a control-A (octal 001) character.
1784 $text = QuoteSimpleTables($text);
1786 #print "${INDENT}Paragraphs(",join(',',@_),")\n";
1787 # Split into paragraphs
1788 my @para = map(do{s/^ +$//;$_},split(/^(\s*\n)+/m, $text));
1790 my $processed;
1791 my $exp_literal = 0; # Are we expecting a literal block
1792 my $doc_sec = $parent->{tag} eq 'section' ? "Section" :
1793 "Document or section";
1794 my $para;
1795 my $new_literal = 0; # Will we expect a literal block next time
1796 my @unprocessed;
1797 while (@para) {
1798 my @dom;
1799 #print STDERR "[",join("][",@para),"]";
1800 Coalesce(\@para);
1801 #print STDERR "->[",join("][",@para),"]\n";
1803 $para = shift(@para);
1804 #print STDERR "[$para]\n";
1806 my $dom;
1807 my $got_literal; # Did we get a literal block
1808 @unprocessed = ();
1810 if ((my @s = split /^$SECTION_HEADER/om, $para, 2) > 1) {
1811 if ($s[0] ne '') {
1812 $para = $s[0];
1813 unshift(@para, "$s[1]$s[-1]");
1815 else {
1816 if ($exp_literal) {
1817 $parent->append
1818 (system_message(2, $source, $lineno,
1819 "Literal block expected; none found."));
1822 my ($new_parent, $unp, @result);
1824 ($para, $unp, $new_parent, @result) =
1825 SectionBreaks($parent, $para, $source, $lineno, $para[0]);
1826 $parent->append(@result);
1827 if ($para eq '') {
1828 $para = $unp;
1830 else {
1831 $parent = $new_parent;
1832 $doc_sec = "Section";
1833 push(@unprocessed, $unp) if $unp ne '';
1834 next;
1839 if ($para =~ /^(\s*\n)*$/s) {
1840 $new_literal = $got_literal = $exp_literal;
1842 # Check for error sentinels
1843 elsif ($para =~ s/^\n//) {
1844 push (@dom, eval($para));
1845 $para = '';
1846 $new_literal = $got_literal = $exp_literal;
1848 # Check for explicit markup blocks
1849 elsif ($para =~ /^(?:\.\.|(__))( |\n)/s) {
1850 my @result;
1851 ($para, $parent, @result) = Explicit($parent, $para, $source,
1852 $lineno);
1853 push(@dom, grep(ref($_) eq 'DOM', @result));
1854 unshift(@para, grep(ref($_) ne 'DOM', @result));
1856 # Check for bulleted lists
1857 elsif ($para =~ /^($BULLETS)(?: |\n)/o) {
1858 my @result;
1859 ($para, @result) = BulletList($para, $source, $lineno);
1860 push(@dom, grep(ref($_) eq 'DOM', @result));
1861 unshift(@para, grep(ref($_) ne 'DOM', @result));
1863 # Check for line blocks
1864 elsif ($para =~ /^($LINE_BLOCK)(?: |\n)/o) {
1865 my @result;
1866 ($para, @result) = LineBlock($para, $source, $lineno);
1867 push(@dom, grep(ref($_) eq 'DOM', @result));
1868 unshift(@para, grep(ref($_) ne 'DOM', @result));
1870 # Check for enumerated lists
1871 elsif ($para =~ /^$ENUM .*\n(?=\Z|\n| |$ENUM)/o) {
1872 my @result;
1873 ($para, @result) = EnumList($para, $source, $lineno);
1874 push(@dom, grep(ref($_) eq 'DOM', @result));
1875 unshift(@para, grep(ref($_) ne 'DOM', @result));
1877 # Check for doctest blocks
1878 elsif ($para =~ /^>>> /) {
1879 my $dom = new DOM('doctest_block', %XML_SPACE);
1880 $dom->append(newPCDATA DOM($para));
1881 push(@dom, $dom);
1883 # Check for field lists
1884 elsif ($para =~ /^$FIELD_LIST/o) {
1885 my @result;
1886 ($para, @result) = FieldList($para, $source, $lineno);
1887 push(@dom, grep(ref($_) eq 'DOM', @result));
1888 unshift(@para, grep(ref($_) ne 'DOM', @result));
1890 # Check for option lists
1891 elsif ($para =~ /^$OPTION_LIST/o) {
1892 my @result;
1893 ($para, @result) = OptionList($para, $source, $lineno);
1894 push(@dom, grep(ref($_) eq 'DOM', @result));
1895 unshift(@para, grep(ref($_) ne 'DOM', @result));
1897 # Check for tables
1898 elsif ($para =~ /^(=+( +=+)+|[+](-+[+])+) *\n/ && IsTable($para)) {
1899 push(@dom, Table($para, $source, $lineno));
1901 # Check for block quotes
1902 elsif (substr($para, 0, 1) eq ' ') {
1903 # It's indented: it must be a block quote or indented literal
1904 my $dom;
1905 if ($exp_literal) {
1906 $dom = new DOM('literal_block', %XML_SPACE);
1908 else {
1909 $dom = new DOM('block_quote');
1911 push @dom, $dom;
1912 # Compute the minimum indent of my lines
1913 my $min_indent = 0xffff;
1914 my @spaces = $para =~ /^( *)\S/mg;
1915 foreach (@spaces) {
1916 my $len = length($_);
1917 last if $len == 0;
1918 $min_indent = $len if $len < $min_indent;
1920 # Make sure nothing is unindented
1921 my $badindent = 0;
1922 if ((my @s = split /^(\S)/m, $para, 2) > 1) {
1923 unshift(@para, "$s[1]$s[-1]");
1924 $para = $s[0];
1925 $badindent = 1;
1927 my $spaces = ' ' x $min_indent;
1928 $para =~ s/^$spaces//mg;
1929 if ($exp_literal) {
1930 $dom->append(newPCDATA DOM($para));
1932 else {
1933 # Check for an attribution
1934 my $attr = $1
1935 if ($dom->{tag} eq 'block_quote' &&
1936 $para =~ s/\n(?:---?|\\u2014)(?!-) *(.*(\n( *).*(\n\2\S.*)*)?\Z)/\n/m);
1937 Paragraphs($dom, $para, $source, $lineno);
1938 if (defined $attr) {
1939 my ($spaces) = $attr =~ /\n( *)/;
1940 $attr =~ s/^$spaces//gm if defined $spaces;
1941 my $attribution = new DOM('attribution');
1942 $dom->append($attribution);
1943 $attribution->append(newPCDATA DOM($attr));
1944 $para .= $attr;
1947 my $block = $exp_literal ? "Literal block" : "Block quote";
1948 unshift(@para,
1949 "\n" .
1950 qq(system_message(2, \$source, \$lineno,
1951 "$block ends without a blank line; unexpected unindent.")))
1952 if $badindent;
1953 $got_literal = 1;
1955 # Check for quoted literals (must precede check for definition lists)
1956 elsif ($exp_literal && $para =~ /^(($SEC_CHARS)[^\n]*\n(?:\2[^\n]*\n)*)(.*)/so) {
1957 my ($lit, $quote, $next) = ($1, $2, $3);
1958 my $dom = new DOM('literal_block', %XML_SPACE);
1959 push @dom, $dom;
1960 $dom->append(newPCDATA DOM($lit));
1961 if ($next ne '') {
1962 unshift @para, $next;
1963 if (substr($next, 0, 1) eq ' ') {
1964 unshift(@para, "\n" .
1965 qq(system_message(3, \$source, \$lineno,
1966 "Unexpected indentation.")));
1968 else {
1969 unshift(@para, "\n" .
1970 qq(system_message(3, \$source, \$lineno,
1971 "Inconsistent literal block quoting.")));
1974 $para = $lit;
1975 $got_literal = 1;
1977 # Check for definition lists
1978 elsif ($para =~ /^(\S.*)\n( +)/) {
1979 my @result;
1980 ($para, @result) = DefinitionList($para, $source, $lineno);
1981 push(@dom, grep(ref($_) eq 'DOM', @result));
1982 unshift(@para, grep(ref($_) ne 'DOM', @result));
1984 # Check for transitions
1985 elsif ($para =~ /^(($SEC_CHARS)\2\2\2+)$/o) {
1986 if (length($1) < 4) {
1987 push(@dom, system_message(1, $source, $lineno,
1988 "Unexpected possible title overline or transition.\nTreating it as ordinary text because it's so short."));
1989 my $p = new DOM('paragraph');
1990 $p->append(newPCDATA DOM($para));
1991 push(@dom, $p);
1993 elsif ($parent->{tag} !~ /^(document|section)$/) {
1994 push(@dom, system_message(4, $source, $lineno,
1995 "Unexpected section title or transition.",
1996 $para));
1998 else {
1999 my $last_sibling = $parent->num_contents() ?
2000 $parent->{content}[-1] : {};
2001 my $transition = new DOM('transition');
2002 push(@dom, $transition);
2003 $transition->{source} = $source;
2004 $transition->{lineno} = $lineno;
2007 # It must just be a paragraph
2008 else {
2009 my $p = new DOM('paragraph');
2010 my $pre;
2011 if ((($pre) = $para =~ /(.*):: *$/s) &&
2012 (! defined $pre || $pre !~ /(^|[^\\])(\\\\)*\\$/)) {
2013 # We've got a literal block tagged on to us
2014 $new_literal = 1;
2015 $para =~ s/(^|.)(\s*):: *\n$/!$1 ? '' : $2 ? "$1\n" : "$1:\n"/e;
2017 if ($para ne "") {
2018 push(@dom,($p, Inline($p, $para, $source, $lineno)));
2019 # Clean up trailing whitespace
2020 $p->{content}[-1]{text} =~ s/ +$//
2021 if defined $p->{content}[-1]{text};
2024 if ($exp_literal && ! $got_literal) {
2025 $parent->append
2026 (system_message(2, $source, $lineno,
2027 "Literal block expected; none found."));
2029 $parent->append(@dom);
2031 continue {
2032 $exp_literal = $new_literal;
2033 $lineno += $para =~ tr/\n//;
2034 $processed .= $para;
2035 $new_literal = 0;
2036 # Push unprocessed information back to front of list
2037 my @unp;
2038 foreach (@unprocessed) {
2039 my @p = split(/^(\s*\n)+/m, $_);
2040 push (@unp, @p);
2042 unshift (@para, @unp);
2045 if ($exp_literal) {
2046 $parent->append
2047 (system_message(2, $source, $lineno,
2048 "Literal block expected; none found."));
2052 # Parses a reStructuredText document.
2053 # Arguments: First line of file, whether we're at end of file
2054 # Returns: DOM object
2055 # Uses globals: <> file handle
2056 sub Parse {
2057 my ($first_line, $eof) = @_;
2058 my $next_first_line;
2059 my $source = defined $main::opt_D{source} ? $main::opt_D{source} :
2060 $ARGV;
2061 my @file;
2062 if (! $eof) {
2063 while (<>) {
2064 push @file, $_;
2065 if (eof) {
2066 close ARGV;
2067 $next_first_line = <>;
2068 $eof = eof;
2069 last;
2073 my $file = join('',@file);
2074 my $dom = new DOM ('document', source=>$source);
2075 $dom->{source} = $source;
2076 my $text = "$first_line$file";
2078 init($dom);
2079 Paragraphs($dom, $text, $source, 1);
2081 # Do transformations on the DOM
2082 use Transforms;
2083 my $transform;
2084 foreach $transform (@Transforms::TRANSFORMS) {
2085 next if (defined $main::opt_D{xformoff} &&
2086 $transform =~ /$main::opt_D{xformoff}/o);
2087 my $t = $transform;
2088 $t =~ s/\./::/g;
2089 if (! defined &$t) {
2090 $dom->append
2091 (system_message(4, $source, 0,
2092 qq(No transform code found for "$transform".)));
2094 else {
2095 no strict 'refs';
2096 &$t($dom);
2100 return $dom, $next_first_line, $eof;
2103 # Quotes lines involved in simple tables that could be confused with
2104 # section headers by starting them with a control-A (octal 001)
2105 # character. Works only with properly formatted tables (ones followed
2106 # by a blank line).
2107 # Arguments: text string
2108 # Returns: quoted text string
2109 sub QuoteSimpleTables {
2110 my($text) = @_;
2111 return "" unless defined $text;
2112 my $processed = '';
2113 while ((my @s = split /^(=+( +=+)+ *\n)/m, $text, 2) > 1) {
2114 my $line = $s[1];
2115 my $len = length($line);
2116 $processed = "$processed$s[0]$s[1]";
2117 $text = "$s[-1]";
2118 if ((my @s = split /^((=[ =]+= *\n)(\n|\Z))/m, $text, 2) > 1) {
2119 my $table = "$s[0]$s[2]";
2120 $text = "$s[3]$s[-1]";
2121 $table =~ s/^/\001/gm;
2122 $processed = "$processed$table";
2125 return "$processed$text";
2128 # Registers a target name and returns an error DOM if it is an illegal
2129 # duplicate.
2130 # Arguments: target DOM, source, line number
2131 # Returns: optional error DOM
2132 # Uses globals: %TARGET_NAME
2133 sub RegisterName {
2134 my ($dom, $source, $lineno) = @_;
2135 my $error = '';
2136 my $casename = defined $dom->{attr}{names} ? $dom->{attr}{names}[0] : '';
2137 my $name = lc $casename;
2138 push(@ANONYMOUS_TARGETS, $dom) if $dom->{attr}{anonymous};
2140 my $tag = $dom->{tag};
2141 if ($tag =~ /^(footnote|substitution|citation)/) {
2142 $REFERENCE_DOM{$tag}{$casename} =
2143 $REFERENCE_DOM{"$tag.lc"}{$name} = $dom
2144 if $casename ne '';
2145 $REFERENCE_DOM{$tag}{$dom->{attr}{ids}[0]} = $dom
2146 if defined $dom->{attr}{ids};
2148 return unless defined $name;
2149 my $uri = $dom->{attr}{refuri};
2150 my $level = 1;
2151 my $target;
2152 my %tags;
2153 BEGIN {%NAMESPACE = (section=>'target', substitution_definition=>'subst',
2155 %CITSPACE= (); }# citation=>'footcit', footnote=>'footcit');}
2156 my $space = $NAMESPACE{$tag} || 'target';
2157 #print "$dom->{tag}: $name [$space]\n";
2158 foreach $target (@{$TARGET_NAME{$space}{$name}}) {
2159 next if $name eq '' || defined $target->{attr}{names} &&
2160 $target->{attr}{names}[0] ne $casename;
2161 my $ttag = $target->{tag};
2162 $tags{$ttag}++;
2163 if ($tag =~ /substitution/) {
2164 $level = 3;
2166 if (((defined $uri && ($target->{attr}{refuri} || '') ne $uri) ||
2167 (! defined $uri &&
2168 # ($tag !~ 'section' && $ttag ne 'section') &&
2169 # Both targets are explicit
2170 ($tag =~ /^(target|footnote|citation)$/ &&
2171 $ttag =~ /^(target|footnote|citation)$/) &&
2172 ($CITSPACE{$tag} || '') eq ($CITSPACE{$ttag} || '')))
2173 && $level < 2) {
2174 $level = 2;
2176 if ($ttag ne $tag || $tag ne 'target' || $level == 2) {
2177 $target->{attr}{dupnames} = $dom->{attr}{names};
2178 delete $target->{attr}{names};
2181 push (@{$TARGET_NAME{$space}{$name}}, $dom);
2182 push (@{$ALL_TARGET_IDS{$dom->{attr}{ids}[0]}}, $dom)
2183 if defined $dom->{attr}{ids};
2184 push (@{$ALL_TARGET_NAMES{$name}}, $dom);
2185 my @same_name_targets =
2186 grep ($_ ne $dom &&
2187 (defined $_->{attr}{names} && $_->{attr}{names}[0] ||
2188 defined $_->{attr}{dupnames} && $_->{attr}{dupnames}[0] || '')
2189 eq $casename,
2190 @{$TARGET_NAME{$space}{$name}}) if $name ne '';
2191 if (@same_name_targets > 0) {
2192 my %attr;
2193 if ($tag !~ /substitution/) {
2194 if ($tags{$tag}) {
2195 $dom->{attr}{dupnames} = $dom->{attr}{names};
2196 delete $dom->{attr}{names};
2198 $dom->{attr}{ids} = [ Id() ]
2199 unless $dom->{attr}{ids}[0] =~ /^id\d+$/;
2200 my $id = $dom->{attr}{ids}[0];
2201 $attr{backrefs} = [ $id ];
2203 my $plicit = $tag =~ /substitution/ ? 'substitution definition' :
2204 $tag =~ /target|footnote/ ? 'explicit target' :
2205 'implicit target';
2206 $error = system_message($level, $source, $lineno,
2207 qq(Duplicate $plicit name: "$name".),
2208 "", %attr);
2210 return $error;
2213 # Takes a string and handles backslash-quoting of characters
2214 # Arguments: string
2215 # Returns: processed string
2216 sub RemoveBackslashes {
2217 my ($str) = @_;
2218 $str =~ s/\\(?!u[\da-fA-F]{4}|x[\da-fA-F]{2})(.)/$1 eq ' ' || $1 eq "\n" ? '' : $1/seg;
2219 $str;
2222 # Takes the name associated with one DOM and reassigns it to another
2223 # Arguments: source DOM, target DOM
2224 # Returns: None
2225 # Uses globals: %TARGET_NAME
2226 sub ReregisterName {
2227 my ($olddom, $newdom) = @_;
2229 foreach my $id (@{$olddom->{attr}{ids}}) {
2230 @{$ALL_TARGET_IDS{$id}} =
2231 map $_ eq $olddom ? $newdom : $_, @{$ALL_TARGET_IDS{$id}};
2234 if ($olddom->{attr}{names}) {
2235 my $tag = $olddom->{tag};
2236 my $space = $NAMESPACE{$tag} || 'target';
2237 foreach my $casename (@{$olddom->{attr}{names}}) {
2238 my $name = lc $casename;
2239 next unless defined $name;
2241 @{$TARGET_NAME{$space}{$name}} =
2242 map $_ eq $olddom ? $newdom : $_, @{$TARGET_NAME{$space}{$name}};
2243 @{$ALL_TARGET_NAMES{$name}} =
2244 map $_ eq $olddom ? $newdom : $_, @{$ALL_TARGET_NAMES{$name}};
2247 return;
2250 # Parses any section breaks in the text.
2251 # Arguments: DOM object of parent, text to parse, source, line number,
2252 # first paragraph in section
2253 # Returns: processed text, unprocessed text, new parent DOM object,
2254 # list of DOM objects
2255 sub SectionBreaks {
2256 my ($parent, $text, $source, $lineno, $section) = @_;
2257 my @dom;
2258 my $new_parent = $parent;
2260 $text =~ /^$SECTION_HEADER/o;
2261 my @sect;
2262 my $line;
2263 my $char;
2264 if (defined $3) {
2265 $char = "\\$3";
2266 @sect = split /^((?!$SEC_CHARS+\n(?:\n|\Z))(?!(?:\.\.|::)\n(?: |\n))($char$char+)\n(.*\n)?(?:(($SEC_CHARS)\5+)\n)?)/m, $text, 2;
2267 $line = 'over';
2269 else {
2270 $char = "\\$9";
2271 @sect = split /^((\S.*\n)(($char)$char+)\n)/m, $text, 2;
2272 $line = 'under';
2274 my $next = "$sect[-1]";
2276 shift @sect;
2278 # Now process the section header
2279 my ($lit, $over, $title, $under, $under_char) =
2280 map(defined $sect[$_] ? $sect[$_] : '',
2281 $line eq 'over' ? (0..4) : (0, 5, 1..3));
2282 if ($under eq '' && $title =~ /^($SEC_CHARS)\1+$/) {
2283 $under = $title;
2284 chomp $under;
2285 $title = '';
2287 # print STDERR "[$lit][$over][$title][$under][$under_char][]\n";
2288 my $lit_title = $title;
2289 $title =~ s/^\s+//;
2291 # Default to saying we've processed the literal part
2292 my $processed = $lit;
2293 my $unprocessed = $next;
2294 my $err = 1;
2296 # Check for errors
2297 if ($parent->{tag} !~ /^(document|section)$/) {
2298 return ('', $text)
2299 if $line eq 'under' && length($under) < $MIN_SEC_LEN;
2300 if ($line eq 'over' && length($over) < $MIN_SEC_LEN) {
2301 push(@dom,
2302 system_message(1, $source, $lineno,
2303 "Unexpected possible title overline or transition.\n" .
2304 "Treating it as ordinary text because it's so short."));
2305 $processed = "";
2306 $unprocessed = $text;
2308 else {
2309 # It's a bogus section header in a block quote
2310 push(@dom,
2311 system_message(4, $source,
2312 $lineno+(($line eq 'over') ? 2 : 1),
2313 "Unexpected section title.",
2314 $lit));
2317 elsif ($line eq 'over' &&
2318 (length($title) == 0 ||
2319 length($over) < length($title)-1) &&
2320 length($over) < $MIN_SEC_LEN) {
2321 # We don't actually consider this to be a section header
2322 push(@dom,
2323 system_message(1, $source, $lineno,
2324 "Possible incomplete section title.\n" .
2325 "Treating the overline as ordinary text because it's so short."));
2326 if (length($title) == 0) {
2327 # It's a title that looks like a section header...
2328 $line = 'under';
2329 $title = $over;
2330 $char = substr($under, 0, 1);
2331 $err = 0;
2333 else {
2334 $processed = "";
2335 $unprocessed = $text;
2338 elsif ($line eq 'under' &&
2339 length($under) < length($title)-1 &&
2340 length($under) < $MIN_SEC_LEN) {
2341 # We don't actually consider this to be a section header
2342 push(@dom,
2343 system_message(1, $source, $lineno+1,
2344 "Possible title underline, too short for the title.\n" .
2345 "Treating it as ordinary text because it's so short."));
2346 $processed = "";
2347 $unprocessed = $text;
2349 elsif ($line eq 'over' && $under eq '') {
2350 push(@dom,
2351 system_message(4, $source, $lineno,
2352 defined $section && $section ne '' ?
2353 "Missing matching underline for section title overline." :
2354 "Incomplete section title.",
2355 "$over\n$lit_title"));
2357 elsif ($line eq 'over' && $under ne $over) {
2358 push(@dom,
2359 system_message(4, $source, $lineno,
2360 "Title overline & underline mismatch.",
2361 "$over\n$lit_title$under\n"));
2363 elsif ($line eq 'over' && $title eq '') {
2364 push(@dom,
2365 system_message(3, $source, $lineno,
2366 "Invalid section title or transition marker.",
2367 "$over\n$lit_title$under\n"));
2369 else {
2370 $err = 0;
2372 if (! $err) {
2373 # Make sure the section style is consistent
2374 my $secstyle = "$line$char";
2375 if (! defined $SEC_LEVEL{$secstyle} &&
2376 @SEC_DOM < @SEC_STYLE) {
2377 push(@dom,
2378 system_message(4, $source,
2379 $lineno + (($line eq 'over') ? 1 : 0),
2380 "Title level inconsistent:",
2381 "$lit"));
2383 else {
2384 my $dom = new DOM('section');
2385 if (! defined $SEC_LEVEL{$secstyle}) {
2386 push(@SEC_STYLE, $secstyle);
2387 $SEC_LEVEL{$secstyle} = $#SEC_STYLE;
2389 else {
2390 splice(@SEC_DOM, $SEC_LEVEL{$secstyle});
2392 if (@dom) {
2393 $SEC_DOM[-1]->append(@dom);
2394 @dom = ();
2396 $SEC_DOM[-1]->append($dom);
2397 push(@SEC_DOM, $dom);
2398 $new_parent = $dom;
2399 my $titledom = new DOM('title');
2400 my @errs = Inline($titledom, $title, $source, $lineno);
2401 $dom->append($titledom);
2402 # Reconstruct title text less markup
2403 my $ttext = '';
2404 $titledom->Recurse(sub {
2405 my ($dom) = @_;
2406 $ttext .= $dom->{text} if $dom->{tag} eq '#PCDATA';
2408 my $id = NormalizeId($ttext, 1);
2409 my $name = NormalizeName($ttext);
2410 @{$dom->{attr}}{qw(ids names)} = ([ $id ], [ $name ]);
2411 my $err = RegisterName($dom, $source, $lineno+1);
2412 $dom->append($err) if $err;
2413 # Check for short underlines
2414 if ($line eq 'under' && length($title) > length($under)+1) {
2415 $dom->append
2416 (system_message(2, $source, $lineno+1,
2417 "Title underline too short.",
2418 "$lit_title$under\n"));
2420 if ($line eq 'over' && length($title) > length($over)+1) {
2421 $dom->append
2422 (system_message(2, $source, $lineno,
2423 "Title overline too short.",
2424 "$over\n$lit_title$under\n"));
2429 return $processed, $unprocessed, $new_parent, @dom;
2432 # Returns a DOM object for an RST table object given a simple table
2433 # text string.
2434 # Arguments: text string, source, line number
2435 # Returns: DOM object
2436 sub SimpleTable {
2437 my($text, $source, $lineno) = @_;
2439 # Split the table into its constituent lines
2440 my $table = $text;
2441 chomp $table;
2442 $table =~ s/^\001//gm;
2443 my @lines = split(/ *\n/, $table);
2444 $lines[-1] =~ s/ +$//;
2446 # We can compute the column boundaries from the first line.
2447 # It is complicated by the fact that a column separator may be more
2448 # than one character.
2449 my @segments = split(/( +)/, $lines[0]);
2450 my (@colstart, @colwidth, @sep);
2451 my $col = 0;
2452 foreach (@segments) {
2453 my $len = length($_);
2454 if (substr($_, 0, 1) eq ' ') {
2455 push(@sep, [$col,$len]);
2457 else {
2458 push(@colstart, $col);
2459 push(@colwidth, $len);
2462 $col += $len;
2464 # Now look for a header row
2465 my $head = 1; # The line on which the heading ends
2466 my $l;
2467 my $last_equal_line = 0;
2468 for ($l=1; $l < @lines; $l++) {
2469 $_ = $lines[$l];
2470 if (/^=+( +=+)* *$/) {
2471 return system_message(3, $source, $lineno,
2472 "Malformed table.\nBottom/header table border does not match top border.",
2473 $table)
2474 if length($lines[$l]) != length($lines[0]);
2475 $head = $last_equal_line+1;
2476 $last_equal_line = $l;
2480 my $dom = new DOM('table');
2481 $dom->{tableattr} = $main::opt_D{tableattr};
2482 my $tgroup = new DOM('tgroup', cols=>@colwidth+0);
2483 $dom->append($tgroup);
2484 my $colspec;
2485 foreach (@colwidth) {
2486 $colspec = new DOM('colspec', colwidth=>$_);
2487 $tgroup->append($colspec);
2489 my $tbody;
2490 if ($head > 1) {
2491 $tbody = new DOM('thead');
2492 $tgroup->append($tbody);
2495 # Process all the rows of the table
2496 my $row_start = 0;
2497 for ($l=1; $l < @lines; $l++) {
2498 $_ = $lines[$l];
2499 next if $l == $row_start && !/^([=-])(?:\1| )+\1 *$/;
2500 my $col1 = substr($_, 0, $colwidth[0]);
2501 if ($col1 =~ /\S/ || ($lines[$l-1] =~ /^([=-])(?:\1| )+\1 *$/ &&
2502 ! /^\s*$/)) {
2503 # We've hit the beginning of the next row; process the previous one
2504 my $next_row_start;
2505 my ($row_colstart, $row_colwidth, $row_sep);
2506 if (/^([=-])(?:\1| )+\1 *$/) {
2507 return system_message(3, $source, $lineno,
2508 "Malformed table.\nColumn span incomplete at line offset $l.",
2509 $table)
2510 if length($_) < length($lines[0]);
2511 # It's a separator/column span line
2512 for ($next_row_start=$l+1; $next_row_start<@lines;
2513 $next_row_start++) {
2514 my $next_col1 =
2515 substr($lines[$next_row_start],0,$colwidth[0]);
2516 last if $next_col1 =~ /\S/;
2518 @segments = split(/( +)/);
2519 my $col = 0;
2520 foreach (@segments) {
2521 my $len = length($_);
2522 if (substr($_, 0, 1) eq ' ') {
2523 push(@$row_sep, [$col, $len]);
2525 else {
2526 push(@$row_colstart, $col);
2527 push(@$row_colwidth, $len);
2529 $col += $len;
2531 # Do sanity check on the separator starts
2532 my $s;
2533 foreach $s (@$row_sep) {
2534 return system_message(3, $source, $lineno,
2535 "Malformed table.\nColumn span alignment problem at line offset @{[$row_start+1]}.",
2536 $table)
2537 unless grep($_->[0] == $s->[0] && $_->[1] == $s->[1],
2538 @sep);
2541 else {
2542 $next_row_start = $l;
2543 ($row_colstart, $row_colwidth, $row_sep) =
2544 (\@colstart, \@colwidth, \@sep);
2546 # Process the row
2547 if ($row_start > 0 && $row_start <= $l) {
2548 if ($row_start == $head) {
2549 $tbody = new DOM('tbody');
2550 $tgroup->append($tbody);
2552 my $row = new DOM('row');
2553 $tbody->append($row);
2555 my $row_end = $l-1;
2556 my $col;
2557 # Make sure we don't have text in the column separators
2558 for ($col=0; $col<@$row_sep; $col++) {
2559 my ($start,$width) = @{$row_sep->[$col]};
2560 my $septext =
2561 join('',map(do {local $^W = 0;
2562 substr($lines[$_],$start,$width)
2563 . "\n"},
2564 $row_start .. $row_end));
2565 return system_message(3, $source, $lineno,
2566 "Malformed table.\nText in column margin at line offset $row_start.",
2567 $table)
2568 if $septext =~ /\S/;
2570 # Produce the entries
2571 for ($col=0; $col<@$row_colstart; $col++) {
2572 my $entry = new DOM('entry');
2573 $row->append($entry);
2575 my $start = $row_colstart->[$col];
2576 my $width = $col < $#$row_colstart ? $row_colwidth->[$col]
2577 : 0xffff;
2578 my $end = $start+$width;
2579 # Compute the column spans
2580 my @colspans = grep($_ > $start && $_ < $end, @colstart);
2581 $entry->{attr}{morecols} = @colspans if @colspans > 0;
2582 my $celltext =
2583 join('',map(do {local $^W = 0;
2584 substr($lines[$_],$start,$width)
2585 . "\n"}, $row_start .. $row_end));
2586 # Handle right/center alignment for single-row entries
2587 if (($celltext =~ /\A.*\n[ \n]*\Z/) &&
2588 $main::opt_D{align}) {
2589 $celltext =~ /(.*)/;
2590 my $ct = $1;
2591 $entry->{attr}{align} = $ct =~ /^\S/ ? 'left' :
2592 $ct =~ / $/ ||
2593 length($ct) < $row_colwidth->[$col] ? 'center' :
2594 'right';
2596 # May need to update the colspec for text that overflows
2597 # the last column
2598 if ($col == $#$row_colstart) {
2599 my $colwidth = 0;
2600 grep(do{
2601 my $cell_line = do {
2602 local $^W=0;
2603 substr($lines[$_],$start,$width) . ''};
2604 my $len = length($cell_line);
2605 $colwidth = $len if $len > $colwidth;
2606 }, $row_start .. $row_end);
2607 $colwidth +=
2608 $row_colstart->[$col] - $colstart[-1];
2609 $colspec->{attr}{colwidth} = $colwidth
2610 if $colwidth > $colspec->{attr}{colwidth};
2612 $celltext =~ s/ *$//gm;
2613 # Delete common indent
2614 $celltext =~ /^( *)/;
2615 my $spaces = $1;
2616 $celltext =~ s/^$spaces//gm;
2617 Paragraphs($entry, $celltext, $source,
2618 $lineno+$row_start);
2619 $entry->{entryattr} = $main::opt_D{entryattr};
2621 $row->{rowattr} = $main::opt_D{rowattr};
2623 $row_start = $next_row_start;
2627 if ($table !~ /\n=+( +=+)* *$/) {
2628 if ($table =~ /((?:.)+\n=+( +=+)*\n)(.*)/s) {
2629 my ($table,$rest) = ($1, $3);
2630 $dom =
2631 system_message(3, $source, $lineno,
2632 "Malformed table.\nNo bottom table border found or no blank line after table bottom.",
2633 $table)
2634 if $head == 1;
2635 $lineno += ($table =~ tr/\n//);
2636 my $err =
2637 system_message(2, $source, $lineno,
2638 "Blank line required after table.");
2639 my $fake = new DOM('fake');
2640 Paragraphs($fake, $rest, $source, $lineno);
2641 return ($dom, $err, $fake->contents());
2643 else {
2644 return system_message(3, $source, $lineno,
2645 "Malformed table.\nNo bottom table border found.",
2646 "$table\n");
2650 return $dom;
2653 # Returns a DOM object for an RST table object.
2654 # Arguments: text string, source, line number
2655 # Returns: DOM object
2656 sub Table {
2657 my($text, $source, $lineno) = @_;
2659 return SimpleTable($text, $source, $lineno)
2660 if $text =~ /^=[ =]+= *\n/;
2661 # Split the table into its constituent lines
2662 my $table = $text;
2663 chomp $table;
2664 my @lines = split(/ *\n/, $table);
2665 my $head = 0; # The line on which the heading ends
2667 # Create a graph to figure out how the cells are connected
2668 use Graph;
2669 my $g = new Graph;
2670 # Look for plus signs that are connected to other plus signs
2671 my $v;
2672 for ($v=0; $v < @lines; $v++) {
2673 my @segments = split(/[+]/, $lines[$v]);
2674 push(@segments, "") if substr($lines[$v],-1,1) eq '+';
2675 my $s;
2676 # Note: we start at $s=1 because that's where the first plus sign was
2677 my $h = length($segments[0]);
2678 for ($s=1; $s < @segments; $s++) {
2679 my $seg = $segments[$s];
2681 # Check for a horizontal edge
2682 $g->AddEdge([$v,$h],[$v,$h+length($seg)+1])
2683 if ($seg =~ /^([-=])\1* *$/);
2684 $head = $v if $1 eq '=';
2685 # Check for a vertical edge
2686 if ($v < @lines-2 &&
2687 do { local $^W=0; substr($lines[$v+1],$h,1) eq '|'}) {
2688 my $v1;
2689 for ($v1=$v+2; $v1<@lines; $v1++) {
2690 last unless substr($lines[$v1],$h,1) eq '|';
2692 $g->AddEdge([$v,$h],[$v1,$h])
2693 if substr($lines[$v1],$h,1) eq '+';
2696 $h += length($seg) + 1;
2699 # Now we mark everything that is reachable from 0,0
2700 $g->DFS([0,0], sub {my ($g,$p) = @_; $g->SetVertexProp($p,'mark',1)});
2701 my @verts = grep($g->GetVertexProp($_,'mark'),$g->GetVertices());
2702 my $vmax = $#lines;
2703 my $hmax = length($lines[0])-1;
2704 my (%rows,%cols);
2705 foreach (@verts) {
2706 $rows{$_->[0]} = 1;
2707 $cols{$_->[1]} = 1;
2709 my @rows = sort {$a <=> $b} keys %rows;
2710 my @cols = sort {$a <=> $b} keys %cols;
2712 # Check that all vertices except the corners have degree >= 3 and that
2713 # corners have degree 2
2714 foreach (@verts) {
2715 my @edges = $g->GetVertexEdges($_);
2716 my $iscorner = ($_->[0] == 0 || $_->[0] == $vmax) &&
2717 ($_->[1] == 0 || $_->[1] == $hmax);
2718 return system_message(3, $source, $lineno, "Malformed table.", $text)
2719 if $iscorner && @edges != 2;
2720 return system_message(3, $source, $lineno, "Malformed table.\nMalformed table; parse incomplete.",
2721 $text)
2722 if ! $iscorner && @edges < 3;
2725 my $dom = new DOM('table');
2726 $dom->{tableattr} = $main::opt_D{tableattr};
2727 my $tgroup = new DOM('tgroup', cols=>$#cols);
2728 $dom->append($tgroup);
2729 my $c;
2730 for ($c=0; $c < $#cols; $c++) {
2731 $tgroup->append(new DOM('colspec',colwidth=>$cols[$c+1]-$cols[$c]-1));
2733 my $tbody;
2734 if ($head > 0) {
2735 $tbody = new DOM('thead');
2736 $tgroup->append($tbody);
2739 # Now we go through all the upper-left corners of cells adding them
2740 # to the table body.
2741 my $lastv = -1;
2742 my $row;
2743 #print join(',',map("[$_->[0],$_->[1]]",@verts)),"\n";
2744 #print join(',',@rows),"\n";
2745 foreach (@verts) {
2746 my ($v, $h) = @$_;
2747 next if $v == $vmax || $h == $hmax;
2748 if ($v > $lastv) {
2749 if ($v == $head) {
2750 $tbody = new DOM('tbody');
2751 $tgroup->append($tbody);
2753 $row->{rowattr} = $main::opt_D{rowattr} if defined $row;
2754 $row = new DOM('row');
2755 $tbody->append($row);
2758 # This is only the top-left if it has edges right and down
2759 my ($down,$right,$p2);
2760 my @edges = $g->GetVertexEdges([$v,$h]);
2761 foreach $p2 (@edges) {
2762 $right = $p2->[1] if $p2->[0] == $v && $p2->[1] > $h;
2763 $down = $p2->[0] if $p2->[1] == $h && $p2->[0] > $v;
2765 next unless defined $right && defined $down;
2767 my $entry = new DOM('entry');
2768 $row->append($entry);
2769 # Check for row and column spans
2770 # Track right to an edge that goes down and down to an edge
2771 # that goes right
2772 my $p1 = $right;
2773 while (defined $p1) {
2774 my ($r, $d);
2775 my @edges = $g->GetVertexEdges([$v,$p1]);
2776 foreach $p2 (@edges) {
2777 $r = $p2->[1] if $p2->[0] == $v && $p2->[1] > $p1;
2778 $d = $p2->[0] if $p2->[1] == $p1 && $p2->[0] > $v;
2780 if (defined $d) {
2781 $right = $p1;
2782 last;
2784 $p1 = $r;
2786 @edges = $g->GetVertexEdges([$v,$h]);
2787 $p1 = $down;
2788 while (defined $p1) {
2789 my ($r, $d);
2790 my @edges = $g->GetVertexEdges([$p1,$h]);
2791 foreach $p2 (@edges) {
2792 $r = $p2->[1] if $p2->[0] == $p1 && $p2->[1] > $h;
2793 $d = $p2->[0] if $p2->[1] == $h && $p2->[0] > $p1;
2795 if (defined $r) {
2796 $down = $p1;
2797 last;
2799 $p1 = $d;
2801 #print "[$v,$h] [$down,$right]\n";
2802 my @cspans = grep($_ > $v && $_ <= $down, @rows);
2803 my @rspans = grep($_ > $h && $_ <= $right, @cols);
2804 $entry->{attr}{morecols} = @rspans-1 if @rspans > 1;
2805 $entry->{attr}{morerows} = @cspans-1 if @cspans > 1;
2806 my $chars = $right - $h - 1;
2807 my $celltext = join('', map(substr($lines[$_], $h+1, $chars) . "\n",
2808 (($v+1) .. ($down-1))));
2809 # Delete trailing spaces
2810 $celltext =~ s/ *$//gm;
2811 # Delete common indent
2812 $celltext =~ /^( *)/;
2813 my $spaces = $1;
2814 $celltext =~ s/^$spaces//gm;
2815 Paragraphs($entry, $celltext, $source, $lineno+$v+1);
2816 $entry->{entryattr} = $main::opt_D{entryattr};
2817 $lastv = $v;
2819 $row->{rowattr} = $main::opt_D{rowattr} if defined $row;
2820 return $dom;
2823 # Returns the error messages for an unknown role name
2824 # Arguments: role name, source, line number, optional list of attributes for
2825 # last system_message
2826 # Returns: array of DOM objects
2827 sub UnknownRole {
2828 my ($role, $source, $lineno, $lit, %attrs) = @_;
2830 return
2831 system_message(1, $source, $lineno,
2832 qq(No role entry for "$role" in module "docutils.parsers.rst.languages.en".\nTrying "$role" as canonical role name.),
2833 ""),
2834 system_message(3, $source, $lineno,
2835 qq(Unknown interpreted text role "$role".),
2836 $lit, %attrs);
2839 package RST::Role;
2841 # Checks for validity of a PEP reference
2842 sub PEP {
2843 my ($pep, $lit, $parent, $source, $lineno) = @_;
2845 my @errs;
2846 if ($pep !~ /^\d+$/ || $pep < 0 || $pep > 9999) {
2847 my ($dom,$refid,$id) = RST::problematic($lit);
2848 $parent->append($dom);
2849 push @errs, RST::system_message(3, $source, $lineno,
2850 qq(PEP number must be a number from 0 to 9999; "$pep" is invalid.),
2851 "", backrefs=>[ $id ],
2852 ids=>[ $refid ]);
2854 return @errs;
2857 # Checks for validity of an RFC reference
2858 sub RFC {
2859 my ($rfc, $lit, $parent, $source, $lineno) = @_;
2861 my @errs;
2862 if ($rfc !~ /^\d+$/ || $rfc < 1) {
2863 my ($dom,$refid,$id) = RST::problematic($lit);
2864 $parent->append($dom);
2865 push @errs, RST::system_message(3, $source, $lineno,
2866 qq(RFC number must be a number greater than or equal to 1; "$rfc" is invalid.),
2867 "", backrefs=>[ $id ],
2868 ids=>[ $refid ]);
2870 return @errs;
2873 # Checks for validity of an RAW reference
2874 sub raw {
2875 my ($raw, $lit, $parent, $source, $lineno, $role) = @_;
2877 my @errs;
2878 if (! defined $RST::MY_ROLES{$role}{attr}{format}) {
2879 my ($dom,$refid,$id) = RST::problematic($lit);
2880 $parent->append($dom);
2881 push @errs, RST::system_message(3, $source, $lineno,
2882 qq(No format (Writer name) is associated with this role: "raw".\nThe "raw" role cannot be used directly.\nInstead, use the "role" directive to create a new role with an associated format.),
2883 "", backrefs=>[ $id ],
2884 ids=>[ $refid ]);
2886 return @errs;
2889 package RST::Directive;
2891 # This package contains the code for the various RST built-in directives
2893 # Data structures:
2894 # _`Directive arguments hash ref`: Returned by
2895 # RST::Directive::parse_directive. It has keys:
2896 # ``name``: the directive name
2897 # ``args``: parsed arguments from dtext
2898 # ``options``: reference to hash of parsed option/value pairs
2899 # ``content``: parsed content from dtext
2900 # ``content_lineno``: line number of the content block
2901 # _`Role definition hash reference`: Used for a role definition. It
2902 # has keys (all are optional except tag):
2903 # ``tag``: the tag used for a DOM object using the
2904 # role (required)
2905 # ``attr``: reference to hash whose keys are attribute
2906 # names and whose values are sprintf strings
2907 # that are called with interpreted text
2908 # ``text``: sprintf string called with the interpreted
2909 # text to produce the final text string
2910 # ``alias``: name of another role for which this is a
2911 # synonym
2912 # ``check``: reference to subroutine to call to check
2913 # validity of the interpreted text. The
2914 # arguments to the routine are (interpreted
2915 # text, literal of interpreted/role
2916 # combination, DOM object for future parent,
2917 # source file name, line number).
2919 # Built-in handler for admonition directives.
2920 # Arguments: directive name, parent, source, line number, directive text,
2921 # literal text
2922 # Returns: array of DOM objects
2923 sub admonition {
2924 my($name, $parent, $source, $lineno, $dtext, $lit) = @_;
2925 #use PrintVar;PrintVar::PrintVar(\@_);print "\n";
2926 my $subst = $parent->{attr}{name}
2927 if $parent->{tag} eq 'substitution_definition';
2929 my $dhash = parse_directive($dtext, $lit, $source, $lineno);
2930 return $dhash if ref($dhash) eq 'DOM';
2931 my $content = $dhash->{content} ne '' ? $dhash->{content} :
2932 $dhash->{args};
2933 return RST::system_message(3, $source, $lineno,
2934 qq(The "$name" admonition is empty; content required.),
2935 $lit)
2936 if $content =~ /^$/;
2937 my $adm = new DOM(lc $name);
2938 if ($name eq 'admonition') {
2939 # A generic admonition
2940 my $ttext = $dhash->{args};
2941 return no_args($name, $source, $lineno, $lit)
2942 if $ttext =~ /^$/;
2943 $adm->{attr}{classes} = [ $dhash->{options}{class} ||
2944 RST::NormalizeId("$name-$ttext") ];
2945 my $title = new DOM ('title');
2946 $title->append(newPCDATA DOM ($ttext));
2947 $adm->append($title);
2949 RST::Paragraphs($adm, $content, $source, $dhash->{content_lineno});
2951 return $adm;
2954 # Built-in handler for class directives.
2955 # Arguments: directive name, parent, source, line number, directive text,
2956 # literal text
2957 # Returns: array of DOM objects
2958 sub class {
2959 my($name, $parent, $source, $lineno, $dtext, $lit) = @_;
2960 my $dhash = parse_directive($dtext, $lit, $source, $lineno);
2961 return $dhash if ref($dhash) eq 'DOM';
2963 return RST::system_message(3, $source, $lineno,
2964 qq(The "$name" directive may not have contents.),
2965 $lit)
2966 if $dhash->{content} ne '';
2968 my($args, $options) = map($dhash->{$_}, qw(args options));
2969 return no_args($name, $source, $lineno, $lit) if $args eq '';
2970 return RST::system_message(3, $source, $lineno,
2971 qq(Invalid class attribute value for "$name" directive: "$args".)
2972 , $lit)
2973 unless $args =~ /^[a-z][-a-z0-9]*(?:\s+[a-z][-a-z0-9]*)*$/i;
2975 my $pending = new DOM('pending');
2976 $pending->{internal}{'.transform'} = "docutils.transforms.parts.Class";
2977 my $details = $pending->{internal}{'.details'} = { };
2978 $details->{class} = $args;
2979 @{$pending}{qw(source lineno lit)} = ($source, $lineno, $lit);
2980 my @optlist = ();
2981 my $err = check_option_names($name, $options, \@optlist, $source, $lineno,
2982 $lit);
2983 return $err || $pending;
2986 # Built-in handler for compound directive.
2987 # Arguments: directive name, parent, source, line number, directive text,
2988 # literal text
2989 # Returns: array of DOM objects
2990 sub compound {
2991 my($name, $parent, $source, $lineno, $dtext, $lit) = @_;
2992 #use PrintVar;PrintVar::PrintVar(\@_);print "\n";
2993 my $dhash = parse_directive($dtext, $lit, $source, $lineno);
2994 return $dhash if ref($dhash) eq 'DOM';
2995 my($content, $args, $options) =
2996 map($dhash->{$_}, qw(content args options));
2997 return system_message($name, 3, $source, $lineno,
2998 'no arguments permitted; blank line required before content block.',
2999 $lit)
3000 if $content ne '' && $args ne '';
3002 $content = $args if $content eq '';
3003 return RST::system_message(3, $source, $lineno,
3004 qq(The "$name" compound is empty; content required.),
3005 $lit)
3006 if $content =~ /^$/;
3007 my @optlist = qw(class);
3008 my $err = check_option_names($name, $options, \@optlist, $source, $lineno,
3009 $lit);
3010 return $err if $err;
3012 my $comp = new DOM($name);
3013 RST::Paragraphs($comp, $content, $source, $dhash->{content_lineno});
3014 $comp->{attr}{classes} = [ $options->{class} ]
3015 if defined $options->{class};
3017 return $comp;
3020 # Built-in handler for contents directives.
3021 # Arguments: directive name, parent, source, line number, directive text,
3022 # literal text
3023 # Returns: array of DOM objects
3024 sub contents {
3025 my($name, $parent, $source, $lineno, $dtext, $lit) = @_;
3026 my $dhash = parse_directive($dtext, $lit, $source, $lineno);
3027 return $dhash if ref($dhash) eq 'DOM';
3029 return RST::system_message(3, $source, $lineno,
3030 qq(The "contents" directive may not be used within topics or body elements.),
3031 $lit)
3032 if $parent->{tag} !~ /^(section|document|sidebar)$/;
3034 my($args, $options) =
3035 map($dhash->{$_}, qw(args options));
3037 # Create the topic
3038 my $topic = new DOM('topic', classes=>[ 'contents' ]);
3039 my $ttext = $args ne '' ? $args : 'Contents';
3040 push @{$topic->{attr}{classes}}, 'local' if defined $options->{local};
3041 $topic->{attr}{ids} = [ RST::NormalizeId($ttext) ];
3042 $topic->{attr}{names} = [ RST::NormalizeName($ttext) ];
3043 RST::RegisterName($topic, $source, $lineno);
3044 if ($args ne '' || ! defined $options->{local}) {
3045 my $title = new DOM('title');
3046 my $fake = new DOM('fake');
3047 RST::Paragraphs($fake, $ttext, $source, $lineno);
3048 my $last = $fake->last();
3049 if ($fake->num_contents() == 1 && $last->{tag} eq 'paragraph') {
3050 $title->append($last->contents());
3052 $topic->append($title);
3055 my $pending = new DOM('pending');
3056 $topic->append($pending);
3057 $pending->{internal}{'.transform'} =
3058 "docutils.transforms.parts.Contents";
3059 $pending->{source} = $source;
3060 $pending->{lineno} = $lineno;
3061 my @optlist = qw(depth local backlinks);
3062 my $err = check_option_names($name, $options, \@optlist, $source, $lineno,
3063 $lit);
3064 return $err if $err;
3066 # Save away the enclosing section for local toc
3067 $pending->{section} = $RST::SEC_DOM[-1] if defined $options->{local};
3068 my $opt;
3069 foreach $opt (sort keys %$options) {
3070 my $str = $options->{$opt};
3071 if ($opt eq 'local') {
3072 return bad_option($name, $opt, $str, 3, $source, $lineno,
3073 qq(no argument is allowed; "$str" supplied.),
3074 $lit)
3075 if $str ne '';
3077 elsif ($opt eq 'depth') {
3078 my $err = check_int_option($name, $opt, $str, $str, $source,
3079 $lineno, $lit);
3080 return $err if $err;
3082 elsif ($opt eq 'backlinks') {
3083 my @vallist = qw(top entry none);
3084 my $err = check_enum_option($name, $opt, $str, \@vallist,
3085 $source, $lineno, $lit);
3086 return $err if $err;
3087 substr($str,0,1) =~ tr/a-z/A-Z/;
3089 $pending->{internal}{'.details'}{$opt} = $str;
3091 return $topic;
3094 # Built-in handler for decoration directive.
3095 # Arguments: directive name, parent, source, line number, directive text,
3096 # literal text
3097 # Returns: array of DOM objects
3098 sub decoration {
3099 my($name, $parent, $source, $lineno, $dtext, $lit) = @_;
3100 #use PrintVar;PrintVar::PrintVar(\@_);print "\n";
3101 # return system_message($name, 3, $source, $lineno,
3102 # "directive must be used before any body blocks.",
3103 # $lit)
3104 # if $parent->{tag} ne 'document';
3105 my $topdom = $RST::TOPDOM;
3106 my $dhash = parse_directive($dtext, $lit, $source, $lineno);
3107 return $dhash if ref($dhash) eq 'DOM';
3108 return system_message($name, 3, $source, $lineno,
3109 'no arguments permitted; blank line required before content block.',
3110 $lit)
3111 if $dhash->{content} ne '' && $dhash->{args} ne '';
3113 # See if there's already the right kind of block under <decoration>
3114 my $dec = $topdom->{content}[0];
3115 if (! defined $dec || $dec->{tag} ne 'decoration') {
3116 $dec = new DOM('decoration');
3117 $topdom->prepend($dec);
3119 my ($block) = grep $_->{tag} eq $name, $dec->contents();
3120 if (! defined $block) {
3121 $block = new DOM($name);
3122 if ($name eq 'header') {
3123 $dec->prepend($block);
3125 else {
3126 $dec->append($block);
3129 my $content = $dhash->{content} ne '' ? $dhash->{content} :
3130 $dhash->{args};
3131 if ($content =~ /^$/) {
3132 RST::Paragraphs($block, qq(Problem with the "$name" directive: no content supplied.),
3133 $source, $dhash->{content_lineno});
3134 return RST::system_message(2, $source, $lineno,
3135 qq(Content block expected for the "$name" directive; none found.),
3136 $lit);
3139 RST::Paragraphs($block, $content, $source, $dhash->{content_lineno});
3141 return;
3144 # Built-in handler for default-role directive.
3145 # Arguments: directive name, parent, source, line number, directive text,
3146 # literal text
3147 # Returns: array of DOM objects
3148 sub default_role {
3149 my($name, $parent, $source, $lineno, $dtext, $lit) = @_;
3150 #use PrintVar;PrintVar::PrintVar(\@_);print "\n";
3151 my $dhash = parse_directive($dtext, $lit, $source, $lineno);
3152 return $dhash if ref($dhash) eq 'DOM';
3153 return system_message($name, 3, $source, $lineno,
3154 'no content block permitted.',
3155 $lit)
3156 if $dhash->{content} ne '';
3158 my $role = $dhash->{args};
3159 return RST::UnknownRole($role, $source, $lineno, $lit)
3160 unless $role eq '' || defined $RST::MY_ROLES{$role};
3161 $RST::MY_DEFAULT_ROLE = $role ? $role : $RST::DEFAULT_ROLE;
3163 return;
3166 # Built-in handler for figure directives.
3167 # Arguments: directive name, parent, source, line number, directive text,
3168 # literal text
3169 # Returns: array of DOM objects
3170 sub figure {
3171 my($name, $parent, $source, $lineno, $dtext, $lit) = @_;
3172 my $dhash = parse_directive($dtext, $lit, $source, $lineno);
3173 return $dhash if ref($dhash) eq 'DOM';
3174 my($content, $content_lineno, $options) =
3175 map($dhash->{$_}, qw(content content_lineno options));
3177 if (defined $options->{align}) {
3178 my @vallist = qw(left center right);
3179 my $err = check_enum_option($name, 'align', $options->{align},
3180 \@vallist, $source, $lineno, $lit);
3181 return $err if $err;
3184 my $cline = $content_lineno - $lineno;
3185 my @dline = split(/\n/, $dtext);
3186 my $dline = join("\n", @dline[0 .. $cline-1]);
3187 my %myopts = (figwidth=>'width', figclass=>'@classes', align=>'align');
3188 my $image = image("image", $parent, $source, $lineno, $dline, $lit,
3189 keys %myopts);
3190 return $image if $image->{tag} eq 'system_message';
3192 my @dom;
3193 my $figure = new DOM(lc $name);
3194 $figure->append($image);
3195 push(@dom, $figure);
3196 foreach (keys %myopts) {
3197 if (defined $options->{$_}) {
3198 if ($myopts{$_} =~ /^@(.*)/) {
3199 $figure->{attr}{$1} = [ $options->{$_} ];
3201 else {
3202 $figure->{attr}{$myopts{$_}} = $options->{$_};
3207 my $caption = '';
3208 my $legend = '';
3209 my $legend_lineno;
3210 if ((my @s = split /^(\n+)/m, $content, 2) > 1) {
3211 $caption = $s[0];
3212 $legend = $s[-1];
3213 if ($legend ne "") {
3214 my $pre = "$s[0]$s[1]";
3215 $legend_lineno = $content_lineno + ($pre =~ s/(\n)/\n/g);
3218 else { $caption = $content }
3219 if ($caption !~ /^(..)?$/) {
3220 my $capdom = new DOM('caption');
3221 my $fake = new DOM('fake');
3222 RST::Paragraphs($fake, $caption, $source, $content_lineno);
3223 my $last = $fake->last();
3224 if ($fake->num_contents() == 1 && $last->{tag} eq 'paragraph') {
3225 $capdom->append($last->contents());
3226 $figure->append($capdom);
3228 else {
3229 # This wasn't a simple paragraph
3230 push(@dom, RST::system_message(3, $source, $lineno,
3231 "Figure caption must be a paragraph or empty comment.",
3232 $lit));
3235 if ($legend ne '') {
3236 my $legdom = new DOM('legend');
3237 $figure->append($legdom);
3238 RST::Paragraphs($legdom, $legend, $source, $legend_lineno);
3240 return @dom;
3243 # Built-in handler for image directives.
3244 # Arguments: directive name, parent, source, line number, directive text,
3245 # literal text, list of extra options allowed
3246 # Returns: array of DOM objects
3247 sub image {
3248 my($name, $parent, $source, $lineno, $dtext, $lit, @extra_opts) = @_;
3249 #use PrintVar;PrintVar::PrintVar(\@_);print "\n";
3250 my $dhash = parse_directive($dtext, $lit, $source, $lineno);
3251 return $dhash if ref($dhash) eq 'DOM';
3252 my($args, $content, $options) =
3253 map($dhash->{$_}, qw(args content options));
3255 return system_message($name, 3, $source, $lineno,
3256 "no content permitted.", $dtext)
3257 if ($content ne '' && $name eq 'image');
3258 my ($err) = arg_check($name, $source, $lineno, $args, $lit, 1);
3259 return $err if $err;
3260 $args =~ s/[ \n]//g;
3262 # Process the options
3263 my @optlist = (qw(width height scale alt usemap target align class),
3264 @extra_opts);
3265 $err = check_option_names($name, $options, \@optlist, $source, $lineno,
3266 $lit);
3267 return $err if $err;
3268 if (defined $options->{scale}) {
3269 my $scale = $options->{scale};
3270 my $err = check_int_option($name, 'scale', $scale, $scale, $source,
3271 $lineno, $lit, 0);
3272 return $err if $err;
3274 if (defined (my $align = $options->{align})) {
3275 my $in_subst = $parent->{tag} eq 'substitution_definition';
3276 my @svals = qw(top middle bottom);
3277 my @ovals = qw(left center right);
3278 my @vals = $in_subst ? @svals : @ovals;
3279 my $err = check_enum_option($name, 'align', $align, [@svals, @ovals],
3280 $source, $lineno, $lit);
3281 return $err if $err;
3282 if (! grep($_ eq $align, @vals)) {
3283 my $subst = $in_subst ? ' within a substitution definition' : '';
3284 my $vals = join(', ', map(qq("$_"), @vals));
3285 return RST::system_message(3, $source, $lineno,
3286 qq(Error in "$name" directive: "$align" is not a valid value for the "align" option$subst. Valid values for "align" are: $vals.),
3287 $lit);
3290 foreach my $opt (qw(height width)) {
3291 my $err = check_units_option($name, $opt, $options->{$opt},
3292 $source, $lineno, $lit,
3293 $opt eq 'width' ? ('%') : ())
3294 if defined $options->{$opt};
3295 return $err if $err;
3297 foreach my $opt (qw(target)) {
3298 my $err = check_required_option($name, $opt, $options->{$opt},
3299 $source, $lineno, $lit)
3300 if defined $options->{$opt};
3301 return $err if $err;
3303 if (defined $options->{class}) {
3304 $options->{classes} = [ $options->{class} ];
3305 delete $options->{class};
3308 my %attr;
3309 my $alt = '';
3310 $alt = $parent->{attr}{names}[0]
3311 if $parent->{tag} eq 'substitution_definition';
3312 $attr{alt} = $alt if $alt ne '';
3313 delete $options->{$_} foreach (@extra_opts);
3315 my $dom = new DOM (lc $name, uri=>$args, %attr, %$options);
3316 if (my $target = $options->{target}) {
3317 delete $dom->{attr}{target};
3318 my $newdom = new DOM ('reference');
3319 if ($target =~ /\`(.*)\`_$/s || $target =~ /^(\S+)_$/) {
3320 # Indirect target
3321 (my $refname = $1) =~ s/\n/ /g;
3322 $target =~ s/\n/ /g;
3323 $newdom->{attr}{name} = RST::NormalizeName($target);
3324 $newdom->{attr}{refname} = $refname;
3326 else {
3327 $target =~ s/\n//g;
3328 $newdom->{attr}{refuri} = $target;
3330 $newdom->append($dom);
3331 $dom = $newdom;
3333 return $dom;
3336 # Built-in handler for include directives.
3337 # Arguments: directive name, parent, source, line number, directive text,
3338 # literal text
3339 # Returns: array of DOM objects
3340 sub include {
3341 my($name, $parent, $source, $lineno, $dtext, $lit) = @_;
3342 my $dhash = parse_directive($dtext, $lit, $source, $lineno);
3343 return $dhash if ref($dhash) eq 'DOM';
3344 my($args, $options) = map($dhash->{$_}, qw(args options));
3346 my @exts = split(/:/, $main::opt_D{includeext});
3347 my $mydir = $source =~ m|(.*)/| ? $1 : ".";
3348 my $path = $main::opt_D{includepath};
3349 $path =~ s/<\.>/$mydir/;
3350 my @dirs = map(m|^\./?$| ? "" : m|/$| ? $_ : "$_/",split(/:/, $path));
3351 $args =~ s/^<(.*)>$/$1/;
3352 my $file = $args;
3353 my $dir;
3354 foreach $dir (@dirs) {
3355 my @files = map("$dir$args$_", @exts);
3356 my @foundfiles = grep(-r $_, @files);
3357 if (@foundfiles) {
3358 $file = $foundfiles[0];
3359 last;
3362 my $text;
3363 print STDERR "Debug: $source, $lineno: Including $file\n" if $main::opt_d;
3364 if (open(FILE,$file)) {
3365 $text = join('',<FILE>);
3366 # TODO:
3367 # use Encode qw/encode decode/;
3368 # $text = decode($options->{encoding}, $text)
3369 # if defined $options->{encoding};
3370 if (defined $options->{literal}) {
3371 my $lb = new DOM('literal_block', %RST::XML_SPACE, source=>$file);
3372 $lb->append(newPCDATA DOM($text));
3373 return $lb;
3375 else {
3376 RST::Paragraphs($parent, $text, $file, 1) if defined $text;
3379 else {
3380 my $err = "IOError: " . system_error();
3381 return RST::system_message(4, $source, $lineno,
3382 qq(Problems with "$name" directive path:\n$err: '$args'.),
3383 $lit);
3385 return;
3388 # Built-in handler for line-block directives.
3389 # Arguments: directive name, parent, source, line number, directive text,
3390 # literal text
3391 # Returns: array of DOM objects
3392 sub line_block {
3393 my($name, $parent, $source, $lineno, $dtext, $lit) = @_;
3394 my $dhash = parse_directive($dtext, $lit, $source, $lineno);
3395 return $dhash if ref($dhash) eq 'DOM';
3396 my($args, $content, $content_lineno, $options) =
3397 map($dhash->{$_}, qw(args content content_lineno options));
3398 my ($err) = arg_check($name, $source, $lineno, $args, $lit, 0);
3399 return $err if $err;
3400 my @optlist = qw(class);
3401 $err = check_option_names($name, $options, \@optlist, $source, $lineno,
3402 $lit);
3403 return $err if $err;
3404 return RST::system_message(2, $source, $lineno,
3405 qq(Content block expected for the "$name" directive; none found.),
3406 $lit) unless $content;
3407 $content =~ s/^/| /gm;
3408 my ($proc, @doms) = RST::LineBlock($content, $source, $content_lineno);
3409 if ($options->{class}) {
3410 my ($lb) = grep($_->{tag} eq 'line_block', @doms);
3411 $lb->{attr}{classes} = [ $options->{class} ] if $lb;
3413 return grep(ref $_ eq 'DOM', @doms);
3416 # Built-in handler for meta directives.
3417 # Arguments: directive name, parent, source, line number, directive text,
3418 # literal text
3419 # Returns: array of DOM objects
3420 sub meta {
3421 my($name, $parent, $source, $lineno, $dtext, $lit) = @_;
3422 my @doms;
3423 my $para = $dtext;
3424 $para =~ s/^.*\n//;
3425 return RST::system_message(3, $source, $lineno,
3426 "Empty meta directive.", $lit)
3427 unless $para ne '';
3428 $para =~ /^( *)/;
3429 my $spaces = $1;
3430 $para =~ s/^$spaces//gm;
3431 my $lines = 1;
3432 while ($para =~ /^:([^:\n]+): *(.*)/s) {
3433 my $optlit = $para;
3434 my ($next, $field) = ('');
3435 ($field, $para) = ($1, $2);
3436 # See if there are any subsequent field list items
3437 if ((my @s = split /^(?!\A)($RST::FIELD_LIST|\S)/om, $para, 2) > 1) {
3438 $para = $s[0];
3439 $next = "$s[1]$s[-1]";
3441 # Remove initial spaces
3442 my @spaces = $para =~ /^(?!\A)( +)/mg;
3443 my $spaces = defined $spaces[0] ? $spaces[0] : '';
3444 foreach (@spaces) {
3445 $spaces = $_ if length($_) < length($spaces);
3447 $para =~ s/^$spaces//mg;
3449 return RST::system_message(1, $source, $lineno+$lines,
3450 qq(No content for meta tag "$field".),
3451 $optlit)
3452 if $para =~ /^$/;
3453 my $pending = new DOM('pending');
3454 push(@doms, $pending);
3455 $pending->{internal}{'.transform'} =
3456 "docutils.transforms.components.Filter";
3457 $pending->{source} = $source;
3458 $pending->{lineno} = $lineno;
3459 $pending->{internal}{'.details'}{component} = "'writer'";
3460 $pending->{internal}{'.details'}{format} = "'html'";
3461 my $opt = $field;
3462 $opt =~ s/^([\w\.-]+)(?:=([\w\.-]+))?\s*//;
3463 my ($name, $nametag);
3464 $nametag = 'name' unless defined $nametag;
3465 if (defined $2) {
3466 ($nametag, $name) = ($1, $2);
3468 else {
3469 ($nametag, $name) = ('name', $1);
3471 my @attr = split(/\s*;\s*/, $opt);
3472 my %attr;
3473 foreach (@attr) {
3474 if (/(.*)=(.*)/) {
3475 $attr{$1} = $2;
3477 else {
3478 return
3479 RST::system_message(3, $source, $lineno+$lines,
3480 qq(Error parsing meta tag attribute "$_": missing "=".),
3481 $optlit);
3484 my $content = $para;
3485 chomp $content;
3486 $content =~ s/\n/ /g;
3487 my $dom = new DOM('meta', content=>$content,
3488 $nametag=>$name, %attr);
3489 $pending->{internal}{'.details'}{nodes} = $dom;
3491 $lines += $para =~ tr/\n//;
3492 $para = $next;
3495 push (@doms,
3496 RST::system_message(3, $source, $lineno,
3497 "Invalid meta directive.", $lit))
3498 if $para ne '';
3500 return @doms;
3503 # Built-in handler for parsed-literal directives.
3504 # Arguments: directive name, parent, source, line number, directive text,
3505 # literal text
3506 # Returns: array of DOM objects
3507 sub parsed_literal {
3508 my($name, $parent, $source, $lineno, $dtext, $lit) = @_;
3509 my $dhash = parse_directive($dtext, $lit, $source, $lineno);
3510 return $dhash if ref($dhash) eq 'DOM';
3511 my($args, $content, $content_lineno, $options) =
3512 map($dhash->{$_}, qw(args content content_lineno options));
3513 my ($err) = arg_check($name, $source, $lineno, $args, $lit, 0);
3514 return $err if $err;
3515 my @optlist = qw();
3516 $err = check_option_names($name, $options, \@optlist, $source, $lineno,
3517 $lit);
3518 return $err if $err;
3519 my $lb = new DOM('parsed_literal', %RST::XML_SPACE);
3520 my @errs = RST::Inline($lb, $content, $source, $content_lineno);
3521 return $lb, @errs;
3524 # Built-in handler for raw directives.
3525 # Arguments: directive name, parent, source, line number, directive text,
3526 # literal text
3527 # Returns: array of DOM objects
3528 sub raw {
3529 my($name, $parent, $source, $lineno, $dtext, $lit) = @_;
3530 #use PrintVar;PrintVar::PrintVar(\@_);print "\n";
3531 my $dhash = parse_directive($dtext, $lit, $source, $lineno);
3532 return $dhash if ref($dhash) eq 'DOM';
3533 my($args, $content, $options) =
3534 map($dhash->{$_}, qw(args content options));
3536 my @optlist = qw(file url);
3537 my $err = check_option_names($name, $options, \@optlist, $source, $lineno,
3538 $lit);
3539 return $err if $err;
3541 return RST::system_message(3, $source, $lineno,
3542 qq("$name" directive may not both specify an external file and have content.),
3543 $lit)
3544 if defined $options->{file} && $content ne '';
3545 return RST::system_message(3, $source, $lineno,
3546 qq(The "file" and "url" options may not be simultaneously specified for the "$name" directive.),
3547 $lit)
3548 if defined $options->{file} && defined $options->{url};
3549 return RST::system_message(4, $source, $lineno,
3550 qq(The "url" option is not yet implemented for the "$name" directive.),
3551 $lit)
3552 if defined $options->{url};
3554 my %attr;
3555 if (defined $options->{file}) {
3556 $source =~ m|(.*/)|;
3557 my $opt = $options->{file};
3558 my $dir = $1;
3559 my @files = ("$dir$opt", "$dir$opt.rst", "$dir$opt.txt");
3560 my @foundfiles = grep(-r $_, @files);
3561 my $file = @foundfiles ? $foundfiles[0] : $args;
3562 my $text;
3563 if (open(FILE,$file)) {
3564 $content = join('',<FILE>);
3565 $attr{source} = $file;
3567 else {
3568 my $err = "IOError: " . system_error();
3569 return RST::system_message(4, $source, $lineno,
3570 qq(Problems with "$name" directive path:\n$err: '$args'.),
3571 $lit);
3575 my $dom = new DOM('raw', format=>$args, %RST::XML_SPACE, %attr);
3576 chomp $content;
3577 $dom->append(newPCDATA DOM($content));
3579 return $dom;
3582 # Built-in handler for replace directives.
3583 # Arguments: directive name, parent, source, line number, directive text,
3584 # literal text
3585 # Returns: array of DOM objects
3586 sub replace {
3587 my($name, $parent, $source, $lineno, $dtext, $lit) = @_;
3588 my $dhash = parse_directive($dtext, $lit, $source, $lineno);
3589 return $dhash if ref($dhash) eq 'DOM';
3590 my($args, $content) = map($dhash->{$_}, qw(args content));
3592 my $fake = new DOM('fake');
3593 my $text = $args;
3594 $text .= "\n$content" if defined $content;
3595 return RST::system_message(3, $source, $lineno,
3596 qq(The "$name" directive is empty; content required.))
3597 if $text =~ /^$/;
3598 return RST::system_message(3, $source, $lineno,
3599 qq(Invalid context: the "$name" directive can only be used within a substitution definition.),
3600 $lit)
3601 unless $parent->{tag} eq 'substitution_definition';
3602 RST::Paragraphs($fake, $text, $source, $lineno);
3603 my $last = $fake->last();
3604 if ($fake->num_contents() == 1 && $last->{tag} =~ 'paragraph') {
3605 my $content = $fake->{content}[0];
3606 my $contents = $content->{content};
3607 if (@$contents && $contents->[-1]{tag} eq '#PCDATA') {
3608 chomp $contents->[-1]{text};
3610 return $last->contents();
3612 else {
3613 # This wasn't a simple paragraph
3614 return
3615 grep($_->{tag} eq 'system_message' && do {
3616 delete $_->{attr}{backrefs}; 1}, $fake->contents()),
3617 RST::system_message(3, $source, $lineno,
3618 qq(Error in "$name" directive: may contain a single paragraph only.));
3622 # Built-in handler for role directives.
3623 # Arguments: directive name, parent, source, line number, directive text,
3624 # literal text
3625 # Returns: array of DOM objects
3626 sub role {
3627 my($name, $parent, $source, $lineno, $dtext, $lit) = @_;
3628 my $dhash = parse_directive($dtext, $lit, $source, $lineno);
3629 return $dhash if ref($dhash) eq 'DOM';
3631 my($args, $options) =
3632 map($dhash->{$_}, qw(args options));
3634 my ($role, $tag) = $args =~ /^([^\( ]*)(?:\s*\(\s*(.*?)\s*\))?/;
3635 return RST::UnknownRole($tag, $source, $lineno, $lit)
3636 if defined $tag && !defined $RST::MY_ROLES{$tag};
3637 my $msg = RST::DefineRole($role, $tag, %$options);
3638 $msg = $msg =~ /invalid/i ?
3639 qq(Error in "$name" directive:\n$msg) :
3640 qq(Invalid argument for "$name" directive:\n$msg)
3641 if $msg;
3643 return RST::system_message(3, $source, $lineno, $msg, $lit)
3644 if $msg;
3645 return;
3648 # Built-in handler for sectnum directives.
3649 # Arguments: directive name, parent, source, line number, directive text,
3650 # literal text
3651 # Returns: array of DOM objects
3652 sub sectnum {
3653 my($name, $parent, $source, $lineno, $dtext, $lit) = @_;
3654 my $dhash = parse_directive($dtext, $lit, $source, $lineno);
3655 return $dhash if ref($dhash) eq 'DOM';
3656 my($args, $content, $options) =
3657 map($dhash->{$_}, qw(args content options));
3659 return system_message($name, 3, $source, $lineno,
3660 "no content permitted.", $dtext)
3661 if ($content ne '');
3662 my ($err) = arg_check($name, $source, $lineno, $args, $lit, 0);
3663 return $err if $err;
3664 my @optlist = qw(depth prefix start suffix);
3665 $err = check_option_names($name, $options, \@optlist, $source, $lineno,
3666 $lit);
3667 return $err if $err;
3668 foreach my $optname (qw(depth start)) {
3669 if (defined $options->{$optname}) {
3670 my $opt = $options->{$optname};
3671 my $err = check_int_option($name, $optname, $opt, $opt, $source,
3672 $lineno, $lit, 0);
3673 return $err if $err;
3677 my $pending = new DOM('pending');
3678 $pending->{internal}{'.transform'} =
3679 "docutils.transforms.parts.Sectnum";
3680 %{$pending->{internal}{'.details'}} = %$options;
3681 return $pending;
3684 # Built-in handler for rubric directives.
3685 # Arguments: directive name, parent, source, line number, directive text,
3686 # literal text
3687 # Returns: array of DOM objects
3688 sub rubric {
3689 my($name, $parent, $source, $lineno, $dtext, $lit) = @_;
3690 #use PrintVar;PrintVar::PrintVar(\@_);print "\n";
3691 my $dhash = parse_directive($dtext, $lit, $source, $lineno);
3692 return $dhash if ref($dhash) eq 'DOM';
3694 (my $args = $dhash->{args}) =~ s/\n/ /g;
3696 return no_args($name, $source, $lineno, $lit) if $args eq '';
3697 return system_message($name, 3, $source, $lineno,
3698 qq(no content permitted.), $lit)
3699 if $dhash->{content} ne '';
3701 my $rub = new DOM($name);
3702 $rub->append(newPCDATA DOM($args));
3704 return $rub;
3707 # Built-in handler for sidebar directives.
3708 # Arguments: directive name, parent, source, line number, directive text,
3709 # literal text
3710 # Returns: array of DOM objects
3711 sub sidebar {
3712 my($name, $parent, $source, $lineno, $dtext, $lit) = @_;
3713 #use PrintVar;PrintVar::PrintVar(\@_);print "\n";
3714 my $dhash = parse_directive($dtext, $lit, $source, $lineno);
3715 return $dhash if ref($dhash) eq 'DOM';
3716 my($args, $content, $options, $content_lineno) =
3717 map($dhash->{$_}, qw(args content options content_lineno));
3719 $args =~ s/\n//g;
3720 my @optlist = qw(subtitle);
3721 my $err = check_option_names($name, $options, \@optlist, $source, $lineno,
3722 $lit);
3723 return $err if $err;
3725 return RST::system_message(3, $source, $lineno,
3726 qq(The "$name" directive may not be used within a sidebar element.),
3727 $lit)
3728 if $parent->{tag} eq 'sidebar';
3729 return RST::system_message(3, $source, $lineno,
3730 qq(The "$name" directive may not be used within topics or body elements.),
3731 $lit)
3732 unless $parent->{tag} =~ /section|document/;
3734 my $sb = new DOM($name);
3735 my $title = new DOM('title');
3736 $sb->append($title);
3737 $title->append(newPCDATA DOM($args));
3738 if (defined $options->{subtitle}) {
3739 my $st = new DOM('subtitle');
3740 $st->append(newPCDATA DOM($options->{subtitle}));
3741 $sb->append($st);
3743 RST::Paragraphs($sb, $content, $source, $content_lineno);
3744 return $sb;
3747 # Built-in handler for table directives.
3748 # Arguments: directive name, parent, source, line number, directive text,
3749 # literal text
3750 # Returns: array of DOM objects
3751 sub table {
3752 my($name, $parent, $source, $lineno, $dtext, $lit) = @_;
3753 my $dhash = parse_directive($dtext, $lit, $source, $lineno);
3754 return $dhash if ref($dhash) eq 'DOM';
3755 my($args, $content, $content_lineno, $options) =
3756 map($dhash->{$_}, qw(args content content_lineno options));
3758 my %common_opts = (class=>'@classes', align=>'align');
3759 my %myopts = ($name eq 'csv-table' ?
3760 (widths=>'', 'header-rows'=>'', 'stub-columns'=>'',
3761 'header'=>'') :
3763 %common_opts);
3764 if (defined $options->{align}) {
3765 my @vallist = qw(left center right);
3766 my $err = check_enum_option($name, 'align', $options->{align},
3767 \@vallist, $source, $lineno, $lit);
3768 return $err if $err;
3771 my @dom;
3772 my $table = new DOM('table');
3773 $table->{tableattr} = $main::opt_D{tableattr};
3774 push(@dom, $table);
3775 foreach my $opt (keys %myopts) {
3776 if (defined $myopts{$opt} && $myopts{$opt} ne '' && $options->{$opt}) {
3777 if ($myopts{$opt} =~ /^@(.*)/) {
3778 $table->{attr}{$1} = [ $options->{$opt} ];
3780 else {
3781 $table->{attr}{$myopts{$opt}} = $options->{$opt};
3784 # $table->{attr}{$myopts{$opt}} = $options->{$opt}
3787 my $title = $args;
3788 if ($title !~ /^(..)?$/) {
3789 my $titledom = new DOM('title');
3790 my $fake = new DOM('fake');
3791 RST::Paragraphs($fake, $title, $source, $lineno);
3792 push @dom, grep($_->{tag} ne 'paragraph', $fake->contents());
3793 my @paras = grep($_->{tag} eq 'paragraph', $fake->contents());
3794 if (@paras == 1 && $paras[0]{tag} eq 'paragraph') {
3795 $titledom->append($paras[0]->contents());
3796 $table->append($titledom);
3800 if ($name eq 'csv-table') {
3801 return RST::system_message(3, $source, $lineno,
3802 qq("$name" directive may not both specify an external file and have content.),
3803 $lit)
3804 if $dhash->{content} ne '' && defined $options->{file};
3805 return RST::system_message(3, $source, $lineno,
3806 qq(The "file" and "url" options may not be simultaneously specified for the "$name" directive.),
3807 $lit)
3808 if defined $options->{file} && defined $options->{url};
3809 my $content = $dhash->{content};
3810 if (defined $options->{file}) {
3811 if (open CSV, $options->{file}) {
3812 $content = join '', <CSV>;
3813 return RST::system_message(3, $source, $lineno,
3814 qq(No table data detected in CSV file.),
3815 $lit)
3816 if $content eq '';
3818 else {
3819 my $err = system_error();
3820 return RST::system_message(4, $source, $lineno,
3821 qq(Problems with "$name" directive path:\n$err: '$options->{file}'.),
3822 $lit);
3824 my $encoding = $options->{encoding} || '';
3825 if ($encoding eq 'latin-1') {
3826 return RST::system_message(3, $source, $lineno,
3827 qq(Error with CSV data in "$name" directive:\nstring with NUL bytes), $lit)
3828 if $content =~ /\000/;
3830 elsif ($encoding ne '') {
3831 use Encode qw(encode decode);
3832 $content = decode($encoding, $content);
3835 elsif (defined $options->{url}) {
3836 my $url = $options->{url};
3837 return RST::system_message(4, $source, $lineno,
3838 qq(Problems with "$name" directive URL "$url":\nunknown url type: $url.)
3839 , $lit)
3840 unless $url =~ /^(($URIre::scheme):(?:$URIre::hier_part|$URIre::opaque_part))/o;
3841 return RST::system_message(4, $source, $lineno,
3842 qq(The "url" option is not yet implemented for the "$name" directive.),
3843 $lit)
3844 if defined $options->{url};
3846 my $delim = $options->{delim};
3847 if (! defined $delim) {
3848 $delim = ',';
3850 elsif ($delim eq 'space') {
3851 $delim = ' ';
3853 elsif ($delim =~ /^0x([\da-f]{2})/i) {
3854 $delim = chr hex $1;
3856 elsif ($delim =~ /^U\+(\d+)/) {
3857 return bad_option($name, 'delim', $delim, 3, $source, $lineno,
3858 qq(code too large (long int too large to convert to int).),
3859 $lit)
3860 if $1 > 0xffffffff;
3863 elsif (length($delim) ne 1) {
3864 return bad_option($name, 'delim', $delim, 3, $source, $lineno,
3865 qq('$delim' invalid; must be a single character or a Unicode code.),
3866 $lit);
3868 $delim = "\Q$delim";
3869 return RST::system_message(2, $source, $lineno,
3870 qq(The "$name" directive requires content; none supplied.),
3871 $lit)
3872 if $content eq '' &&
3873 ! defined $options->{file} && ! defined $options->{url};
3874 my %lines;
3875 my $rows;
3876 $rows = ParseCSV($content, \%lines, $delim);
3877 return RST::system_message(3, $source, $lineno,
3878 qq(Error with CSV data in "$name" directive:\n$rows), $lit)
3879 unless ref($rows) eq 'ARRAY';
3880 return $rows if ref($rows) eq 'DOM';
3881 my $cols = 0;
3882 grep (do {$cols = @$_ if @$_ > $cols}, @$rows);
3883 my $tg = new DOM('tgroup', cols=>$cols);
3884 $table->append($tg);
3885 my @widths;
3886 if (defined (my $widths = $options->{widths})) {
3887 @widths = split /[, ]\s*/, $widths;
3888 return RST::system_message(3, $source, $lineno,
3889 qq("$name" widths do not match the number of columns in table ($cols).),
3890 $lit)
3891 if @widths != $cols;
3892 foreach (@widths) {
3893 my $err = check_int_option($name, 'widths', $_, $widths,
3894 $source, $lineno, $lit, 1);
3895 return $err if $err;
3898 return RST::system_message(3, $source, $lineno,
3899 qq($options->{'stub-columns'} stub column(s) specified but only $cols column(s) of data supplied ("$name" directive).),
3900 $lit)
3901 if ($options->{'stub-columns'} || 0) > $cols;
3902 return RST::system_message(3, $source, $lineno,
3903 qq(Insufficient data supplied ($cols column(s)); no data remaining for table body, required by "$name" directive.),
3904 $lit)
3905 if ($options->{'stub-columns'} || 0) == $cols;
3906 for (my $i=0; $i < $cols; $i++) {
3907 my $cs = new DOM('colspec');
3908 $tg->append($cs);
3909 $cs->{attr}{colwidth} = $i < @widths ? $widths[$i] :
3910 int(100/$cols);
3911 $cs->{attr}{stub} = 1 if defined $options->{'stub-columns'} &&
3912 $i < $options->{'stub-columns'};
3914 my $heads;
3915 if (defined $options->{'header'}) {
3916 $heads = ParseCSV($options->{'header'}, \%lines, $delim);
3917 return RST::system_message(3, $source, $lineno,
3918 qq(Error with CSV data in "$name" directive:\n$heads), $lit)
3919 unless ref($heads) eq 'ARRAY';
3921 return RST::system_message(3, $source, $lineno,
3922 qq($options->{'header-rows'} header row(s) specified but only @{[0+@$rows]} row(s) of data supplied ("$name" directive).),
3923 $lit)
3924 if ($options->{'header-rows'} || 0) > @$rows;
3925 return RST::system_message(3, $source, $lineno,
3926 qq(Insufficient data supplied (@{[0+@$rows]} row(s)); no data remaining for table body, required by "$name" directive.),
3927 $lit)
3928 if ($options->{'header-rows'} || 0) == @$rows;
3929 my @heads = defined $heads ? @$heads : $options->{'header-rows'}
3930 ? splice(@$rows, 0, $options->{'header-rows'}) : ();
3932 foreach my $section (qw(thead tbody)) {
3933 my $hb_rows = $section eq 'thead' ? \@heads : $rows;
3934 next unless @$hb_rows;
3935 my $sec = new DOM($section);
3936 $tg->append($sec);
3937 foreach my $row (@$hb_rows) {
3938 my $r = new DOM('row');
3939 $sec->append($r);
3940 for (my $entry = 0; $entry < $cols; $entry++) {
3941 my $e = new DOM('entry');
3942 $r->append($e);
3943 my $lines = $lines{$row}[$entry] || 0;
3944 RST::Paragraphs($e, $row->[$entry], $source,
3945 $content_lineno+$lines);
3946 $e->{entryattr} = $main::opt_D{entryattr}
3947 if defined $main::opt_D{entryattr} &&
3948 $main::opt_D{entryattr} ne '';
3953 elsif ($name eq 'list-table') {
3954 my $rows = ParseListTable($content, $source, $content_lineno);
3955 return RST::system_message(3, $source, $lineno,
3956 qq(Error parsing content block for the "$name" directive: $rows.), $lit)
3957 unless ref($rows) eq 'ARRAY';
3958 return RST::system_message(3, $source, $lineno,
3959 qq($options->{'header-rows'} header row(s) specified but only @{[0+@$rows]} row(s) of data supplied ("$name" directive).),
3960 $lit)
3961 if ($options->{'header-rows'} || 0) > @$rows;
3962 return RST::system_message(3, $source, $lineno,
3963 qq(Insufficient data supplied (@{[0+@$rows]} row(s)); no data remaining for table body, required by "$name" directive.),
3964 $lit)
3965 if ($options->{'header-rows'} || 0) == @$rows;
3966 my $cols = @{$rows->[0]};
3967 my $tg = new DOM('tgroup', cols=>$cols);
3968 $table->append($tg);
3969 my @widths;
3970 if (defined (my $widths = $options->{widths})) {
3971 @widths = split /[, ]\s*/, $widths;
3972 return RST::system_message(3, $source, $lineno,
3973 qq("$name" widths do not match the number of columns in table ($cols).),
3974 $lit)
3975 if @widths != $cols;
3976 foreach (@widths) {
3977 my $err = check_int_option($name, 'widths', $_, $widths,
3978 $source, $lineno, $lit, 1);
3979 return $err if $err;
3982 return RST::system_message(3, $source, $lineno,
3983 qq($options->{'stub-columns'} stub column(s) specified but only $cols column(s) of data supplied ("$name" directive).),
3984 $lit)
3985 if ($options->{'stub-columns'} || 0) > $cols;
3986 return RST::system_message(3, $source, $lineno,
3987 qq(Insufficient data supplied ($cols column(s)); no data remaining for table body, required by "$name" directive.),
3988 $lit)
3989 if ($options->{'stub-columns'} || 0) == $cols;
3990 for (my $i=0; $i < $cols; $i++) {
3991 my $cs = new DOM('colspec');
3992 $tg->append($cs);
3993 $cs->{attr}{colwidth} = $i < @widths ? $widths[$i] :
3994 int(100/$cols);
3995 $cs->{attr}{stub} = 1 if defined $options->{'stub-columns'} &&
3996 $i < $options->{'stub-columns'};
3998 my $heads;
3999 my @heads = defined $heads ? @$heads : $options->{'header-rows'}
4000 ? splice(@$rows, 0, $options->{'header-rows'}) : ();
4002 foreach my $section (qw(thead tbody)) {
4003 my $hb_rows = $section eq 'thead' ? \@heads : $rows;
4004 next unless @$hb_rows;
4005 my $sec = new DOM($section);
4006 $tg->append($sec);
4007 foreach my $row (@$hb_rows) {
4008 my $r = new DOM('row');
4009 $sec->append($r);
4010 for (my $entry = 0; $entry < $cols; $entry++) {
4011 my $e = new DOM('entry');
4012 $r->append($e);
4013 $e->append(@{$row->[$entry]});
4014 $e->{entryattr} = $main::opt_D{entryattr}
4015 if defined $main::opt_D{entryattr} &&
4016 $main::opt_D{entryattr} ne '';
4021 else {
4022 return RST::system_message(2, $source, $lineno,
4023 qq(The "$name" directive requires content; none supplied.),
4024 $lit)
4025 if $content eq '';
4026 my $fake = new DOM('fake');
4027 RST::Paragraphs($fake, $content, $source, $content_lineno);
4028 $table->append($fake->{content}[0]->contents())
4029 if $fake->{content}[0]{tag} eq 'table';
4031 return @dom;
4034 # Parses a comma-separated-value (CSV) table
4035 # Arguments: text string, ref to lines hash, delimiter
4036 # Returns: reference to an array of parsed rows, each of which is a
4037 # reference to an array of strings for the elements of that row
4038 # OR a string error message
4039 # Side-effects: Sets the lines hash to have as keys the row reference
4040 # and as values a reference to an array containing the
4041 # relative line number where each row entry starts (for
4042 # giving better error messages);
4043 sub ParseCSV {
4044 my ($string, $lines, $delim) = @_;
4045 my @rows;
4046 my @split = split /[ ]*(\".*?\"|[^$delim\n]+)/s, $string;
4047 shift @split;
4048 my $row;
4049 my $line = 0;
4050 while (my($val, $sep) = splice @split, 0, 2) {
4051 $sep = '' if ! defined $sep;
4052 push @rows, ($row = []) unless $row;
4053 return "newline inside string" if $val =~ /^\"[^\"]*$/;
4054 $val = $1 if $val =~ /^\"(.*)\"\s*$/s;
4055 push @$row, $val;
4056 push @{$lines->{$row}}, $line;
4057 $row = undef if $sep =~ /\n/;
4058 $line += ($val =~ tr/\n//) + ($sep =~ tr/\n//);
4060 return \@rows;
4063 # Parses a list table
4064 # Arguments: text string, source file, source line number
4065 # Returns: reference to an array of parsed rows, each of which is a
4066 # reference to an array of references to of array of DOM objects
4067 # for the elements of that row
4068 # OR a string error message
4069 sub ParseListTable {
4070 my ($string, $source, $lineno) = @_;
4071 my @rows;
4072 my $fake = new DOM('fake');
4073 RST::Paragraphs($fake, $string, $source, $lineno);
4074 my $cols = 0;
4075 my $bl1 = $fake->{content}[0];
4076 return "exactly one bullet list expected"
4077 if $bl1->{tag} ne 'bullet_list' ||
4078 grep($_->{tag} eq 'bullet_list', $fake->contents()) > 1;
4079 for (my $row=0; $row < $bl1->num_contents(); $row++) {
4080 my $li1 = $bl1->{content}[$row];
4081 my $bl2 = $li1->{content}[0];
4082 return "two-level bullet list expected, but row @{[$row+1]} does not contain a second-level bullet list"
4083 unless $li1->{tag} eq 'list_item' && $bl2->{tag} eq 'bullet_list';
4084 return "uniform two-level bullet list expected, but row @{[$row+1]} does not contain the same number of items as row 1 (@{[$bl2->num_contents()]} vs $cols)"
4085 if $row > 0 && $bl2->num_contents() ne $cols;
4086 for (my $col=0; $col < $bl2->num_contents(); $col++) {
4087 push @{$rows[$row]}, $bl2->{content}[$col]{content};
4089 $cols = $bl2->contents() if $row eq 0;
4092 return \@rows;
4095 # Built-in handler for target-notes directives.
4096 # Arguments: directive name, parent, source, line number, directive text,
4097 # literal text
4098 # Returns: array of DOM objects
4099 sub target_notes {
4100 my($name, $parent, $source, $lineno, $dtext, $lit) = @_;
4101 my $dhash = parse_directive($dtext, $lit, $source, $lineno);
4102 return $dhash if ref($dhash) eq 'DOM';
4103 my($args, $content, $content_lineno, $options) =
4104 map($dhash->{$_}, qw(args content content_lineno options));
4105 my ($err) = arg_check($name, $source, $lineno, $args, $lit, 0);
4106 return $err if $err;
4107 my @optlist = qw();
4108 $err = check_option_names($name, $options, \@optlist, $source, $lineno,
4109 $lit);
4110 return $err if $err;
4111 return system_message($name, 3, $source, $lineno,
4112 "no content permitted.", $dtext)
4113 if ($content ne '');
4115 my $pending = new DOM('pending');
4116 $pending->{internal}{'.transform'} =
4117 "docutils.transforms.references.TargetNotes";
4118 %{$pending->{internal}{'.details'}{depth}} = %$options
4119 if defined $options;
4120 return $pending;
4123 # Built-in handler for test_directive directives.
4124 # Arguments: directive name, parent, source, line number, directive text,
4125 # literal text
4126 # Returns: array of DOM objects
4127 sub test_directive {
4128 my($name, $parent, $source, $lineno, $dtext, $lit) = @_;
4129 my $dhash = parse_directive($dtext, $lit, $source, $lineno);
4130 return $dhash if ref($dhash) eq 'DOM';
4131 my($dname, $args, $content, $options) =
4132 map($dhash->{$_}, qw(name args content options));
4134 my $contlit = $content;
4135 my $contstr = $content eq '' ? ' None' : '';
4136 $args = "'$args'" if $args ne '';
4137 my $opt;
4138 foreach $opt (sort keys %$options) {
4139 my $err = check_required_option($dname, $opt, $options->{$opt},
4140 $source, $lineno, $lit);
4141 return $err if $err;
4143 my $optstring =
4144 join('; ', map(do { my ($opt, $val) = ($_, $options->{$_});
4145 $val =~ s/\n/\\n/g;
4146 "'$opt': '$val'"; }, sort keys %$options));
4147 return RST::system_message(1, $source, $lineno,
4148 qq(Directive processed. Type="$dname", arguments=[$args], options={$optstring}, content:$contstr),
4149 $contlit);
4152 # Built-in handler for title directives.
4153 # Arguments: directive name, parent, source, line number, directive text,
4154 # literal text
4155 # Returns: array of DOM objects
4156 sub title {
4157 my($name, $parent, $source, $lineno, $dtext, $lit) = @_;
4158 my $dhash = parse_directive($dtext, $lit, $source, $lineno);
4159 return $dhash if ref($dhash) eq 'DOM';
4160 my $args = $dhash->{args};
4161 return no_args($name, $source, $lineno, $lit) if $args eq '';
4162 return system_message($name, 3, $source, $lineno,
4163 "no content permitted.", $dtext)
4164 if ($dhash->{content} ne '');
4165 $RST::TOPDOM->{'.details'}{title} = $args;
4166 return;
4169 # Built-in handler for topic directives.
4170 # Arguments: directive name, parent, source, line number, directive text,
4171 # literal text
4172 # Returns: array of DOM objects
4173 sub topic {
4174 my($name, $parent, $source, $lineno, $dtext, $lit) = @_;
4175 my $dhash = parse_directive($dtext, $lit, $source, $lineno);
4176 return $dhash if ref($dhash) eq 'DOM';
4177 my($args, $content, $content_lineno) =
4178 map($dhash->{$_}, qw(args content content_lineno));
4180 my ($err) = arg_check($name, $source, $lineno, $args, $lit, 1);
4181 return $err if $err;
4182 return RST::system_message(2, $source, $lineno,
4183 qq(Content block expected for the "$name" directive; none found.),
4184 $lit)
4185 if $content =~ /^$/;
4187 my $topic = new DOM('topic');
4188 my $title = new DOM('title');
4189 $topic->append($title);
4190 $title->append(newPCDATA DOM($args));
4191 RST::Paragraphs($topic, $content, $source, $content_lineno);
4192 return $topic;
4195 # Built-in handler for unicode directives.
4196 # Arguments: directive name, parent, source, line number, directive text,
4197 # literal text
4198 # Returns: array of DOM objects
4199 sub unicode {
4200 my($name, $parent, $source, $lineno, $dtext, $lit) = @_;
4201 my $dhash = parse_directive($dtext, $lit, $source, $lineno);
4202 return $dhash if ref($dhash) eq 'DOM';
4203 my($args, $content, $options) =
4204 map($dhash->{$_}, qw(args content options));
4206 my $fake = new DOM('fake');
4207 my $text = $args;
4208 return no_args($name, $source, $lineno, $lit) if $text eq '';
4209 return RST::system_message(3, $source, $lineno,
4210 qq(Invalid context: the "$name" directive can only be used within a substitution definition.),
4211 $lit)
4212 unless $parent->{tag} eq 'substitution_definition';
4213 my @optlist = qw(trim ltrim rtrim);
4214 my $err = check_option_names($name, $options, \@optlist, $source, $lineno,
4215 $lit);
4216 return $err if $err;
4218 # Remove comments
4219 $text =~ s/\s*\.\..*//;
4221 my @chars = split /\s+/, $text;
4222 for (my $c=0; $c < @chars; $c++) {
4223 my $char = $chars[$c];
4224 if ($char =~ /^(?:0x|\\u|U[+]?)([\da-f]+)$/i ||
4225 $char =~/^&\#x([\da-f]+);$/i) {
4226 my $hex = $1;
4227 $hex =~ s/^0+//;
4228 my $icc = "Invalid character code: $char";
4229 return RST::system_message(3, $source, $lineno,
4230 qq($icc\nValueError: code too large (unichr(int("$1", 16)))),
4231 $lit)
4232 if length $hex > 8;
4233 my $dec = hex $hex;
4234 return RST::system_message(3, $source, $lineno,
4235 qq($icc\nunichr(int("$1", 16))),
4236 $lit)
4237 if $dec > 0xffff;
4238 $chars[$c] = chr($dec);
4241 @$options{keys %$options} = (1) x keys %$options;
4242 if ($options->{trim}) {
4243 $options->{ltrim} = $options->{rtrim} = $options->{trim};
4244 delete $options->{trim};
4246 @{$parent->{attr}}{keys %$options} = values %$options;
4247 return @chars ? newPCDATA DOM(join "\n", @chars) : ();
4250 ################ RST::Directive internal routines ################
4252 # INTERNAL ROUTINE.
4253 # Returns a system error if a directive argument check fails.
4254 # May also be useful to plug-in directives.
4255 # Arguments: name, source, lineno, args, lit text, expected # of args
4256 # Returns: DOM if the check fails
4257 sub arg_check {
4258 my ($name, $source, $lineno, $args, $lit, $exp) = @_;
4260 my @dom;
4261 my (@args) = split(/\s+/, $args);
4262 my $got = @args;
4263 push(@dom, system_message($name, 3, $source, $lineno,
4264 "$exp argument(s) required, $got supplied.",
4265 $lit))
4266 unless $got >= $exp;
4268 return @dom;
4271 # INTERNAL ROUTINE.
4272 # Returns a system message specifying a bad option.
4273 # May also be useful to plug-in directives.
4274 # Arguments: directive name, option name, option value, level, source,
4275 # line number, message, optional literal block
4276 # Returns: system_message DOM
4277 sub bad_option {
4278 my($name, $opt, $val, $level, $source, $lineno, $msg, $lit) = @_;
4280 my $valstr = $val eq '' ? "None" : "'$val'";
4281 return
4282 system_message($name, $level, $source, $lineno,
4283 qq(invalid option value: (option: "$opt"; value: $valstr)\n$msg),
4284 $lit);
4287 # INTERNAL ROUTINE.
4288 # Returns a system message DOM if the option value does not parse as one
4289 # of an enumerated list.
4290 # May also be useful to plug-in directives.
4291 # Arguments: directive name, option name, option value, ref to array of
4292 # enum names, source, line number, literal
4293 # Returns: error DOM or None
4294 sub check_enum_option {
4295 my($name, $opt, $val, $vallist, $source, $lineno, $lit) = @_;
4297 my $search = join("|", @$vallist);
4298 if ($val !~ /^($search)$/i) {
4299 my @list = map(qq("$_"), @$vallist);
4300 $list[-1] =~ s/^/or /;
4301 my $list = join(', ',@list);
4302 return bad_option($name, $opt, $val, 3, $source, $lineno,
4303 qq(must supply an argument; choose from $list.),
4304 $lit)
4305 if $val eq '';
4306 return bad_option($name, $opt, $val, 3, $source, $lineno,
4307 qq("$val" unknown; choose from $list.), $lit);
4309 return;
4312 # INTERNAL ROUTINE.
4313 # Returns a system message DOM if the option value does not parse as integer.
4314 # May also be useful to plug-in directives.
4315 # Arguments: directive name, option name, option value, complete option string,
4316 # source, line number, literal,
4317 # optional argument indicating minimum OK value (0 or 1)
4318 # Returns: error DOM or None
4319 sub check_int_option {
4320 my($name, $opt, $val, $option, $source, $lineno, $lit, $pos) = @_;
4321 $pos = '' unless defined $pos;
4322 return bad_option($name, $opt, $option, 3, $source, $lineno,
4323 "invalid literal for int(): None.", $lit)
4324 if $val eq '';
4325 return bad_option($name, $opt, $option, 3, $source, $lineno,
4326 "invalid literal for int(): $val.", $lit)
4327 unless $val =~ /^(-\s*)?\d+$/;
4328 return bad_option($name, $opt, $option, 3, $source, $lineno,
4329 "negative value; must be positive or zero.", $lit)
4330 if $pos eq '0' && $val =~ /^-\s*\d+$/;
4331 return bad_option($name, $opt, $option, 3, $source, $lineno,
4332 "negative or zero value; must be positive.", $lit)
4333 if $pos eq '1' && $val < 1;
4334 return;
4337 # INTERNAL ROUTINE.
4338 # Returns a system message DOM if any of the options in the option hash are
4339 # not in the legal option list array.
4340 # May also be useful to plug-in directives.
4341 # Arguments: directive name, reference to option hash, reference to array
4342 # of legal option names, source, line number, literal
4343 sub check_option_names {
4344 my ($name, $options, $optlist, $source, $lineno, $lit) = @_;
4346 my @badoptions = grep(/ /, sort keys %$options);
4347 return system_message($name, 3, $source, $lineno,
4348 qq(invalid option data: extension option field name may not contain multiple words.),
4349 $lit)
4350 if @badoptions;
4352 my %optlist;
4353 @optlist{@$optlist} = (1) x @$optlist;
4354 @badoptions = grep(! $optlist{$_}, sort keys %$options);
4355 return system_message($name, 3, $source, $lineno,
4356 qq(unknown option: "$badoptions[0]".), $lit)
4357 if @badoptions;
4359 return;
4362 # INTERNAL ROUTINE.
4363 # Returns a system message DOM if the option value is empty.
4364 # May also be useful to plug-in directives.
4365 # Arguments: directive name, option name, option value, source, line number,
4366 # literal
4367 # Returns: error DOM or none
4368 sub check_required_option {
4369 my($name, $opt, $val, $source, $lineno, $lit) = @_;
4371 return bad_option($name, $opt, $val, 3, $source, $lineno,
4372 qq(argument required but none supplied.), $lit)
4373 if $val eq '';
4376 # INTERNAL ROUTINE.
4377 # Returns a system message DOM if the option value does not parse as a
4378 # positive measure of a defined unit.
4379 # May also be useful to plug-in directives.
4380 # Arguments: directive name, option name, option value, source, line number,
4381 # literal, list of extra units
4382 # Returns: error DOM or none
4383 # Uses globals: @RST::UNITS
4384 # Causes side-effects: option value has whitespace removed if valid
4385 sub check_units_option {
4386 my($name, $opt, $val, $source, $lineno, $lit, @extra_units) = @_;
4388 my $search = join("|", @RST::UNITS, @extra_units);
4389 if ($_[2] !~ s/^(\d+(?:\.\d*)?|\.\d+)\s*($search)\s*$/$1$2/i) {
4390 return bad_option($name, $opt, $val, 3, $source, $lineno,
4391 qq(must supply an argument.), $lit)
4392 if $val eq '';
4393 my $units = join ' ', map(qq("$_"), @RST::UNITS, @extra_units);
4394 return bad_option($name, $opt, $val, 3, $source, $lineno,
4395 qq(not a positive measure of one of the following units:\n$units.),
4396 $lit);
4400 # Adds a handler for a directive. Used by plug-in directives to
4401 # register a routine to call for a given directive name.
4402 # Arguments: directive name, reference to directive subroutine
4403 sub handle_directive {
4404 my($name, $sub) = @_;
4406 $RST::DIRECTIVES{lc $name} = $sub;
4409 # INTERNAL ROUTINE.
4410 # Returns a system message when no arguments were supplied
4411 # Arguments: directive name, level, source, lineno, literal string,
4412 # Returns: system_message DOM
4413 sub no_args {
4414 my ($name, $source, $lineno, $lit) = @_;
4415 return system_message($name, 3, $source, $lineno,
4416 qq(1 argument(s) required, 0 supplied.), $lit);
4420 # INTERNAL ROUTINE.
4421 # Parses the text of a directive into its arguments, options, and contents.
4422 # May also be useful to plug-in directives.
4423 # Arguments: Directive text, literal text, source, lineno
4424 # Returns: Error DOM or `directive arguments hash ref`_
4425 sub parse_directive {
4426 my($dtext, $lit, $source, $lineno) = @_;
4428 my ($pre, $directive, $body) = $dtext =~ /(\s*)([\w\.-]+)\s*:: *(.*)/s;
4429 my $dname = $directive;
4430 $directive =~ tr/[A-Z].-/[a-z]__/;
4432 # Parse the body into arguments, options, and content
4433 my $args;
4434 if ((my @s = split /(^ *($RST::FIELD_LIST|::)|\n\n)/mo, $body, 2) > 1) {
4435 $args = $s[0];
4436 $body = "$s[1]$s[-1]";
4438 else {
4439 $args = $body;
4440 $body = "";
4443 my $content_lineno = $lineno + ($args =~ tr/\n//);
4444 $args =~ s/^\n//;
4445 $args =~ s/^ //mg;
4446 $args =~ s/\n$//;
4447 if (! defined $args && $body !~ /^ *($RST::FIELD_LIST|::)/o
4448 && (my @s = split /^(\n(?:\n+))/m, $lit, 2) > 1) {
4449 # Any ensuing paragraph is a block quote
4450 $lit = "$s[0]\n";
4451 $body =~ /^\n\n/m;
4452 $content_lineno += 2;
4453 $body = "$s[0]\n";
4455 my $spaces = " ";
4456 my %options;
4457 $body =~ s/^$spaces//mg;
4458 my $err = 0;
4459 if (defined $args || $body =~ /^ *($RST::FIELD_LIST:::)/o) {
4460 my $options;
4461 if ((my @s = split /^(\n+)/m, $body, 2) > 1) {
4462 $options = "$s[0]";
4463 my $pre = "$s[0]$s[1]";
4464 $body = "$s[-1]";
4465 $content_lineno += ($pre =~ tr/\n//);
4467 else {
4468 $options = $body;
4469 $body = "";
4471 my @options = split /^(?=:)/m, $options;
4472 my $option;
4473 foreach $option (@options) {
4474 my ($opt,$val) = $option =~ /^:([^:\n]*): *(.*)/s;
4475 return system_message($directive, 3, $source, $lineno,
4476 "invalid option block.", $lit)
4477 if $opt eq '';
4478 if (defined $options{$opt}) {
4479 $err = 1;
4480 return
4481 system_message($directive, 3, $source, $lineno,
4482 qq(invalid option data: duplicate option "$opt".),
4483 $lit);
4485 chomp $val;
4486 if ($val =~ /^(?!\A)( *)/m) {
4487 my $spaces = $1;
4488 $val =~ s/^$spaces//gm;
4490 $options{$opt} = $val;
4493 #print "[$args][",join(',',map("$_=>$options{$_}",sort keys %options)),"][$body]\n";
4495 my $dhash = {name=>$dname, args=>$args, content=>$body,
4496 content_lineno=>$content_lineno};
4497 $dhash->{options} = \%options if %options;
4498 return $dhash;
4501 # INTERNAL ROUTINE.
4502 # Returns a canonically formatted version of the last system error.
4503 # Arguments: None
4504 # Returns: error string
4505 sub system_error {
4506 return "[Errno " . ($!+0) . "] $!";
4509 # INTERNAL ROUTINE.
4510 # Returns a DOM object for a system message, with 'Error in "name" directive"
4511 # prepended to the message.
4512 # May also be useful to plug-in directives.
4513 # Arguments: directive name, level, source, lineno, message, literal string,
4514 # attribute pairs
4515 sub system_message {
4516 my ($name, $level, $source, $lineno, $msg, $lit, %attr) = @_;
4517 return RST::system_message($level, $source, $lineno,
4518 qq(Error in "$name" directive:\n$msg), $lit,
4519 %attr);
4522 package Util;
4524 # Does a "deep" copy of a data structure
4525 # Inputs: variable
4526 # Returns: deep copy of variable
4527 sub DeepCopy {
4528 my($var) = @_;
4529 return $var if ref($var) eq '';
4530 if ("$var" =~ /HASH/) {
4531 my(%val);
4532 @val{keys %$var} = map(DeepCopy($_),values %$var);
4533 return \%val;
4535 elsif ("$var" =~ /ARRAY/) {
4536 my(@val) = map(DeepCopy($_), @$var);
4537 return \@val;
4539 elsif ("$var" =~ /SCALAR/) {
4540 my($val) = DeepCopy($$var);
4541 return \$val;
4543 elsif ("$var" =~ /CODE/) {
4544 return $var;
4546 else {
4547 my $val = "$var";
4548 return $val;