3 # Script to turn PCRE man pages into HTML
6 # Subroutine to handle font changes and other escapes
11 $s =~ s/</</g; # Deal with < and >
13 $s =~ s
"\\fI(.*?)\\f[RP]"<i
>$1</i
>"g;
14 $s =~ s"\\fB
(.*?
)\\f
[RP
]"<b>$1</b>"g
;
16 $s =~ s/(?<=Copyright )\(c\)/©/g;
20 # Subroutine to ensure not in a paragraph
25 print TEMP "</PRE
>\n" if ($inpre);
32 # Subroutine to start a new paragraph
50 while ($#ARGV >= 0 && $ARGV[0] =~ /^-/)
52 $toc = 1 if $ARGV[0] eq "-toc
";
56 # Initial output to STDOUT
61 <title>$ARGV[0] specification</title>
63 <body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
64 <h1
>$ARGV[0] man page
</h1
>
66 Return to the
<a href
="index.html">PCRE
index page
</a
>.
69 This page is part of the PCRE HTML documentation
. It was generated automatically
70 from the original man page
. If there is any nonsense
in it
, please consult the
71 man page
, in case the conversion went wrong
.
75 print "<ul>\n" if ($toc);
77 open(TEMP
, ">/tmp/$$") || die "Can't open /tmp/$$ for output\n";
81 # Handle lines beginning with a dot
85 # Some of the PCRE man pages used to contain instances of .br. However,
86 # they should have all been removed because they cause trouble in some
87 # (other) automated systems that translate man pages to HTML. Complain if
88 # we find .br or .in (another macro that is deprecated).
90 if (/^\.br/ || /^\.in/)
92 print STDERR
"\n*** Deprecated macro encountered - rewrite needed\n";
93 print STDERR
"*** $_\n";
94 die "*** Processing abandoned\n";
97 # Instead of .br, relevant "literal" sections are enclosed in .nf/.fi.
109 # Handling .sp is subtle. If it is inside a literal section, do nothing if
110 # the next line is a non-literal text line; similarly, if not inside a
111 # literal section, do nothing if a literal follows, unless we are inside
112 # a .nf/.fi section. The point being that the <pre> and </pre> that delimit
113 # literal sections will do the spacing. Always skip if no previous output.
122 print TEMP
"\n" if (/^[\s.]/);
126 print TEMP
"<br>\n<br>\n" if ($innf || !/^[\s.]/);
128 redo; # Now process the lookahead line we just read
131 elsif (/^\.TP/ || /^\.PP/ || /^\.P/)
135 elsif (/^\.SH\s*("?)(.*)\1/)
137 # Ignore the NAME section
145 my($title) = &do_line
($2);
148 printf("<li><a name=\"TOC%d\" href=\"#SEC%d\">$title</a>\n",
150 printf TEMP
("<br><a name=\"SEC%d\" href=\"#TOC1\">$title</a><br>\n",
156 print TEMP
"<br><b>\n$title\n</b><br>\n";
159 elsif (/^\.SS\s*("?)(.*)\1/)
162 my($title) = &do_line
($2);
163 print TEMP
"<br><b>\n$title\n</b><br>\n";
165 elsif (/^\.B\s*(.*)/)
167 &new_para
() if (!$inpara);
170 print TEMP
"<b>$_</b>\n";
173 elsif (/^\.I\s*(.*)/)
175 &new_para
() if (!$inpara);
178 print TEMP
"<i>$_</i>\n";
182 # A comment that starts "HREF" takes the next line as a name that
183 # is turned into a hyperlink, using the text given, which might be
184 # in a special font. If it ends in () or (digits) or punctuation, they
185 # aren't part of the link.
187 elsif (/^\.\\"\s*HREF/)
193 $_ =~ /^(?:<.>)?([^<(]+)(?:\(\))?(?:<\/.>)?
(?
:\
(\d
+\
))?
[.,;:]?
$/;
194 print TEMP
"<a href=\"$1.html\">$_</a>\n";
197 # A comment that starts "HTML" inserts literal HTML
199 elsif (/^\.\\"\s*HTML\s*(.*)/)
204 # A comment that starts < inserts that HTML at the end of the
205 # *next* input line - so as not to get a newline between them.
207 elsif (/^\.\\"\s*(<.*>)/)
214 print TEMP
"$_$markup\n";
217 # A comment that starts JOIN joins the next two lines together, with one
218 # space between them. Then that line is processed. This is used in some
219 # displays where two lines are needed for the "man" version. JOINSH works
220 # the same, except that it assumes this is a shell command, so removes
221 # continuation backslashes.
223 elsif (/^\.\\"\s*JOIN(SH)?/)
228 $one =~ s/\s*\\e\s*$// if (defined($1));
232 redo; # Process the joined lines
235 # .EX/.EE are used in the pcredemo page to bracket the entire program,
236 # which is unmodified except for turning backslash into "\e".
240 print TEMP
"<PRE>\n";
252 # Ignore anything not recognized
257 # Line does not begin with a dot. Replace blank lines with new paragraphs
261 &end_para
() if ($wrotetext);
265 # Convert font changes and output an ordinary line. Ensure that indented
266 # lines are marked as literal.
269 &new_para
() if (!$inpara);
275 print TEMP
"<pre>\n";
281 print TEMP
"</pre>\n";
285 # Add <br> to the end of a non-literal line if we are within .nf/.fi
287 $_ .= "<br>\n" if (!$inpre && $innf);
293 # The TOC, if present, will have been written - terminate it
295 print "</ul>\n" if ($toc);
297 # Copy the remainder to the standard output
300 open(TEMP
, "/tmp/$$") || die "Can't open /tmp/$$ for input\n";
302 print while (<TEMP
>);
306 Return to the <a href="index.html">PCRE index page</a>.