Make a branch to make krunner Good Enough For Aaron™.
[kdebase/uwolfer.git] / runtime / kioslave / man / man2html.cpp
bloba3cd27eb824708f23c4ca986d23020a111de0303
1 /*
2 This file is part of the KDE libraries
4 Copyright (C) 2005 Nicolas GOUTTE <goutte@kde.org>
6 ### TODO: who else?
7 */
9 // Start of verbatim comment
12 ** This program was written by Richard Verhoeven (NL:5482ZX35)
13 ** at the Eindhoven University of Technology. Email: rcb5@win.tue.nl
15 ** Permission is granted to distribute, modify and use this program as long
16 ** as this comment is not removed or changed.
19 // End of verbatim comment
22 * man2html-linux-1.0/1.1
23 * This version modified for Redhat/Caldera linux - March 1996.
24 * Michael Hamilton <michael@actrix.gen.nz>.
26 * man2html-linux-1.2
27 * Added support for BSD mandoc pages - I didn't have any documentation
28 * on the mandoc macros, so I may have missed some.
29 * Michael Hamilton <michael@actrix.gen.nz>.
31 * vh-man2html-1.3
32 * Renamed to avoid confusion (V for Verhoeven, H for Hamilton).
34 * vh-man2html-1.4
35 * Now uses /etc/man.config
36 * Added support for compressed pages.
37 * Added "length-safe" string operations for client input parameters.
38 * More secure, -M secured, and client input string lengths checked.
43 ** If you want to use this program for your WWW server, adjust the line
44 ** which defines the CGIBASE or compile it with the -DCGIBASE='"..."' option.
46 ** You have to adjust the built-in manpath to your local system. Note that
47 ** every directory should start and end with the '/' and that the first
48 ** directory should be "/" to allow a full path as an argument.
50 ** The program first checks whether PATH_INFO contains some information.
51 ** If it does (i.e. man2html/some/thing is used), the program will look
52 ** for a manpage called PATH_INFO in the manpath.
54 ** Otherwise the manpath is searched for the specified command line argument,
55 ** where the following options can be used:
57 ** name name of manpage (csh, printf, xv, troff)
58 ** section the section (1 2 3 4 5 6 7 8 9 n l 1v ...)
59 ** -M path an extra directory to look for manpages (replaces "/")
61 ** If man2html finds multiple manpages that satisfy the options, an index
62 ** is displayed and the user can make a choice. If only one page is
63 ** found, that page will be displayed.
65 ** man2html will add links to the converted manpages. The function add_links
66 ** is used for that. At the moment it will add links as follows, where
67 ** indicates what should match to start with:
68 ** ^^^
69 ** Recognition Item Link
70 ** ----------------------------------------------------------
71 ** name(*) Manpage ../man?/name.*
72 ** ^
73 ** name@hostname Email address mailto:name@hostname
74 ** ^
75 ** method://string URL method://string
76 ** ^^^
77 ** www.host.name WWW server http://www.host.name
78 ** ^^^^
79 ** ftp.host.name FTP server ftp://ftp.host.name
80 ** ^^^^
81 ** <file.h> Include file file:/usr/include/file.h
82 ** ^^^
84 ** Since man2html does not check if manpages, hosts or email addresses exist,
85 ** some links might not work. For manpages, some extra checks are performed
86 ** to make sure not every () pair creates a link. Also out of date pages
87 ** might point to incorrect places.
89 ** The program will not allow users to get system specific files, such as
90 ** /etc/passwd. It will check that "man" is part of the specified file and
91 ** that "/../" isn't. Even if someone manages to get such file, man2html will
92 ** handle it like a manpage and will usually not produce any output (or crash).
94 ** If you find any bugs when normal manpages are converted, please report
95 ** them to me (rcb5@win.tue.nl) after you have checked that man(1) can handle
96 ** the manpage correctly.
98 ** Known bugs and missing features:
100 ** * Equations are not converted at all.
101 ** * Tables are converted but some features are not possible in html.
102 ** * The tabbing environment is converted by counting characters and adding
103 ** spaces. This might go wrong (outside <PRE>)
104 ** * Some manpages rely on the fact that troff/nroff is used to convert
105 ** them and use features which are not described in the man manpages.
106 ** (definitions, calculations, conditionals, requests). I can't guarantee
107 ** that all these features work on all manpages. (I didn't have the
108 ** time to look through all the available manpages.)
111 # include <config-runtime.h>
113 #include <ctype.h>
115 #include <unistd.h>
116 #include <string.h>
118 #include <stdio.h>
120 #include <QtCore/QByteArray>
121 #include <QtCore/QDateTime>
122 #include <QtCore/QMap>
123 #include <QtCore/QStack>
124 #include <QtCore/QString>
126 #ifdef SIMPLE_MAN2HTML
127 # include <stdlib.h>
128 # include <iostream>
129 # include <dirent.h>
130 # include <sys/stat.h>
131 # define kDebug(x) cerr
132 # define kWarning(x) cerr << "WARNING "
133 # define BYTEARRAY(x) x.constData()
134 #else
135 # include <QTextCodec>
136 # include <kdebug.h>
137 # include <kdeversion.h>
138 # define BYTEARRAY(x) x
139 #endif
143 #include "man2html.h"
145 using namespace std;
147 #define NULL_TERMINATED(n) ((n) + 1)
149 #define HUGE_STR_MAX 10000
150 #define LARGE_STR_MAX 2000
151 #define MED_STR_MAX 500
152 #define SMALL_STR_MAX 100
153 #define TINY_STR_MAX 10
156 #if 1
157 // The output is currently too horrible to be called HTML 4.01
158 #define DOCTYPE "<!DOCTYPE HTML>"
159 #else
160 #define DOCTYPE "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n"
161 #endif
163 /* mdoc(7) Bl/El lists to HTML list types */
164 #define BL_DESC_LIST 1
165 #define BL_BULLET_LIST 2
166 #define BL_ENUM_LIST 4
168 /* mdoc(7) Bd/Ed example(?) blocks */
169 #define BD_LITERAL 1
170 #define BD_INDENT 2
172 static int s_nroff = 1; // NROFF mode by default
174 static int mandoc_name_count = 0; /* Don't break on the first Nm */
/**
 * Allocate a character buffer with room for \p len characters plus the
 * terminating NUL. The buffer comes from new[] and is not initialised.
 */
static char *stralloc(int len)
{
    // one extra byte for the terminating NUL
    char *const block = new char[len + 1];
#ifdef SIMPLE_MAN2HTML
    if (block == 0)
    {
        cerr << "man2html: out of memory" << endl;
        exit(EXIT_FAILURE);
    }
#endif
    // outside SIMPLE_MAN2HTML no check is made: modern compilers do not
    // return a NULL pointer from new
    return block;
}
192 static char *strlimitcpy(char *to, char *from, int n, int limit)
193 { /* Assumes space for limit plus a null */
194 const int len = n > limit ? limit : n;
195 qstrncpy(to, from, len + 1);
196 to[len] = '\0';
197 return to;
200 /* below this you should not change anything unless you know a lot
201 ** about this program or about troff.
/// Structure for character definitions
struct CSTRDEF {
    int nr;         ///< two-character name packed with V(): first * 256 + second
    int slen;       ///< length recorded for the replacement text
    const char *st; ///< replacement text (HTML)
};
213 const char NEWLINE[2]="\n";
216 * Class for defining strings and macros
218 class StringDefinition
220 public:
221 StringDefinition( void ) : m_length(0) {}
222 StringDefinition( int len, const char* cstr ) : m_length( len ), m_output( cstr ) {}
223 public:
224 int m_length; ///< Length of output text
225 QByteArray m_output; ///< Defined string
/**
 * Class for defining number registers
 * \note Not for internal read-only registers
 */
class NumberDefinition
{
public:
    /**
     * Both arguments default to 0, so the class can be default-constructed,
     * built from a value alone (also implicitly from an int) or from a
     * value and an increment.
     */
    NumberDefinition(int value = 0, int incr = 0)
        : m_value(value)
        , m_increment(incr)
    {
    }

    int m_value;     ///< value of number register
    int m_increment; ///< Increment of number register
    // ### TODO: display form (.af)
};
245 * Map of character definitions
247 static QMap<QByteArray,StringDefinition> s_characterDefinitionMap;
250 * Map of string variable and macro definitions
251 * \note String variables and macros are the same thing!
253 static QMap<QByteArray,StringDefinition> s_stringDefinitionMap;
256 * Map of number registers
257 * \note Internal number registers (starting with a dot) are not handled here
259 static QMap<QByteArray,NumberDefinition> s_numberDefinitionMap;
261 static void fill_old_character_definitions( void );
264 * Initialize character variables
266 static void InitCharacterDefinitions( void )
268 fill_old_character_definitions();
269 // ### HACK: as we are converting to HTML too early, define characters with HTML references
270 s_characterDefinitionMap.insert( "&lt;-", StringDefinition( 1, "&larr;" ) ); // <-
271 s_characterDefinitionMap.insert( "-&gt;", StringDefinition( 1, "&rarr;" ) ); // ->
272 s_characterDefinitionMap.insert( "&lt;&gt;", StringDefinition( 1, "&harr;" ) ); // <>
273 s_characterDefinitionMap.insert( "&lt;=", StringDefinition( 1, "&le;" ) ); // <=
274 s_characterDefinitionMap.insert( "&gt;=", StringDefinition( 1, "&ge;" ) ); // >=
275 // End HACK
279 * Initialize string variables
281 static void InitStringDefinitions( void )
283 // mdoc-only, see mdoc.samples(7)
284 s_stringDefinitionMap.insert( "<=", StringDefinition( 1, "&le;" ) );
285 s_stringDefinitionMap.insert( ">=", StringDefinition( 1, "&ge;" ) );
286 s_stringDefinitionMap.insert( "Rq", StringDefinition( 1, "&rdquo;" ) );
287 s_stringDefinitionMap.insert( "Lq", StringDefinition( 1, "&ldquo;" ) );
288 s_stringDefinitionMap.insert( "ua", StringDefinition( 1, "&circ" ) ); // Note this is different from \(ua
289 s_stringDefinitionMap.insert( "aa", StringDefinition( 1, "&acute;" ) );
290 s_stringDefinitionMap.insert( "ga", StringDefinition( 1, "`" ) );
291 s_stringDefinitionMap.insert( "q", StringDefinition( 1, "&quot;" ) );
292 s_stringDefinitionMap.insert( "Pi", StringDefinition( 1, "&pi;" ) );
293 s_stringDefinitionMap.insert( "Ne", StringDefinition( 1, "&ne;" ) );
294 s_stringDefinitionMap.insert( "Le", StringDefinition( 1, "&le;" ) );
295 s_stringDefinitionMap.insert( "Ge", StringDefinition( 1, "&ge;" ) );
296 s_stringDefinitionMap.insert( "Lt", StringDefinition( 1, "&lt;" ) );
297 s_stringDefinitionMap.insert( "Gt", StringDefinition( 1, "&gt;" ) );
298 s_stringDefinitionMap.insert( "Pm", StringDefinition( 1, "&plusmn;" ) );
299 s_stringDefinitionMap.insert( "If", StringDefinition( 1, "&infin;" ) );
300 s_stringDefinitionMap.insert( "Na", StringDefinition( 3, "NaN" ) );
301 s_stringDefinitionMap.insert( "Ba", StringDefinition( 1, "|" ) );
302 // end mdoc-only
303 // man(7)
304 s_stringDefinitionMap.insert( "Tm", StringDefinition( 1, "&trade;" ) ); // \*(TM
305 s_stringDefinitionMap.insert( "R", StringDefinition( 1, "&reg;" ) ); // \*R
306 // end man(7)
307 // Missing characters from man(7):
308 // \*S "Change to default font size"
309 #ifndef SIMPLE_MAN2HTML
310 // Special KDE KIO man:
311 const QByteArray kdeversion(KDE_VERSION_STRING);
312 s_stringDefinitionMap.insert( ".KDE_VERSION_STRING", StringDefinition( kdeversion.length(), kdeversion ) );
313 #endif
317 * Initialize number registers
318 * \note Internal read-only registers are not handled here
320 static void InitNumberDefinitions( void )
322 // As the date number registers are more for end-users, better choose local time.
323 // Groff seems to support Gregorian dates only
324 QDate today( QDate::currentDate() );
325 s_numberDefinitionMap.insert( "year", today.year() ); // Y2K-correct year
326 s_numberDefinitionMap.insert( "yr", today.year() - 1900 ); // Y2K-incorrect year
327 s_numberDefinitionMap.insert( "mo", today.month() );
328 s_numberDefinitionMap.insert( "dy", today.day() );
329 s_numberDefinitionMap.insert( "dw", today.dayOfWeek() );
333 #define V(A,B) ((A)*256+(B))
335 //used in expand_char, e.g. for "\(bu"
336 // see groff_char(7) for list
337 static CSTRDEF standardchar[] = {
338 { V('*','*'), 1, "*" },
339 { V('*','A'), 1, "&Alpha;" },
340 { V('*','B'), 1, "&Beta;" },
341 { V('*','C'), 1, "&Xi;" },
342 { V('*','D'), 1, "&Delta;" },
343 { V('*','E'), 1, "&Epsilon;" },
344 { V('*','F'), 1, "&Phi;" },
345 { V('*','G'), 1, "&Gamma;" },
346 { V('*','H'), 1, "&Theta;" },
347 { V('*','I'), 1, "&Iota;" },
348 { V('*','K'), 1, "&Kappa;" },
349 { V('*','L'), 1, "&Lambda;" },
350 { V('*','M'), 1, "&Mu:" },
351 { V('*','N'), 1, "&Nu;" },
352 { V('*','O'), 1, "&Omicron;" },
353 { V('*','P'), 1, "&Pi;" },
354 { V('*','Q'), 1, "&Psi;" },
355 { V('*','R'), 1, "&Rho;" },
356 { V('*','S'), 1, "&Sigma;" },
357 { V('*','T'), 1, "&Tau;" },
358 { V('*','U'), 1, "&Upsilon;" },
359 { V('*','W'), 1, "&Omega;" },
360 { V('*','X'), 1, "&Chi;" },
361 { V('*','Y'), 1, "&Eta;" },
362 { V('*','Z'), 1, "&Zeta;" },
363 { V('*','a'), 1, "&alpha;"},
364 { V('*','b'), 1, "&beta;"},
365 { V('*','c'), 1, "&xi;"},
366 { V('*','d'), 1, "&delta;"},
367 { V('*','e'), 1, "&epsilon;"},
368 { V('*','f'), 1, "&phi;"},
369 { V('*','g'), 1, "&gamma;"},
370 { V('*','h'), 1, "&theta;"},
371 { V('*','i'), 1, "&iota;"},
372 { V('*','k'), 1, "&kappa;"},
373 { V('*','l'), 1, "&lambda;"},
374 { V('*','m'), 1, "&mu;" },
375 { V('*','n'), 1, "&nu;"},
376 { V('*','o'), 1, "&omicron;"},
377 { V('*','p'), 1, "&pi;"},
378 { V('*','q'), 1, "&psi;"},
379 { V('*','r'), 1, "&rho;"},
380 { V('*','s'), 1, "&sigma;"},
381 { V('*','t'), 1, "&tau;"},
382 { V('*','u'), 1, "&upsilon;"},
383 { V('*','w'), 1, "&omega;"},
384 { V('*','x'), 1, "&chi;"},
385 { V('*','y'), 1, "&eta;"},
386 { V('*','z'), 1, "&zeta;"},
387 { V('+','-'), 1, "&plusmn;" }, // not in groff_char(7)
388 { V('+','f'), 1, "&phi;"}, // phi1, we use the standard phi
389 { V('+','h'), 1, "&theta;"}, // theta1, we use the standard theta
390 { V('+','p'), 1, "&omega;"}, // omega1, we use the standard omega
391 { V('1','2'), 1, "&frac12;" },
392 { V('1','4'), 1, "&frac14;" },
393 { V('3','4'), 1, "&frac34;" },
394 { V('F','i'), 1, "&#xFB03;" }, // ffi ligature
395 { V('F','l'), 1, "&#xFB04;" }, // ffl ligature
396 { V('a','p'), 1, "~" },
397 { V('b','r'), 1, "|" },
398 { V('b','u'), 1, "&bull;" },
399 { V('b','v'), 1, "|" },
400 { V('c','i'), 1, "&#x25CB;" }, // circle ### TODO verify
401 { V('c','o'), 1, "&copy;" },
402 { V('c','t'), 1, "&cent;" },
403 { V('d','e'), 1, "&deg;" },
404 { V('d','g'), 1, "&dagger;" },
405 { V('d','i'), 1, "&divide;" },
406 { V('e','m'), 1, "&emdash;" },
407 { V('e','n'), 1, "&endash;"},
408 { V('e','q'), 1, "=" },
409 { V('e','s'), 1, "&empty;" },
410 { V('f','f'), 1, "&#0xFB00;" }, // ff ligature
411 { V('f','i'), 1, "&#0xFB01;" }, // fi ligature
412 { V('f','l'), 1, "&#0xFB02;" }, // fl ligature
413 { V('f','m'), 1, "&prime;" },
414 { V('g','a'), 1, "`" },
415 { V('h','y'), 1, "-" },
416 { V('l','c'), 2, "|&#175;" }, // ### TODO: not in groff_char(7)
417 { V('l','f'), 2, "|_" }, // ### TODO: not in groff_char(7)
418 { V('l','k'), 1, "<FONT SIZE=+2>{</FONT>" }, // ### TODO: not in groff_char(7)
419 { V('m','i'), 1, "-" }, // ### TODO: not in groff_char(7)
420 { V('m','u'), 1, "&times;" },
421 { V('n','o'), 1, "&not;" },
422 { V('o','r'), 1, "|" },
423 { V('p','l'), 1, "+" },
424 { V('r','c'), 2, "&#175;|" }, // ### TODO: not in groff_char(7)
425 { V('r','f'), 2, "_|" }, // ### TODO: not in groff_char(7)
426 { V('r','g'), 1, "&reg;" },
427 { V('r','k'), 1, "<FONT SIZE=+2>}</FONT>" }, // ### TODO: not in groff_char(7)
428 { V('r','n'), 1, "&oline;" },
429 { V('r','u'), 1, "_" },
430 { V('s','c'), 1, "&sect;" },
431 { V('s','l'), 1, "/" },
432 { V('s','q'), 2, "&#x25A1" }, // WHITE SQUARE
433 { V('t','s'), 1, "&#x03C2;" }, // FINAL SIGMA
434 { V('u','l'), 1, "_" },
435 { V('-','D'), 1, "&ETH;" },
436 { V('S','d'), 1, "&eth;" },
437 { V('T','P'), 1, "&THORN;" },
438 { V('T','p'), 1, "&thorn;" },
439 { V('A','E'), 1, "&AElig;" },
440 { V('a','e'), 1, "&aelig;" },
441 { V('O','E'), 1, "&OElig;" },
442 { V('o','e'), 1, "&oelig;" },
443 { V('s','s'), 1, "&szlig;" },
444 { V('\'','A'), 1, "&Aacute;" },
445 { V('\'','E'), 1, "&Eacute;" },
446 { V('\'','I'), 1, "&Iacute;" },
447 { V('\'','O'), 1, "&Oacute;" },
448 { V('\'','U'), 1, "&Uacute;" },
449 { V('\'','Y'), 1, "&Yacute;" },
450 { V('\'','a'), 1, "&aacute;" },
451 { V('\'','e'), 1, "&eacute;" },
452 { V('\'','i'), 1, "&iacute;" },
453 { V('\'','o'), 1, "&oacute;" },
454 { V('\'','u'), 1, "&uacute;" },
455 { V('\'','y'), 1, "&yacute;" },
456 { V(':','A'), 1, "&Auml;" },
457 { V(':','E'), 1, "&Euml;" },
458 { V(':','I'), 1, "&Iuml;" },
459 { V(':','O'), 1, "&Ouml;" },
460 { V(':','U'), 1, "&Uuml;" },
461 { V(':','a'), 1, "&auml;" },
462 { V(':','e'), 1, "&euml;" },
463 { V(':','i'), 1, "&iuml;" },
464 { V(':','o'), 1, "&ouml;" },
465 { V(':','u'), 1, "&uuml;" },
466 { V(':','y'), 1, "&yuml;" },
467 { V('^','A'), 1, "&Acirc;" },
468 { V('^','E'), 1, "&Ecirc;" },
469 { V('^','I'), 1, "&Icirc;" },
470 { V('^','O'), 1, "&Ocirc;" },
471 { V('^','U'), 1, "&Ucirc;" },
472 { V('^','a'), 1, "&acirc;" },
473 { V('^','e'), 1, "&ecirc;" },
474 { V('^','i'), 1, "&icirc;" },
475 { V('^','o'), 1, "&ocirc;" },
476 { V('^','u'), 1, "&ucirc;" },
477 { V('`','A'), 1, "&Agrave;" },
478 { V('`','E'), 1, "&Egrave;" },
479 { V('`','I'), 1, "&Igrave;" },
480 { V('`','O'), 1, "&Ograve;" },
481 { V('`','U'), 1, "&Ugrave;" },
482 { V('`','a'), 1, "&agrave;" },
483 { V('`','e'), 1, "&egrave;" },
484 { V('`','i'), 1, "&igrave;" },
485 { V('`','o'), 1, "&ograve;" },
486 { V('`','u'), 1, "&ugrave;" },
487 { V('~','A'), 1, "&Atilde;" },
488 { V('~','N'), 1, "&Ntilde;" },
489 { V('~','O'), 1, "&Otilde;" },
490 { V('~','a'), 1, "&atilde" },
491 { V('~','n'), 1, "&ntidle;" },
492 { V('~','o'), 1, "&otidle;" },
493 { V(',','C'), 1, "&Ccedil;" },
494 { V(',','c'), 1, "&ccedil;" },
495 { V('/','L'), 1, "&#x0141;" },
496 { V('/','l'), 1, "&#x0142;" },
497 { V('/','O'), 1, "&Oslash;" },
498 { V('/','o'), 1, "&oslash;" },
499 { V('o','A'), 1, "&Aring;" },
500 { V('o','a'), 1, "&aring;" },
501 { V('a','"'), 1, "\"" },
502 { V('a','-'), 1, "&macr;" },
503 { V('a','.'), 1, "." },
504 { V('a','^'), 1, "&circ;" },
505 { V('a','a'), 1, "&acute;" },
506 { V('a','b'), 1, "`" },
507 { V('a','c'), 1, "&cedil;" },
508 { V('a','d'), 1, "&uml;" },
509 { V('a','h'), 1, "&#x02C2;" }, // caron
510 { V('a','o'), 1, "&#x02DA;" }, // ring
511 { V('a','~'), 1, "&tilde;" },
512 { V('h','o'), 1, "&#x02DB;" }, // ogonek
513 { V('.','i'), 1, "&#x0131;" }, // dot less i
514 { V('C','s'), 1, "&curren;" }, //krazy:exclude=spelling
515 { V('D','o'), 1, "$" },
516 { V('P','o'), 1, "&pound;" },
517 { V('Y','e'), 1, "&yen;" },
518 { V('F','n'), 1, "&fnof;" },
519 { V('F','o'), 1, "&laquo;" },
520 { V('F','c'), 1, "&raquo;" },
521 { V('f','o'), 1, "&#x2039;" }, // single left guillemet
522 { V('f','c'), 1, "&#x203A;" }, // single right guillemet
523 { V('r','!'), 1, "&iecl;" },
524 { V('r','?'), 1, "&iquest;" },
525 { V('O','f'), 1, "&ordf" },
526 { V('O','m'), 1, "&ordm;" },
527 { V('p','c'), 1, "&middot;" },
528 { V('S','1'), 1, "&sup1;" },
529 { V('S','2'), 1, "&sup2;" },
530 { V('S','3'), 1, "&sup3;" },
531 { V('<','-'), 1, "&larr;" },
532 { V('-','>'), 1, "&rarr;" },
533 { V('<','>'), 1, "&harr;" },
534 { V('d','a'), 1, "&darr;" },
535 { V('u','a'), 1, "&uarr;" },
536 { V('l','A'), 1, "&lArr;" },
537 { V('r','A'), 1, "&rArr;" },
538 { V('h','A'), 1, "&hArr;" },
539 { V('d','A'), 1, "&dArr;" },
540 { V('u','A'), 1, "&uArr;" },
541 { V('b','a'), 1, "|" },
542 { V('b','b'), 1, "&brvbar;" },
543 { V('t','m'), 1, "&trade;" },
544 { V('d','d'), 1, "&Dagger;" },
545 { V('p','s'), 1, "&para;" },
546 { V('%','0'), 1, "&permil;" },
547 { V('f','/'), 1, "&frasl;" }, // Fraction slash
548 { V('s','d'), 1, "&Prime;" },
549 { V('h','a'), 1, "^" },
550 { V('t','i'), 1, "&tidle;" },
551 { V('l','B'), 1, "[" },
552 { V('r','B'), 1, "]" },
553 { V('l','C'), 1, "{" },
554 { V('r','C'), 1, "}" },
555 { V('l','a'), 1, "&lt;" },
556 { V('r','a'), 1, "&gt;" },
557 { V('l','h'), 1, "&le;" },
558 { V('r','h'), 1, "&ge;" },
559 { V('B','q'), 1, "&bdquo;" },
560 { V('b','q'), 1, "&sbquo;" },
561 { V('l','q'), 1, "&ldquo;" },
562 { V('r','q'), 1, "&rdquo;" },
563 { V('o','q'), 1, "&lsquo;" },
564 { V('c','q'), 1, "&rsquo;" },
565 { V('a','q'), 1, "'" },
566 { V('d','q'), 1, "\"" },
567 { V('a','t'), 1, "@" },
568 { V('s','h'), 1, "#" },
569 { V('r','s'), 1, "\\" },
570 { V('t','f'), 1, "&there4;" },
571 { V('~','~'), 1, "&cong;" },
572 { V('~','='), 1, "&asymp;" },
573 { V('!','='), 1, "&ne;" },
574 { V('<','='), 1, "&le;" },
575 { V('=','='), 1, "&equiv;" },
576 { V('=','~'), 1, "&cong;" }, // ### TODO: verify
577 { V('>','='), 1, "&ge;" },
578 { V('A','N'), 1, "&and;" },
579 { V('O','R'), 1, "&or;" },
580 { V('t','e'), 1, "&exist;" },
581 { V('f','a'), 1, "&forall;" },
582 { V('A','h'), 1, "&alefsym;" },
583 { V('I','m'), 1, "&image;" },
584 { V('R','e'), 1, "&real;" },
585 { V('i','f'), 1, "&infin;" },
586 { V('m','d'), 1, "&sdot;" },
587 { V('m','o'), 1, "&#x2206;" }, // element ### TODO verify
588 { V('n','m'), 1, "&notin;" },
589 { V('p','t'), 1, "&prop;" },
590 { V('p','p'), 1, "&perp;" },
591 { V('s','b'), 1, "&sub;" },
592 { V('s','p'), 1, "&sup;" },
593 { V('i','b'), 1, "&sube;" },
594 { V('i','p'), 1, "&supe;" },
595 { V('i','s'), 1, "&int;" },
596 { V('s','r'), 1, "&radic;" },
597 { V('p','d'), 1, "&part;" },
598 { V('c','*'), 1, "&otimes;" },
599 { V('c','+'), 1, "&oplus;" },
600 { V('c','a'), 1, "&cap;" },
601 { V('c','u'), 1, "&cup;" },
602 { V('g','r'), 1, "V" }, // gradient ### TODO Where in Unicode?
603 { V('C','R'), 1, "&crarr;" },
604 { V('s','t'), 2, "-)" }, // "such that" ### TODO Where in Unicode?
605 { V('/','_'), 1, "&ang;" },
606 { V('w','p'), 1, "&weierp;" },
607 { V('l','z'), 1, "&loz;" },
608 { V('a','n'), 1, "-" }, // "horizontal arrow extension" ### TODO Where in Unicode?
611 /* default: print code */
614 /* static char eqndelimopen=0, eqndelimclose=0; */
615 static char escapesym='\\', nobreaksym='\'', controlsym='.', fieldsym=0, padsym=0;
617 static char *buffer=NULL;
618 static int buffpos=0, buffmax=0;
619 static bool scaninbuff=false;
620 static int itemdepth=0;
621 static int section=0;
622 static int dl_set[20]= { 0 };
623 static bool still_dd=0;
624 static int tabstops[20] = { 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96 };
625 static int maxtstop=12;
626 static int curpos=0;
628 static char *scan_troff(char *c, bool san, char **result);
629 static char *scan_troff_mandoc(char *c, bool san, char **result);
631 static QList<char*> s_argumentList;
633 static QByteArray htmlPath, cssPath;
635 static QByteArray s_dollarZero; // Value of $0
637 void setResourcePath(const QByteArray& _htmlPath, const QByteArray& _cssPath)
639 htmlPath=_htmlPath;
640 cssPath=_cssPath;
643 static void fill_old_character_definitions( void )
645 for (size_t i = 0; i < sizeof(standardchar)/sizeof(CSTRDEF); i++)
647 const int nr = standardchar[i].nr;
648 const char temp[3] = { nr / 256, nr % 256, 0 };
649 QByteArray name( temp );
650 s_characterDefinitionMap.insert( name, StringDefinition( standardchar[i].slen, standardchar[i].st ) );
654 static char outbuffer[NULL_TERMINATED(HUGE_STR_MAX)];
655 static int no_newline_output=0;
656 static int newline_for_fun=0;
657 static bool output_possible=false;
/// Directories searched by add_links() for C header files.
/// The list must end with a null pointer: add_links() iterates until it finds one.
static const char *includedirs[] = {
    "/usr/include",
    "/usr/include/sys",
    "/usr/local/include",
    "/opt/local/include",
    "/usr/ccs",
    "/usr/X11R6/include",
    "/usr/openwin/include",
    "/usr/include/g++",
    NULL
};
671 static bool ignore_links=false;
673 static void add_links(char *c)
676 ** Add the links to the output.
677 ** At the moment the following are recognized:
679 ** name(*) -> ../man?/name.*
680 ** method://string -> method://string
681 ** www.host.name -> http://www.host.name
682 ** ftp.host.name -> ftp://ftp.host.name
683 ** name@host -> mailto:name@host
684 ** <name.h> -> file:/usr/include/name.h (guess)
686 ** Other possible links to add in the future:
688 ** /dir/dir/file -> file:/dir/dir/file
690 if (ignore_links)
692 output_real(c);
693 return;
696 int i,j,nr;
697 char *f, *g,*h;
698 const int numtests=6; // Nmber of tests
699 char *idtest[numtests]; // url, mailto, www, ftp, manpage, C header file
700 bool ok;
701 /* search for (section) */
702 nr=0;
703 idtest[0]=strstr(c+1,"://");
704 idtest[1]=strchr(c+1,'@');
705 idtest[2]=strstr(c,"www.");
706 idtest[3]=strstr(c,"ftp.");
707 idtest[4]=strchr(c+1,'(');
708 idtest[5]=strstr(c+1,".h&gt;");
709 for (i=0; i<numtests; ++i) nr += (idtest[i]!=NULL);
710 while (nr) {
711 j=-1;
712 for (i=0; i<numtests; i++)
713 if (idtest[i] && (j<0 || idtest[i]<idtest[j])) j=i;
714 switch (j) {
715 case 5: { /* <name.h> */
716 f=idtest[5];
717 h=f+2;
718 g=f;
719 while (g>c && g[-1]!=';') g--;
720 bool wrote_include = false;
722 if (g!=c) {
724 QByteArray dir;
725 QByteArray file(g, h - g + 1);
726 file = file.trimmed();
727 for (int index = 0; includedirs[index]; index++) {
728 QByteArray str( includedirs[index] );
729 str.append('/');
730 str.append(file);
731 if (!access(str.data(), R_OK)) {
732 dir = includedirs[index];
733 break;
736 if (!dir.isEmpty()) {
738 char t;
739 t=*g;
740 *g=0;
741 output_real(c);
742 *g=t;*h=0;
744 QByteArray str;
745 str.append( "<A HREF=\"file:" );
746 str.append( dir.data() );
747 str.append( "/" );
748 str.append( file.data() );
749 str.append( "\">" );
750 str.append( file.data() );
751 str.append( "</A>&gt;" );
753 output_real(str.data());
754 c=f+6;
755 wrote_include = true;
760 if (!wrote_include) {
761 f[5]=0;
762 output_real(c);
763 f[5]=';';
764 c=f+5;
767 break;
768 case 4: /* manpage */
769 f=idtest[j];
770 /* check section */
771 g=strchr(f,')');
772 // The character before f must alphanumeric, the end of a HTML tag or the end of a &nbsp;
773 if (g!=NULL && f>c && (g-f)<12 && (isalnum(f[-1]) || f[-1]=='>' || ( f[-1] == ';' ) ) &&
774 isdigit(f[1]) && f[1]!='0' && ((g-f)<=2 || isalpha(f[2])))
776 ok = true;
777 h = f+2;
778 while (h<g)
780 if (!isalnum(*h++))
782 ok = false;
783 break;
787 else
788 ok = false;
790 h = f - 1;
791 if ( ok )
793 // Skip &nbsp;
794 kDebug(7107) << "BEFORE SECTION:" << *h;
795 if ( ( h > c + 5 ) && ( ! memcmp( h-5, "&nbsp;", 6 ) ) )
797 h -= 6;
798 kDebug(7107) << "Skip &nbsp;";
800 else if ( *h == ';' )
802 // Not a non-breaking space, so probably not ok
803 ok = false;
807 if (ok)
809 /* this might be a link */
810 /* skip html makeup */
811 while (h>c && *h=='>') {
812 while (h!=c && *h!='<') h--;
813 if (h!=c) h--;
815 if (isalnum(*h)) {
816 char t,sec, *e;
817 QByteArray fstr(f);
818 e=h+1;
819 sec=f[1];
820 const int index = fstr.indexOf(')', 2);
821 QByteArray subsec;
822 if (index != -1)
823 subsec = fstr.mid(2, index - 2);
824 else // No closing ')' found, take first character as subsection.
825 subsec = fstr.mid(2, 1);
826 while (h>c && (isalnum(h[-1]) || h[-1]=='_'
827 || h[-1]==':' || h[-1]=='-' || h[-1]=='.'))
828 h--;
829 t=*h;
830 *h='\0';
831 output_real(c);
832 *h=t;
833 t=*e;
834 *e='\0';
835 QByteArray str("<a href=\"man:");
836 str += h;
837 str += '(';
838 str += char( sec );
839 if ( !subsec.isEmpty() )
840 str += subsec.toLower();
841 str += ")\">";
842 str += h;
843 str += "</a>";
844 output_real(str.data());
845 *e=t;
846 c=e;
849 *f='\0';
850 output_real(c);
851 *f='(';
852 idtest[4]=f-1;
853 c=f;
854 break; /* manpage */
855 case 3: /* ftp */
856 case 2: /* www */
857 g=f=idtest[j];
858 while (*g && (isalnum(*g) || *g=='_' || *g=='-' || *g=='+' ||
859 *g=='.' || *g=='/')) g++;
860 if (g[-1]=='.') g--;
861 if (g-f>4) {
862 char t;
863 t=*f; *f='\0';
864 output_real(c);
865 *f=t; t=*g;*g='\0';
866 QByteArray str;
867 str.append( "<A HREF=\"" );
868 str.append( j == 3 ? "ftp" : "http" );
869 str.append( "://" );
870 str.append( f );
871 str.append( "\">" );
872 str.append( f );
873 str.append( "</A>" );
874 output_real(str.data());
875 *g=t;
876 c=g;
877 } else {
878 f[3]='\0';
879 output_real(c);
880 c=f+3;
881 f[3]='.';
883 break;
884 case 1: /* mailto */
885 g=f=idtest[1];
886 while (g>c && (isalnum(g[-1]) || g[-1]=='_' || g[-1]=='-' ||
887 g[-1]=='+' || g[-1]=='.' || g[-1]=='%')) g--;
888 if (g-7>=c && g[-1]==':')
890 // We have perhaps an email address starting with mailto:
891 if (!qstrncmp("mailto:",g-7,7))
892 g-=7;
894 h=f+1;
895 while (*h && (isalnum(*h) || *h=='_' || *h=='-' || *h=='+' ||
896 *h=='.')) h++;
897 if (*h=='.') h--;
898 if (h-f>4 && f-g>1) {
899 char t;
900 t=*g;
901 *g='\0';
902 output_real(c);
903 *g=t;t=*h;*h='\0';
904 QByteArray str;
905 str.append( "<A HREF=\"mailto:" );
906 str.append( g );
907 str.append( "\">" );
908 str.append( g );
909 str.append( "</A>" );
910 output_real(str.data());
911 *h=t;
912 c=h;
913 } else {
914 *f='\0';
915 output_real(c);
916 *f='@';
917 idtest[1]=c;
918 c=f;
920 break;
921 case 0: /* url */
922 g=f=idtest[0];
923 while (g>c && isalpha(g[-1]) && islower(g[-1])) g--;
924 h=f+3;
925 while (*h && !isspace(*h) && *h!='<' && *h!='>' && *h!='"' &&
926 *h!='&') h++;
927 if (f-g>2 && f-g<7 && h-f>3) {
928 char t;
929 t=*g;
930 *g='\0';
931 output_real(c);
932 *g=t; t=*h; *h='\0';
933 QByteArray str;
934 str.append( "<A HREF=\"" );
935 str.append( g );
936 str.append( "\">" );
937 str.append( g );
938 str.append( "</A>" );
939 output_real(str.data());
940 *h=t;
941 c=h;
942 } else {
943 f[1]='\0';
944 output_real(c);
945 f[1]='/';
946 c=f+1;
948 break;
949 default:
950 break;
952 nr=0;
953 if (idtest[0] && idtest[0]<=c) idtest[0]=strstr(c+1,"://");
954 if (idtest[1] && idtest[1]<=c) idtest[1]=strchr(c+1,'@');
955 if (idtest[2] && idtest[2]<c) idtest[2]=strstr(c,"www.");
956 if (idtest[3] && idtest[3]<c) idtest[3]=strstr(c,"ftp.");
957 if (idtest[4] && idtest[4]<=c) idtest[4]=strchr(c+1,'(');
958 if (idtest[5] && idtest[5]<=c) idtest[5]=strstr(c+1,".h&gt;");
959 for (i=0; i<numtests; i++) nr += (idtest[i]!=NULL);
961 output_real(c);
964 static QByteArray current_font;
965 static int current_size=0;
966 static int fillout=1;
968 static void out_html(const char *c)
970 if (!c) return;
972 // Added, probably due to the const?
973 char *c2 = qstrdup(c);
974 char *c3 = c2;
976 static int obp=0;
978 if (no_newline_output) {
979 int i=0;
980 no_newline_output=1;
981 while (c2[i]) {
982 if (!no_newline_output) c2[i-1]=c2[i];
983 if (c2[i]=='\n') no_newline_output=0;
984 i++;
986 if (!no_newline_output) c2[i-1]=0;
988 if (scaninbuff) {
989 while (*c2) {
990 if (buffpos>=buffmax) {
991 char *h = new char[buffmax*2];
993 #ifdef SIMPLE_MAN2HTML
994 if (!h)
996 cerr << "Memory full, cannot output!" << endl;
997 exit(1);
999 #else
1000 // modern compiler do not return a NULL for a new
1001 #endif
1002 memcpy(h, buffer, buffmax);
1003 delete [] buffer;
1004 buffer=h;
1005 buffmax=buffmax*2;
1007 buffer[buffpos++]=*c2++;
1009 } else
1010 if (output_possible) {
1011 while (*c2) {
1012 outbuffer[obp++]=*c2;
1013 if (*c=='\n' || obp >= HUGE_STR_MAX) {
1014 outbuffer[obp]='\0';
1015 add_links(outbuffer);
1016 obp=0;
1018 c2++;
1021 delete [] c3;
1024 static QByteArray set_font( const QByteArray& name )
1026 // Every font but R (Regular) creates <span> elements
1027 QByteArray markup;
1028 if ( current_font != "R" && !current_font.isEmpty() )
1029 markup += "</span>";
1030 const uint len = name.length();
1031 bool fontok = true;
1032 if ( len == 1 )
1034 const char lead = name[0];
1035 switch (lead)
1037 case 'P': // ### TODO: this seems to mean "precedent font"
1038 case 'R': break; // regular, do nothing
1039 case 'I': markup += "<span style=\"font-style:italic\">"; break;
1040 case 'B': markup += "<span style=\"font-weight:bold\">"; break;
1041 case 'L': markup += "<span style=\"font-family:monospace\">"; break; // ### What's L?
1042 default: fontok = false;
1045 else if ( len == 2 )
1047 if ( name == "BI" )
1048 markup += "<span style=\"font-style:italic;font-weight:bold\">";
1049 // Courier
1050 else if ( name == "CR" )
1051 markup += "<span style=\"font-family:monospace\">";
1052 else if ( name == "CW" ) // CW is used by pod2man(1) (part of perldoc(1))
1053 markup += "<span style=\"font-family:monospace\">";
1054 else if ( name == "CI" )
1055 markup += "<span style=\"font-family:monospace;font-style:italic\">";
1056 else if ( name == "CB" )
1057 markup += "<span style=\"font-family:monospace;font-weight:bold\">";
1058 // Times
1059 else if ( name == "TR" )
1060 markup += "<span style=\"font-family:serif\">";
1061 else if ( name == "TI" )
1062 markup += "<span style=\"font-family:serif;font-style:italic\">";
1063 else if ( name == "TB" )
1064 markup += "<span style=\"font-family:serif;font-weight:bold\">";
1065 // Helvetica
1066 else if ( name == "HR" )
1067 markup += "<span style=\"font-family:sans-serif\">";
1068 else if ( name == "HI" )
1069 markup += "<span style=\"font-family:sans-serif;font-style:italic\">";
1070 else if ( name == "HB" )
1071 markup += "<span style=\"font-family:sans-serif;font-weight:bold\">";
1072 else
1073 fontok = false;
1075 else if ( len == 3 )
1077 if ( name == "CBI" )
1078 markup += "<span style=\"font-family:monospace;font-style:italic;font-weight:bold\">";
1079 else if ( name == "TBI" )
1080 markup += "<span style=\"font-family:serif;font-style:italic;font-weight:bold\">";
1081 else if ( name == "HBI" )
1082 markup += "<span style=\"font-family:sans-serif;font-style:italic;font-weight:bold\">";
1084 if (fontok)
1085 current_font = name;
1086 else
1087 current_font = "R"; // Still nothing, then it is 'R' (Regular)
1088 return markup;
1091 static QByteArray change_to_size(int nr)
1093 switch (nr)
1095 case '0': case '1': case '2': case '3': case '4': case '5': case '6':
1096 case '7': case '8': case '9': nr=nr-'0'; break;
1097 case '\0': break;
1098 default: nr=current_size+nr; if (nr>9) nr=9; if (nr< -9) nr=-9; break;
1100 if ( nr == current_size )
1101 return "";
1102 const QByteArray font ( current_font );
1103 QByteArray markup;
1104 markup = set_font("R");
1105 if (current_size)
1106 markup += "</FONT>";
1107 current_size=nr;
1108 if (nr)
1110 markup += "<FONT SIZE=\"";
1111 if (nr>0)
1112 markup += '+';
1113 else
1115 markup += '-';
1116 nr=-nr;
1118 markup += char( nr + '0' );
1119 markup += "\">";
1121 markup += set_font( font );
1122 return markup;
1125 /* static int asint=0; */
// Numeric result of the last escape scanned by scan_escape_direct(): it is
// reset to 0 on entry there and set by the \n and \w escapes.
1126 static int intresult=0;
// Advance c past the next '\n'; stops on the terminating NUL if there is none.
1128 #define SKIPEOL while (*c && *c++!='\n') {}
// When true, scan_escape() does not output the escape's replacement and the
// font/size escapes do not change the current font or size.
1130 static bool skip_escape=false;
// Read by the '\\' case of scan_escape_direct(): when true the pointer is
// stepped back instead of producing a backslash.
1131 static bool single_escape=false;
// Forward declaration: the scan_named_*() helpers below call it recursively.
1133 static char *scan_escape_direct( char *c, QByteArray& cstr );
1136 * scan a named character
1137 * param c position
1139 static QByteArray scan_named_character( char*& c )
1141 QByteArray name;
1142 if ( *c == '(' )
1144 // \*(ab Name of two characters
1145 if ( c[1] == escapesym )
1147 QByteArray cstr;
1148 c = scan_escape_direct( c+2, cstr );
1149 // ### HACK: as we convert characters too early to HTML, we need to support more than 2 characters here and assume that all characters passed by the variable are to be used.
1150 name = cstr;
1152 else
1154 name+=c[1];
1155 name+=c[2];
1156 c+=3;
1159 else if ( *c == '[' )
1161 // \*[long_name] Long name
1162 // Named character groff(7)
1163 // We must find the ] to get a name
1164 c++;
1165 while ( *c && *c != ']' && *c != '\n' )
1167 if ( *c == escapesym )
1169 QByteArray cstr;
1170 c = scan_escape_direct( c+1, cstr );
1171 const int result = cstr.indexOf(']');
1172 if ( result == -1 )
1173 name += cstr;
1174 else
1176 // Note: we drop the characters after the ]
1177 name += cstr.left( result );
1180 else
1182 name+=*c;
1183 c++;
1186 if ( !*c || *c == '\n' )
1188 kDebug(7107) << "Found linefeed! Could not parse character name: " << BYTEARRAY( name );
1189 return "";
1191 c++;
1193 else if ( *c =='C' || c[1]== '\'' )
1195 // \C'name'
1196 c+=2;
1197 while ( *c && *c != '\'' && *c != '\n' )
1199 if ( *c == escapesym )
1201 QByteArray cstr;
1202 c = scan_escape_direct( c+1, cstr );
1203 const int result = cstr.indexOf('\'');
1204 if ( result == -1 )
1205 name += cstr;
1206 else
1208 // Note: we drop the characters after the ]
1209 name += cstr.left( result );
1212 else
1214 name+=*c;
1215 c++;
1218 if ( !*c || *c == '\n' )
1220 kDebug(7107) << "Found linefeed! Could not parse (\\C mode) character name: " << BYTEARRAY( name );
1221 return "";
1223 c++;
1225 // Note: characters with a one character length name doe not exist, as they would collide with other escapes
1227 // Now we have the name, let us find it between the string names
1228 QMap<QByteArray,StringDefinition>::const_iterator it=s_characterDefinitionMap.find(name);
1229 if (it==s_characterDefinitionMap.end())
1231 kDebug(7107) << "EXCEPTION: cannot find character with name: " << BYTEARRAY( name );
1232 // No output, as an undefined string is empty by default
1233 return "";
1235 else
1237 kDebug(7107) << "Character with name: \"" << BYTEARRAY( name ) << "\" => " << BYTEARRAY( (*it).m_output );
1238 return (*it).m_output;
1242 static QByteArray scan_named_string(char*& c)
1244 QByteArray name;
1245 if ( *c == '(' )
1247 // \*(ab Name of two characters
1248 if ( c[1] == escapesym )
1250 QByteArray cstr;
1251 c = scan_escape_direct( c+2, cstr );
1252 kDebug(7107) << "\\(" << BYTEARRAY( cstr );
1253 // ### HACK: as we convert characters too early to HTML, we need to support more than 2 characters here and assume that all characters passed by the variable are to be used.
1254 name = cstr;
1256 else
1258 name+=c[1];
1259 name+=c[2];
1260 c+=3;
1263 else if ( *c == '[' )
1265 // \*[long_name] Long name
1266 // Named character groff(7)
1267 // We must find the ] to get a name
1268 c++;
1269 while ( *c && *c != ']' && *c != '\n' )
1271 if ( *c == escapesym )
1273 QByteArray cstr;
1274 c = scan_escape_direct( c+1, cstr );
1275 const int result = cstr.indexOf(']');
1276 if ( result == -1 )
1277 name += cstr;
1278 else
1280 // Note: we drop the characters after the ]
1281 name += cstr.left( result );
1284 else
1286 name+=*c;
1287 c++;
1290 if ( !*c || *c == '\n' )
1292 kDebug(7107) << "Found linefeed! Could not parse string name: " << BYTEARRAY( name );
1293 return "";
1295 c++;
1297 else
1299 // \*a Name of one character
1300 name+=*c;
1301 c++;
1303 // Now we have the name, let us find it between the string names
1304 QMap<QByteArray,StringDefinition>::const_iterator it=s_stringDefinitionMap.find(name);
1305 if (it==s_stringDefinitionMap.end())
1307 kDebug(7107) << "EXCEPTION: cannot find string with name: " << BYTEARRAY( name );
1308 // No output, as an undefined string is empty by default
1309 return "";
1311 else
1313 kDebug(7107) << "String with name: \"" << BYTEARRAY( name ) << "\" => " << BYTEARRAY( (*it).m_output );
1314 return (*it).m_output;
1318 static QByteArray scan_dollar_parameter(char*& c)
1320 int argno = 0; // No dollar argument number yet!
1321 if ( *c == '0' )
1323 //kDebug(7107) << "$0";
1324 c++;
1325 return s_dollarZero;
1327 else if ( *c >= '1' && *c <= '9' )
1329 //kDebug(7107) << "$ direct";
1330 argno = ( *c - '0' );
1331 c++;
1333 else if ( *c == '(' )
1335 //kDebug(7107) << "$(";
1336 if ( c[1] && c[2] && c[1] >= '0' && c[1] <= '9' && c[2] >= '0' && c[2] <= '9' )
1338 argno = ( c[1] - '0' ) * 10 + ( c[2] - '0' );
1339 c += 3;
1341 else
1343 if ( !c[1] )
1344 c++;
1345 else if ( !c[2] )
1346 c+=2;
1347 else
1348 c += 3;
1349 return "";
1352 else if ( *c == '[' )
1354 //kDebug(7107) << "$[";
1355 argno = 0;
1356 c++;
1357 while ( *c && *c>='0' && *c<='9' && *c!=']' )
1359 argno *= 10;
1360 argno += ( *c - '0' );
1361 c++;
1363 if ( *c != ']' )
1365 return "";
1367 c++;
1369 else if ( ( *c == '*' ) || ( *c == '@' ) )
1371 const bool quote = ( *c == '@' );
1372 QList<char*>::const_iterator it = s_argumentList.begin();
1373 QByteArray param;
1374 bool space = false;
1375 for ( ; it != s_argumentList.end(); ++it )
1377 if (space)
1378 param += ' ';
1379 if (quote)
1380 param += '\"'; // Not as HTML, as it could be used by macros !
1381 param += (*it);
1382 if (quote)
1383 param += '\"'; // Not as HTML, as it could be used by macros!
1384 space = true;
1386 c++;
1387 return param;
1389 else
1391 kDebug(7107) << "EXCEPTION: unknown parameter $" << *c;
1392 return "";
1394 //kDebug(7107) << "ARG $" << argno;
1395 if ( !s_argumentList.isEmpty() && argno > 0 )
1397 //kDebug(7107) << "ARG $" << argno << " OK!";
1398 argno--;
1399 if ( argno >= s_argumentList.size() )
1401 kDebug(7107) << "EXCEPTION: cannot find parameter $" << (argno+1);
1402 return "";
1405 return s_argumentList[argno];
1407 return "";
1410 /// return the value of read-only number registers
1411 static int read_only_number_register( const QByteArray& name )
1413 // Internal read-only variables
1414 if ( name == ".$" )
1416 kDebug(7107) << "\\n[.$] == " << s_argumentList.size();
1417 return s_argumentList.size();
1419 else if ( name == ".g" )
1420 return 0; // We are not groff(1)
1421 else if ( name == ".s" )
1422 return current_size;
1423 #if 0
1424 // ### TODO: map the fonts to a number
1425 else if ( name == ".f" )
1426 return current_font;
1427 #endif
1428 else if ( name == ".P" )
1429 return 0; // We are not printing
1430 else if ( name == ".A" )
1431 return s_nroff;
1432 #ifndef SIMPLE_MAN2HTML
1433 // Special KDE KIO man:
1434 else if ( name == ".KDE_VERSION_MAJOR" )
1435 return KDE_VERSION_MAJOR;
1436 else if ( name == ".KDE_VERSION_MINOR" )
1437 return KDE_VERSION_MINOR;
1438 else if ( name == ".KDE_VERSION_RELEASE" )
1439 return KDE_VERSION_RELEASE;
1440 else if ( name == ".KDE_VERSION" )
1441 return KDE_VERSION;
1442 #endif
1443 // ### TODO: should .T be set to "html"? But we are not the HTML post-processor. :-(
1445 // ### TODO: groff defines many more read-only number registers
1446 kDebug(7107) << "EXCEPTION: unknown read-only number register: " << BYTEARRAY( name );
1448 return 0; // Undefined variable
1452 /// get the value of a number register and auto-increment if asked
1453 static int scan_number_register( char*& c)
1455 int sign = 0; // Sign for auto-increment (if any)
1456 switch (*c)
1458 case '+': sign = 1; c++; break;
1459 case '-': sign = -1; c++; break;
1460 default: break;
1462 QByteArray name;
1463 if ( *c == '[' )
1465 c++;
1466 if ( *c == '+' )
1468 sign = 1;
1469 c++;
1471 else if ( *c == '-' )
1473 sign = -1;
1474 c++;
1476 while ( *c && *c != ']' && *c != '\n' )
1478 // ### TODO: a \*[string] could be inside and should be processed
1479 name+=*c;
1480 c++;
1482 if ( !*c || *c == '\n' )
1484 kDebug(7107) << "Found linefeed! Could not parse number register name: " << BYTEARRAY( name );
1485 return 0;
1487 c++;
1489 else if ( *c == '(' )
1491 c++;
1492 if ( *c == '+' )
1494 sign = 1;
1495 c++;
1497 else if ( *c == '-' )
1499 sign = -1;
1500 c++;
1502 name+=c[0];
1503 name+=c[1];
1504 c+=2;
1506 else
1508 name += *c;
1509 c++;
1511 if ( name[0] == '.' )
1513 return read_only_number_register( name );
1515 else
1517 QMap< QByteArray, NumberDefinition >::iterator it = s_numberDefinitionMap.find( name );
1518 if ( it == s_numberDefinitionMap.end() )
1520 return 0; // Undefined variable
1522 else
1524 (*it).m_value += sign * (*it).m_increment;
1525 return (*it).m_value;
1530 /// get and set font
1531 static QByteArray scan_named_font( char*& c )
1533 QByteArray name;
1534 if ( *c == '(' )
1536 // \f(ab Name of two characters
1537 if ( c[1] == escapesym )
1539 QByteArray cstr;
1540 c = scan_escape_direct( c+2, cstr );
1541 kDebug(7107) << "\\(" << BYTEARRAY( cstr );
1542 // ### HACK: as we convert characters too early to HTML, we need to support more than 2 characters here and assume that all characters passed by the variable are to be used.
1543 name = cstr;
1545 else
1547 name+=c[1];
1548 name+=c[2];
1549 c+=3;
1552 else if ( *c == '[' )
1554 // \f[long_name] Long name
1555 // We must find the ] to get a name
1556 c++;
1557 while ( *c && *c != ']' && *c != '\n' )
1559 if ( *c == escapesym )
1561 QByteArray cstr;
1562 c = scan_escape_direct( c+1, cstr );
1563 const int result = cstr.indexOf(']');
1564 if ( result == -1 )
1565 name += cstr;
1566 else
1568 // Note: we drop the characters after the ]
1569 name += cstr.left( result );
1572 else
1574 name+=*c;
1575 c++;
1578 if ( !*c || *c == '\n' )
1580 kDebug(7107) << "Found linefeed! Could not parse font name: " << BYTEARRAY( name );
1581 return "";
1583 c++;
1585 else
1587 // \fa Font name with one character or one digit
1588 // ### HACK do *not* use: name = *c; or name would be empty
1589 name += *c;
1590 c++;
1592 //kDebug(7107) << "FONT NAME: " << BYTEARRAY( name );
1593 // Now we have the name, let us find the font
1594 bool ok = false;
1595 const unsigned int number = name.toUInt( &ok );
1596 if ( ok )
1598 if ( number < 5 )
1600 const char* fonts[] = { "R", "I", "B", "BI", "CR" }; // Regular, Italic, Bold, Bold Italic, Courier regular
1601 name = fonts[ number ];
1603 else
1605 kDebug(7107) << "EXCEPTION: font has too big number: " << BYTEARRAY( name ) << " => " << number;
1606 name = "R"; // Let assume Regular
1609 else if ( name.isEmpty() )
1611 kDebug(7107) << "EXCEPTION: font has no name: " << BYTEARRAY( name );
1612 name = "R"; // Let assume Regular
1614 if ( !skip_escape )
1615 return set_font( name );
1616 else
1617 return "";
1620 static QByteArray scan_number_code( char*& c )
1622 QByteArray number;
1623 if ( *c != '\'' )
1624 return "";
1625 while ( *c && ( *c != '\n' ) && ( *c != '\'' ) )
1627 number += *c;
1628 c++;
1630 bool ok = false;
1631 unsigned int result = number.toUInt( &ok );
1632 if ( ( result < ' ' ) || ( result > 65535 ) )
1633 return "";
1634 else if ( result == '\t' )
1636 curpos += 8;
1637 curpos &= 0xfff8;
1638 return "\t";
1640 number.setNum( result );
1641 number.prepend( "&#" );
1642 number.append( ";" );
1643 curpos ++;
1644 return number;
1647 // ### TODO known missing escapes from groff(7):
1648 // ### TODO \& \! \) \: \R
// Interpret one escape sequence without writing any output.
//
// c points at the character following the escape symbol. cstr is reset to
// an empty array and receives the replacement of the escape, if any.
// intresult is reset to 0; the n case stores the number register value in it
// and the w case stores the number of iterations its delimiter loop made.
//
// Returns the position following the escape. Cases that hand c to a helper
// taking it by reference clear cplusplus, because the helper has already
// moved c; every other case relies on the final c++.
//
// NOTE(review): several cases advance c without looking for the terminating
// NUL (the two delimiter loops, "k(" and the final c++ after the default
// case) - presumably the callers guarantee a trailing newline; confirm.
1650 static char *scan_escape_direct( char *c, QByteArray& cstr )
// Saved values of output_possible and skip_escape, restored after the
// delimiter loops
1652 bool exoutputp;
1653 bool exskipescape;
1654 int i,j;
1655 bool cplusplus = true; // Should the c++ call be executed at the end of the function
1657 cstr = "";
1658 intresult=0;
1659 switch (*c) {
1660 case 'e': cstr = "\\"; curpos++;break; // ### FIXME: it should be the current escape symbol
1661 case '0': // ### TODO Where in Unicode? (space of digit width)
1662 case '~': // non-breakable-space (resizeable!)
1663 case ' ':
1664 case '|': // half-non-breakable-space
1665 case '^': // quarter-non-breakable-space
1666 cstr = "&nbsp;"; curpos++; break;
// Comment: skip to the end of the line; c is stepped back once to
// compensate for the final c++.
1667 case '"': SKIPEOL; c--; break;
1668 // ### TODO \# like \" but does not ignore the end of line (groff(7))
1669 case '$':
1671 c++;
1672 cstr = scan_dollar_parameter( c );
1673 cplusplus = false;
1674 break;
1676 case 'z':
1678 c++;
1679 if (*c=='\\')
1681 c=scan_escape_direct( c+1, cstr );
1682 c--;
1684 else
1685 cstr = QByteArray( c, 1 );
1686 break;
// No break here: after skipping its argument, k falls through into the
// escapes below, which produce no output.
1688 case 'k': c++; if (*c=='(') c+=2; // ### FIXME \k[REG] exists too
1689 case '!':
1690 case '%':
1691 case 'a':
1692 case 'd':
1693 case 'r':
1694 case 'u':
1695 case '\n':
1696 case '&':
1697 cstr = ""; break;
1698 case '(':
1699 case '[':
1700 case 'C':
1702 // Do not go forward as scan_named_character needs the leading symbol
1703 cstr = scan_named_character( c );
1704 cplusplus = false;
1705 break;
1707 case '*':
1709 c++;
1710 cstr = scan_named_string( c );
1711 cplusplus = false;
1712 break;
1714 case 'f':
1716 c++;
1717 cstr = scan_named_font( c );
1718 cplusplus = false;
1719 break;
// Size change: j holds the sign (0 if none was given) and i the number,
// which may also come from a nested escape through intresult. Without a
// sign, a non-zero i is reduced by 10 before change_to_size() is called.
1721 case 's': // ### FIXME: many forms are missing
1722 c++;
1723 j=0;i=0;
1724 if (*c=='-') {j= -1; c++;} else if (*c=='+') {j=1; c++;}
1725 if (*c=='0') c++; else if (*c=='\\') {
1726 c++;
1727 c=scan_escape_direct( c, cstr );
1728 i=intresult; if (!j) j=1;
1729 } else
1730 while (isdigit(*c) && (!i || (!j && i<4))) i=i*10+(*c++)-'0';
1731 if (!j) { j=1; if (i) i=i-10; }
1732 if (!skip_escape) cstr=change_to_size(i*j);
1733 c--;
1734 break;
1735 case 'n':
1737 c++;
1738 intresult = scan_number_register( c );
1739 cplusplus = false;
1740 break;
// The character after w is the delimiter (kept in i). The text up to the
// next delimiter is scanned with output disabled and escapes skipped; j
// counts the loop iterations and becomes intresult.
1742 case 'w':
1743 c++;
1744 i=*c;
1745 c++;
1746 exoutputp=output_possible;
1747 exskipescape=skip_escape;
1748 output_possible=false;
1749 skip_escape=true;
1750 j=0;
1751 while (*c!=i)
1753 j++;
1754 if ( *c == escapesym )
1755 c = scan_escape_direct( c+1, cstr);
1756 else
1757 c++;
1759 output_possible=exoutputp;
1760 skip_escape=exskipescape;
1761 intresult=j;
1762 break;
// No break after l: it falls through into the cases below, which skip a
// delimited argument the same way as w does, without counting.
1763 case 'l': cstr = "<HR>"; curpos=0;
1764 case 'b':
1765 case 'v':
1766 case 'x':
1767 case 'o':
1768 case 'L':
1769 case 'h':
1770 c++;
1771 i=*c;
1772 c++;
1773 exoutputp=output_possible;
1774 exskipescape=skip_escape;
1775 output_possible=0;
1776 skip_escape=true;
1777 while (*c != i)
1778 if (*c==escapesym) c=scan_escape_direct( c+1, cstr );
1779 else c++;
1780 output_possible=exoutputp;
1781 skip_escape=exskipescape;
1782 break;
1783 case 'c': no_newline_output=1; break;
1784 case '{': newline_for_fun++; break; // Start conditional block
1785 case '}': if (newline_for_fun) newline_for_fun--; break; // End conditional block
1786 case 'p': cstr = "<BR>\n";curpos=0; break;
1787 case 't': cstr = "\t";curpos=(curpos+8)&0xfff8; break;
1788 case '<': cstr = "&lt;";curpos++; break;
1789 case '>': cstr = "&gt;";curpos++; break;
1790 case '\\':
1792 if (single_escape)
1793 c--;
1794 else
1795 cstr="\\";
1796 break;
1798 case 'N':
1800 c++;
1801 cstr = scan_number_code( c );
1802 cplusplus = false;
1803 break;
1805 case '\'': cstr = "&acute;";curpos++; break; // groff(7) ### TODO verify
1806 case '`': cstr = "`";curpos++; break; // groff(7)
1807 case '-': cstr = "-";curpos++; break; // groff(7)
1808 case '.': cstr = ".";curpos++; break; // groff(7)
// Any other character stands for itself
1809 default: cstr = QByteArray( c, 1 ); curpos++; break;
1811 if (cplusplus)
1812 c++;
1813 return c;
1816 static char *scan_escape(char *c)
1818 QByteArray cstr;
1819 char* result = scan_escape_direct( c, cstr );
1820 if ( !skip_escape )
1821 out_html(cstr);
1822 return result;
1825 class TABLEROW;
1827 class TABLEITEM {
1828 public:
1829 TABLEITEM(TABLEROW *row);
1830 ~TABLEITEM() {
1831 delete [] contents;
1833 void setContents(const char *_contents) {
1834 delete [] contents;
1835 contents = qstrdup(_contents);
1837 const char *getContents() const { return contents; }
1839 void init() {
1840 delete [] contents;
1841 contents = 0;
1842 size = 0;
1843 align = 0;
1844 valign = 0;
1845 colspan = 1;
1846 rowspan = 1;
1847 font = 0;
1848 vleft = 0;
1849 vright = 0;
1850 space = 0;
1851 width = 0;
1854 void copyLayout(const TABLEITEM *orig) {
1855 size = orig->size;
1856 align = orig->align;
1857 valign = orig->valign;
1858 colspan = orig->colspan;
1859 rowspan = orig->rowspan;
1860 font = orig->font;
1861 vleft = orig->vleft;
1862 vright = orig->vright;
1863 space = orig->space;
1864 width = orig->width;
1867 public:
1868 int size,align,valign,colspan,rowspan,font,vleft,vright,space,width;
1870 private:
1871 char *contents;
1872 TABLEROW *_parent;
1875 class TABLEROW {
1876 char *test;
1877 public:
1878 TABLEROW() {
1879 test = new char;
1880 prev = 0; next = 0;
1882 ~TABLEROW() {
1883 qDeleteAll(items);
1884 items.clear();
1885 delete test;
1888 int length() const { return items.count(); }
1889 bool has(int index) {
1890 return (index >= 0) && (index < (int)items.count());
1892 TABLEITEM &at(int index) {
1893 return *items.at(index);
1896 TABLEROW *copyLayout() const;
1898 void addItem(TABLEITEM *item) {
1899 items.append(item);
1901 TABLEROW *prev, *next;
1903 private:
1904 QList<TABLEITEM*> items;
1907 TABLEITEM::TABLEITEM(TABLEROW *row) : contents(0), _parent(row) {
1908 init();
1909 _parent->addItem(this);
1912 TABLEROW *TABLEROW::copyLayout() const {
1913 TABLEROW *newrow = new TABLEROW();
1915 QListIterator<TABLEITEM *> it(items);
1916 while (it.hasNext()){
1917 TABLEITEM *newitem = new TABLEITEM(newrow);
1918 newitem->copyLayout(it.next());
1920 return newrow;
// Names of the table options recognised by scan_table(). The index of a
// name is the value switched on there; the NULL entry ends the list.
1923 static const char *tableopt[]= { "center", "expand", "box", "allbox",
1924 "doublebox", "tab", "linesize",
1925 "delim", NULL };
// Length of each name above, in the same order (0 for the NULL entry);
// used as the comparison length and to skip a recognised name.
1926 static int tableoptl[] = { 6,6,3,6,9,3,8,5,0};
1929 static void clear_table(TABLEROW *table)
1931 TABLEROW *tr1,*tr2;
1933 tr1=table;
1934 while (tr1->prev) tr1=tr1->prev;
1935 while (tr1) {
1936 tr2=tr1;
1937 tr1=tr1->next;
1938 delete tr2;
1942 static char *scan_expression(char *c, int *result);
1944 static char *scan_format(char *c, TABLEROW **result, int *maxcol)
1946 TABLEROW *layout, *currow;
1947 TABLEITEM *curfield;
1948 int i,j;
1949 if (*result) {
1950 clear_table(*result);
1952 layout= currow=new TABLEROW();
1953 curfield=new TABLEITEM(currow);
1954 while (*c && *c!='.') {
1955 switch (*c) {
1956 case 'C': case 'c': case 'N': case 'n':
1957 case 'R': case 'r': case 'A': case 'a':
1958 case 'L': case 'l': case 'S': case 's':
1959 case '^': case '_':
1960 if (curfield->align)
1961 curfield=new TABLEITEM(currow);
1962 curfield->align=toupper(*c);
1963 c++;
1964 break;
1965 case 'i': case 'I': case 'B': case 'b':
1966 curfield->font = toupper(*c);
1967 c++;
1968 break;
1969 case 'f': case 'F':
1970 c++;
1971 curfield->font = toupper(*c);
1972 c++;
1973 if (!isspace(*c) && *c!='.') c++;
1974 break;
1975 case 't': case 'T': curfield->valign='t'; c++; break;
1976 case 'p': case 'P':
1977 c++;
1978 i=j=0;
1979 if (*c=='+') { j=1; c++; }
1980 if (*c=='-') { j=-1; c++; }
1981 while (isdigit(*c)) i=i*10+(*c++)-'0';
1982 if (j) curfield->size= i*j; else curfield->size=j-10;
1983 break;
1984 case 'v': case 'V':
1985 case 'w': case 'W':
1986 c=scan_expression(c+2,&curfield->width);
1987 break;
1988 case '|':
1989 if (curfield->align) curfield->vleft++;
1990 else curfield->vright++;
1991 c++;
1992 break;
1993 case 'e': case 'E':
1994 c++;
1995 break;
1996 case '0': case '1': case '2': case '3': case '4':
1997 case '5': case '6': case '7': case '8': case '9':
1998 i=0;
1999 while (isdigit(*c)) i=i*10+(*c++)-'0';
2000 curfield->space=i;
2001 break;
2002 case ',': case '\n':
2003 currow->next=new TABLEROW();
2004 currow->next->prev=currow;
2005 currow=currow->next;
2006 currow->next=NULL;
2007 curfield=new TABLEITEM(currow);
2008 c++;
2009 break;
2010 default:
2011 c++;
2012 break;
2015 if (*c=='.') while (*c++!='\n');
2016 *maxcol=0;
2017 currow=layout;
2018 while (currow) {
2019 i=currow->length();
2020 if (i>*maxcol) *maxcol=i;
2021 currow=currow->next;
2023 *result=layout;
2024 return c;
2027 static TABLEROW *next_row(TABLEROW *tr)
2029 if (tr->next) {
2030 tr=tr->next;
2031 if (!tr->next)
2032 return next_row(tr);
2033 return tr;
2034 } else {
2035 tr->next = tr->copyLayout();
2036 tr->next->prev = tr;
2037 return tr->next;
// troff text scanned after each table item: back to font R and size 0
2041 static char itemreset[20]="\\fR\\s0";
// Move curfield to the next item of currow whose alignment is not 'S'
2043 #define FORWARDCUR do { curfield++; } while (currow->has(curfield) && currow->at(curfield).align=='S');
// Convert a table to HTML.
//
// c points into the line starting the table; the rest of that line is
// skipped. The function reads the optional options line (ending with ';'),
// the format section (scan_format) and the data lines up to the .TE line,
// computes the column and row spans and writes a <TABLE> element with
// out_html(). The font, size and fill mode current on entry are restored
// at the end. Returns the position following the .TE line.
//
// NOTE(review): hazards visible in this function, to be confirmed and fixed:
//  - most "while (*c++!='\n')" style loops do not check for the terminating NUL;
//  - the result of strstr() for "\nT}" is used without a null check;
//  - COLSPAN/ROWSPAN are formatted with sprintf() into a 5 byte buffer;
//  - the item font is a character code, but it is passed to set_font()
//    through QByteArray::number(), i.e. as its decimal representation.
2045 static char *scan_table(char *c)
2047 char *h;
2048 char *g;
2049 int center=0, expand=0, box=0, border=0, linesize=1;
2050 int i,j,maxcol=0, finished=0;
2051 QByteArray oldfont;
2052 int oldsize,oldfillout;
2053 char itemsep='\t';
2054 TABLEROW *layout=NULL, *currow;
2055 int curfield = -1;
2056 while (*c++!='\n');
2057 h=c;
2058 if (*h=='.') return c-1;
2059 oldfont=current_font;
2060 oldsize=current_size;
2061 oldfillout=fillout;
2062 out_html(set_font("R"));
2063 out_html(change_to_size(0));
2064 if (!fillout) {
2065 fillout=1;
2066 out_html("</PRE>");
// h is moved to the end of the first line; a trailing ';' marks an options line
2068 while (*h && *h!='\n') h++;
2069 if (h[-1]==';') {
2070 /* scan table options */
2071 while (c<h) {
2072 while (isspace(*c)) c++;
// i becomes the index of the matching option name, or of the NULL entry
2073 for (i=0; tableopt[i] && qstrncmp(tableopt[i],c,tableoptl[i]);i++);
2074 c=c+tableoptl[i];
2075 switch (i) {
2076 case 0: center=1; break;
2077 case 1: expand=1; break;
2078 case 2: box=1; break;
2079 case 3: border=1; break;
2080 case 4: box=2; break;
2081 case 5: while (*c++!='('); itemsep=*c++; break;
2082 case 6: while (*c++!='('); linesize=0;
2083 while (isdigit(*c)) linesize=linesize*10+(*c++)-'0';
2084 break;
// case 7 has no break and falls through to default
2085 case 7: while (*c!=')') c++;
2086 default: break;
2088 c++;
2090 c=h+1;
2092 /* scan layout */
2093 c=scan_format(c,&layout, &maxcol);
2094 // currow=layout;
2095 currow=next_row(layout);
2096 curfield=0;
2097 i=0;
2098 while (!finished && *c) {
2099 /* search item */
2100 h=c;
// An item made of a single '_' or '=': a horizontal line
2101 if ((*c=='_' || *c=='=') && (c[1]==itemsep || c[1]=='\n')) {
2102 if (c[-1]=='\n' && c[1]=='\n') {
// Alone on its line: a new row holding one item spanning all columns is
// inserted before the current row
2103 if (currow->prev) {
2104 currow->prev->next=new TABLEROW();
2105 currow->prev->next->next=currow;
2106 currow->prev->next->prev=currow->prev;
2107 currow->prev=currow->prev->next;
2108 } else {
2109 currow->prev=layout=new TABLEROW();
2110 currow->prev->prev=NULL;
2111 currow->prev->next=currow;
2113 TABLEITEM *newitem = new TABLEITEM(currow->prev);
2114 newitem->align=*c;
2115 newitem->colspan=maxcol;
2116 curfield=0;
2117 c=c+2;
2118 } else {
2119 if (currow->has(curfield)) {
2120 currow->at(curfield).align=*c;
2121 FORWARDCUR;
2123 if (c[1]=='\n') {
2124 currow=next_row(currow);
2125 curfield=0;
2127 c=c+2;
// A text block: everything up to the line starting with T} is one item
2129 } else if (*c=='T' && c[1]=='{') {
2130 h=c+2;
2131 c=strstr(h,"\nT}");
2132 c++;
// The block is terminated temporarily for scan_troff(); the 'T' is put back below
2133 *c='\0';
2134 g=NULL;
2135 scan_troff(h,0,&g);
2136 scan_troff(itemreset, 0, &g);
2137 *c='T';
2138 c+=3;
2139 if (currow->has(curfield)) {
2140 currow->at(curfield).setContents(g);
2141 FORWARDCUR;
2143 delete [] g;
2145 if (c[-1]=='\n') {
2146 currow=next_row(currow);
2147 curfield=0;
// .T& : a new format section replaces the layout from the current row on
2149 } else if (*c=='.' && c[1]=='T' && c[2]=='&' && c[-1]=='\n') {
2150 TABLEROW *hr;
2151 while (*c++!='\n');
2152 hr=currow;
2153 currow=currow->prev;
2154 hr->prev=NULL;
2155 c=scan_format(c,&hr, &i);
2156 hr->prev=currow;
2157 currow->next=hr;
2158 currow=hr;
2159 next_row(currow);
2160 curfield=0;
// .TE : end of the table; the current row and the rows after it are deleted
2161 } else if (*c=='.' && c[1]=='T' && c[2]=='E' && c[-1]=='\n') {
2162 finished=1;
2163 while (*c++!='\n');
2164 if (currow->prev)
2165 currow->prev->next=NULL;
2166 currow->prev=NULL;
2167 clear_table(currow);
2168 currow = 0;
2169 } else if (*c=='.' && c[-1]=='\n' && !isdigit(c[1])) {
2170 /* skip troff request inside table (usually only .sp ) */
2171 while (*c++!='\n');
2172 } else {
// An ordinary item: it ends at the next unescaped separator or newline
2173 h=c;
2174 while (*c && (*c!=itemsep || c[-1]=='\\') &&
2175 (*c!='\n' || c[-1]=='\\')) c++;
2176 i=0;
// The separator is replaced by a newline while the item is scanned and
// restored afterwards (i remembers that this was done)
2177 if (*c==itemsep) {i=1; *c='\n'; }
2178 if (h[0]=='\\' && h[2]=='\n' &&
2179 (h[1]=='_' || h[1]=='^')) {
2180 if (currow->has(curfield)) {
2181 currow->at(curfield).align=h[1];
2182 FORWARDCUR;
2184 h=h+3;
2185 } else {
2186 g=NULL;
2187 h=scan_troff(h,1,&g);
2188 scan_troff(itemreset,0, &g);
2189 if (currow->has(curfield)) {
2190 currow->at(curfield).setContents(g);
2191 FORWARDCUR;
2193 delete [] g;
2195 if (i) *c=itemsep;
2196 c=h;
2197 if (c[-1]=='\n') {
2198 currow=next_row(currow);
2199 curfield=0;
2203 /* calculate colspan and rowspan */
// The rows are walked from the last one back to the first; each row is
// compared with its predecessor.
2204 currow=layout;
2205 while (currow->next) currow=currow->next;
2206 while (currow) {
2207 int ti = 0, ti1 = 0, ti2 = -1;
2208 TABLEROW *prev = currow->prev;
2209 if (!prev)
2210 break;
2212 while (prev->has(ti1)) {
2213 if (currow->has(ti))
2214 switch (currow->at(ti).align) {
2215 case 'S':
2216 if (currow->has(ti2)) {
2217 currow->at(ti2).colspan++;
2218 if (currow->at(ti2).rowspan<prev->at(ti1).rowspan)
2219 currow->at(ti2).rowspan=prev->at(ti1).rowspan;
2221 break;
// No break after the '^' case: it falls through to default
2222 case '^':
2223 if (prev->has(ti1)) prev->at(ti1).rowspan++;
2224 default:
2225 if (ti2 < 0) ti2=ti;
2226 else {
2227 do {
2228 ti2++;
2229 } while (currow->has(ti2) && currow->at(ti2).align=='S');
2231 break;
2233 ti++;
2234 if (ti1 >= 0) ti1++;
2236 currow=currow->prev;
2238 /* produce html output */
2239 if (center) out_html("<CENTER>");
2240 if (box==2) out_html("<TABLE BORDER><TR><TD>");
2241 out_html("<TABLE");
2242 if (box || border) {
2243 out_html(" BORDER");
2244 if (!border) out_html("><TR><TD><TABLE");
2245 if (expand) out_html(" WIDTH=\"100%\"");
2247 out_html(">\n");
2248 currow=layout;
2249 while (currow) {
// j sums the colspan of the items written so far in this row
2250 j=0;
2251 out_html("<TR VALIGN=top>");
2252 curfield=0;
2253 while (currow->has(curfield)) {
// Items aligned 'S' or '^' produce no <TD>
2254 if (currow->at(curfield).align!='S' && currow->at(curfield).align!='^') {
2255 out_html("<TD");
2256 switch (currow->at(curfield).align) {
// No break after the 'N' case: it is also right aligned
2257 case 'N':
2258 currow->at(curfield).space+=4;
2259 case 'R':
2260 out_html(" ALIGN=right");
2261 break;
2262 case 'C':
2263 out_html(" ALIGN=center");
2264 default:
2265 break;
2267 if (!currow->at(curfield).valign && currow->at(curfield).rowspan>1)
2268 out_html(" VALIGN=center");
2269 if (currow->at(curfield).colspan>1) {
2270 char buf[5];
2271 out_html(" COLSPAN=");
2272 sprintf(buf, "%i", currow->at(curfield).colspan);
2273 out_html(buf);
2275 if (currow->at(curfield).rowspan>1) {
2276 char buf[5];
2277 out_html(" ROWSPAN=");
2278 sprintf(buf, "%i", currow->at(curfield).rowspan);
2279 out_html(buf);
2281 j=j+currow->at(curfield).colspan;
2282 out_html(">");
2283 if (currow->at(curfield).size) out_html(change_to_size(currow->at(curfield).size));
2284 if (currow->at(curfield).font)
2285 out_html(set_font(QByteArray::number(currow->at(curfield).font) ));
2286 switch (currow->at(curfield).align) {
2287 case '=': out_html("<HR><HR>"); break;
2288 case '_': out_html("<HR>"); break;
2289 default:
2290 out_html(currow->at(curfield).getContents());
2291 break;
2293 if (currow->at(curfield).space)
2294 for (i=0; i<currow->at(curfield).space;i++) out_html("&nbsp;");
2295 if (currow->at(curfield).font) out_html(set_font("R"));
2296 if (currow->at(curfield).size) out_html(change_to_size(0));
2297 if (j>=maxcol && currow->at(curfield).align>'@' && currow->at(curfield).align!='_')
2298 out_html("<BR>");
2299 out_html("</TD>");
2301 curfield++;
2303 out_html("</TR>\n");
2304 currow=currow->next;
2307 clear_table(layout);
2309 if (box && !border) out_html("</TABLE>");
2310 out_html("</TABLE>");
2311 if (box==2) out_html("</TABLE>");
2312 if (center) out_html("</CENTER>\n");
2313 else out_html("\n");
// Restore the fill mode, size and font saved on entry
2314 if (!oldfillout) out_html("<PRE>");
2315 fillout=oldfillout;
2316 out_html(change_to_size(oldsize));
2317 out_html(set_font(oldfont));
2318 return c;
// Evaluate the expression starting at c and store its value in *result.
//
// Handled forms: "!expr" (logical negation), "n" (value of s_nroff), "t"
// (1 - s_nroff), a comparison of two strings enclosed in a common separator,
// and numeric expressions made of numbers, escapes, parentheses and
// operators, evaluated strictly from left to right.
//
// numLoop is the nesting depth of parentheses: at depth 0 white space ends
// the expression, inside parentheses it does not.
//
// Returns the position following the expression.
2321 static char *scan_expression( char *c, int *result, const unsigned int numLoop )
// value: result so far; value2: last operand; sign: sign of the next
// operand; opex: 1 if an operand was read in this iteration, -1 to stop.
2323 int value=0,value2,sign=1,opex=0;
// oper: pending operator; 'c' means "copy the first operand", 0 means none
2324 char oper='c';
2326 if (*c=='!') {
2327 c=scan_expression(c+1, &value);
2328 value= (!value);
2329 } else if (*c=='n') {
2330 c++;
2331 value=s_nroff;
2332 } else if (*c=='t') {
2333 c++;
2334 value=1-s_nroff;
2335 } else if (*c=='\'' || *c=='"' || *c<' ' || (*c=='\\' && c[1]=='(')) {
2336 /* ?string1?string2?
2337 ** test if string1 equals string2.
// The separator is either one character or, if it starts with a backslash,
// a sequence of 4 characters that tcmp points to.
// NOTE(review): the two loops looking for the separator do not check for
// the terminating NUL.
2339 char *st1=NULL, *st2=NULL, *h;
2340 char *tcmp=NULL;
2341 char sep;
2342 sep=*c;
2343 if (sep=='\\') {
2344 tcmp=c;
2345 c=c+3;
2347 c++;
2348 h=c;
2349 while (*c!= sep && (!tcmp || qstrncmp(c,tcmp,4))) c++;
// The separator is replaced by a newline while the string is scanned
2350 *c='\n';
2351 scan_troff(h, 1, &st1);
2352 *c=sep;
2353 if (tcmp) c=c+3;
2354 c++;
2355 h=c;
2356 while (*c!=sep && (!tcmp || qstrncmp(c,tcmp,4))) c++;
2357 *c='\n';
2358 scan_troff(h,1,&st2);
2359 *c=sep;
2360 if (!st1 && !st2) value=1;
2361 else if (!st1 || !st2) value=0;
2362 else value=(!qstrcmp(st1, st2));
2363 delete [] st1;
2364 delete [] st2;
2365 if (tcmp) c=c+3;
2366 c++;
2367 } else {
2368 while (*c && ( !isspace(*c) || ( numLoop > 0 ) ) && *c!=')' && opex >= 0) {
2369 opex=0;
2370 switch (*c) {
2371 case '(':
2372 c = scan_expression( c + 1, &value2, numLoop + 1 );
2373 value2=sign*value2;
2374 opex=1;
2375 break;
2376 case '.':
2377 case '0': case '1':
2378 case '2': case '3':
2379 case '4': case '5':
2380 case '6': case '7':
2381 case '8': case '9': {
// Integer part in value2, fraction as num/denum, rounded to an integer below
2382 int num=0,denum=1;
2383 value2=0;
2384 while (isdigit(*c)) value2=value2*10+((*c++)-'0');
2385 if (*c=='.' && isdigit(c[1])) {
2386 c++;
2387 while (isdigit(*c)) {
2388 num=num*10+((*c++)-'0');
2389 denum=denum*10;
2392 if (isalpha(*c)) {
2393 /* scale indicator */
2394 switch (*c) {
2395 case 'i': /* inch -> 10pt */
2396 value2=value2*10+(num*10+denum/2)/denum;
2397 num=0;
2398 break;
2399 default:
2400 break;
2402 c++;
2404 value2=value2+(num+denum/2)/denum;
2405 value2=sign*value2;
2406 opex=1;
// A '.' still following the number ends the expression
2407 if (*c=='.')
2408 opex = -1;
2411 break;
2412 case '\\':
// An escape: its numeric value is read from intresult
2413 c=scan_escape(c+1);
2414 value2=intresult*sign;
2415 if (isalpha(*c)) c++; /* scale indicator */
2416 opex=1;
2417 break;
// A '-' while an operator is pending is the sign of the next operand;
// otherwise it falls through and becomes the operator itself.
2418 case '-':
2419 if (oper) { sign=-1; c++; break; }
2420 case '>':
2421 case '<':
2422 case '+':
2423 case '/':
2424 case '*':
2425 case '%':
2426 case '&':
2427 case '=':
2428 case ':':
// An operator followed by '=' is encoded as the operator character plus 16
2429 if (c[1]=='=') oper=(*c++) +16; else oper=*c;
2430 c++;
2431 break;
2432 default: c++; break;
2434 if (opex > 0) {
2435 sign=1;
2436 switch (oper) {
2437 case 'c': value=value2; break;
2438 case '-': value=value-value2; break;
2439 case '+': value=value+value2; break;
2440 case '*': value=value*value2; break;
// Division and modulo by zero leave the value unchanged
2441 case '/': if (value2) value=value/value2; break;
2442 case '%': if (value2) value=value%value2; break;
2443 case '<': value=(value<value2); break;
2444 case '>': value=(value>value2); break;
2445 case '>'+16: value=(value>=value2); break;
2446 case '<'+16: value=(value<=value2); break;
2447 case '=': case '='+16: value=(value==value2); break;
2448 case '&': value = (value && value2); break;
2449 case ':': value = (value || value2); break;
2450 default:
2452 kDebug(7107) << "Unknown operator " << char(oper);
2455 oper=0;
2458 if (*c==')') c++;
2460 *result=value;
2461 return c;
// Evaluate an expression at nesting depth 0 (white space ends it).
2464 static char *scan_expression(char *c, int *result)
2466 return scan_expression( c, result, 0 );
2469 static void trans_char(char *c, char s, char t)
2471 char *sl=c;
2472 int slash=0;
2473 while (*sl!='\n' || slash) {
2474 if (!slash) {
2475 if (*sl==escapesym)
2476 slash=1;
2477 else if (*sl==s)
2478 *sl=t;
2479 } else slash=0;
2480 sl++;
2484 // 2004-10-19, patched by Waldo Bastian <bastian@kde.org>:
2485 // Fix handling of lines like:
2486 // .TH FIND 1L \" -*- nroff -*-
2487 // Where \" indicates the start of comment.
2489 // The problem is the \" handling in fill_words(), the return value
2490 // indicates the end of the word as well as the end of the line, which makes it
2491 // basically impossible to express that the end of the last word is not the end of
2492 // the line.
2494 // I have corrected that by adding an extra parameter 'next_line' that returns a
2495 // pointer to the next line, while the function itself returns a pointer to the end
2496 // of the last word.
2497 static char *fill_words(char *c, char *words[], int *n, bool newline, char **next_line)
2499 char *sl=c;
2500 int slash=0;
2501 int skipspace=0;
2502 *n=0;
2503 words[*n]=sl;
2504 while (*sl && (*sl!='\n' || slash)) {
2505 if (!slash) {
2506 if (*sl=='"') {
2507 if (skipspace && (*(sl+1)=='"'))
2508 *sl++ = '\a';
2509 else {
2510 *sl='\a';
2511 skipspace=!skipspace;
2513 } else if (*sl==escapesym) {
2514 slash=1;
2515 if (sl[1]=='\n')
2516 *sl='\a';
2517 } else if ((*sl==' ' || *sl=='\t') && !skipspace) {
2518 if (newline) *sl='\n';
2519 if (words[*n]!=sl) (*n)++;
2520 words[*n]=sl+1;
2522 } else {
2523 if (*sl=='"') {
2524 sl--;
2525 if (newline) *sl='\n';
2526 if (words[*n]!=sl) (*n)++;
2527 if (next_line)
2529 char *eow = sl;
2530 sl++;
2531 while (*sl && *sl !='\n') sl++;
2532 *next_line = sl;
2533 return eow;
2535 return sl;
2537 slash=0;
2539 sl++;
2541 if (sl!=words[*n]) (*n)++;
2542 if (next_line) *next_line = sl+1;
2543 return sl;
/**
 * Table of document abbreviations and the titles they stand for.
 * Entries come in pairs (abbreviation, title); the table is terminated by
 * a pair of null pointers. It is searched by lookup_abbrev().
 */
static const char *abbrev_list[] = {
    "GSBG",     "Getting Started ",
    "SUBG",     "Customizing SunOS",
    "SHBG",     "Basic Troubleshooting",
    "SVBG",     "SunView User's Guide",
    "MMBG",     "Mail and Messages",
    "DMBG",     "Doing More with SunOS",
    "UNBG",     "Using the Network",
    "GDBG",     "Games, Demos &amp; Other Pursuits",
    "CHANGE",   "SunOS 4.1 Release Manual",
    "INSTALL",  "Installing SunOS 4.1",
    "ADMIN",    "System and Network Administration",
    "SECUR",    "Security Features Guide",
    "PROM",     "PROM User's Manual",
    "DIAG",     "Sun System Diagnostics",
    "SUNDIAG",  "Sundiag User's Guide",
    "MANPAGES", "SunOS Reference Manual",
    "REFMAN",   "SunOS Reference Manual",
    "SSI",      "Sun System Introduction",
    "SSO",      "System Services Overview",
    "TEXT",     "Editing Text Files",
    "DOCS",     "Formatting Documents",
    "TROFF",    "Using <B>nroff</B> and <B>troff</B>",
    "INDEX",    "Global Index",
    "CPG",      "C Programmer's Guide",
    "CREF",     "C Reference Manual",
    "ASSY",     "Assembly Language Reference",
    "PUL",      "Programming Utilities and Libraries",
    "DEBUG",    "Debugging Tools",
    "NETP",     "Network Programming",
    "DRIVER",   "Writing Device Drivers",
    "STREAMS",  "STREAMS Programming",
    "SBDK",     "SBus Developer's Kit",
    "WDDS",     "Writing Device Drivers for the SBus",
    "FPOINT",   "Floating-Point Programmer's Guide",
    "SVPG",     "SunView 1 Programmer's Guide",
    "SVSPG",    "SunView 1 System Programmer's Guide",
    "PIXRCT",   "Pixrect Reference Manual",
    "CGI",      "SunCGI Reference Manual",
    "CORE",     "SunCore Reference Manual",
    "4ASSY",    "Sun-4 Assembly Language Reference",
    "SARCH",    "<FONT SIZE=\"-1\">SPARC</FONT> Architecture Manual",
    "KR",       "The C Programming Language",
    NULL,       NULL
};
2591 static const char *lookup_abbrev(char *c)
2593 int i=0;
2595 if (!c) return "";
2596 while (abbrev_list[i] && qstrcmp(c,abbrev_list[i])) i=i+2;
2597 if (abbrev_list[i]) return abbrev_list[i+1];
2598 else return c;
/**
 * Table of manual section identifiers and their titles, selected per
 * operating system at compile time.
 * Entries come in pairs (section, title). The first pair with a null
 * section carries the default title; the table ends with a pair of null
 * pointers. It is searched by section_name().
 */
static const char *section_list[] = {
#ifdef Q_OS_SOLARIS
    // for Solaris
    "1",  "User Commands",
    "1B", "SunOS/BSD Compatibility Package Commands",
    "1b", "SunOS/BSD Compatibility Package Commands",
    "1C", "Communication Commands ",
    "1c", "Communication Commands",
    "1F", "FMLI Commands ",
    "1f", "FMLI Commands",
    "1G", "Graphics and CAD Commands ",
    "1g", "Graphics and CAD Commands ",
    "1M", "Maintenance Commands",
    "1m", "Maintenance Commands",
    "1S", "SunOS Specific Commands",
    "1s", "SunOS Specific Commands",
    "2",  "System Calls",
    "3",  "C Library Functions",
    "3B", "SunOS/BSD Compatibility Library Functions",
    "3b", "SunOS/BSD Compatibility Library Functions",
    "3C", "C Library Functions",
    "3c", "C Library Functions",
    "3E", "C Library Functions",
    "3e", "C Library Functions",
    "3F", "Fortran Library Routines",
    "3f", "Fortran Library Routines",
    "3G", "C Library Functions",
    "3g", "C Library Functions",
    "3I", "Wide Character Functions",
    "3i", "Wide Character Functions",
    "3K", "Kernel VM Library Functions",
    "3k", "Kernel VM Library Functions",
    "3L", "Lightweight Processes Library",
    "3l", "Lightweight Processes Library",
    "3M", "Mathematical Library",
    "3m", "Mathematical Library",
    "3N", "Network Functions",
    "3n", "Network Functions",
    "3R", "Realtime Library",
    "3r", "Realtime Library",
    "3S", "Standard I/O Functions",
    "3s", "Standard I/O Functions",
    "3T", "Threads Library",
    "3t", "Threads Library",
    "3W", "C Library Functions",
    "3w", "C Library Functions",
    "3X", "Miscellaneous Library Functions",
    "3x", "Miscellaneous Library Functions",
    "4",  "File Formats",
    "4B", "SunOS/BSD Compatibility Package File Formats",
    "4b", "SunOS/BSD Compatibility Package File Formats",
    "5",  "Headers, Tables, and Macros",
    "6",  "Games and Demos",
    "7",  "Special Files",
    "7B", "SunOS/BSD Compatibility Special Files",
    "7b", "SunOS/BSD Compatibility Special Files",
    "8",  "Maintenance Procedures",
    "8C", "Maintenance Procedures",
    "8c", "Maintenance Procedures",
    "8S", "Maintenance Procedures",
    "8s", "Maintenance Procedures",
    "9",  "DDI and DKI",
    "9E", "DDI and DKI Driver Entry Points",
    "9e", "DDI and DKI Driver Entry Points",
    "9F", "DDI and DKI Kernel Functions",
    "9f", "DDI and DKI Kernel Functions",
    "9S", "DDI and DKI Data Structures",
    "9s", "DDI and DKI Data Structures",
    "L",  "Local Commands",
#elif defined(__NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__DragonFly__)
    "1",  "General Commands",
    "2",  "System Calls",
    "3",  "Library Functions",
    "4",  "Kernel Interfaces",
    "5",  "File Formats",
    "6",  "Games",
    "7",  "Miscellaneous Information",
    "8",  "System Manager's Manuals",
    "9",  "Kernel Developer's Manuals",
#else
    // Other OS
    "1",  "User Commands ",
    "1C", "User Commands",
    "1G", "User Commands",
    "1S", "User Commands",
    "1V", "User Commands ",
    "2",  "System Calls",
    "2V", "System Calls",
    "3",  "C Library Functions",
    "3C", "Compatibility Functions",
    "3F", "Fortran Library Routines",
    "3K", "Kernel VM Library Functions",
    "3L", "Lightweight Processes Library",
    "3M", "Mathematical Library",
    "3N", "Network Functions",
    "3R", "RPC Services Library",
    "3S", "Standard I/O Functions",
    "3V", "C Library Functions",
    "3X", "Miscellaneous Library Functions",
    "4",  "Devices and Network Interfaces",
    "4F", "Protocol Families",
    "4I", "Devices and Network Interfaces",
    "4M", "Devices and Network Interfaces",
    "4N", "Devices and Network Interfaces",
    "4P", "Protocols",
    "4S", "Devices and Network Interfaces",
    "4V", "Devices and Network Interfaces",
    "5",  "File Formats",
    "5V", "File Formats",
    "6",  "Games and Demos",
    "7",  "Environments, Tables, and Troff Macros",
    "7V", "Environments, Tables, and Troff Macros",
    "8",  "Maintenance Commands",
    "8C", "Maintenance Commands",
    "8S", "Maintenance Commands",
    "8V", "Maintenance Commands",
    "L",  "Local Commands",
#endif
    // The defaults
    NULL, "Misc. Reference Manual Pages",
    NULL, NULL
};
2724 static const char *section_name(char *c)
2726 int i=0;
2728 if (!c) return "";
2729 while (section_list[i] && qstrcmp(c,section_list[i])) i=i+2;
2730 if (section_list[i+1]) return section_list[i+1];
2731 else return c;
2734 static char *skip_till_newline(char *c)
2736 int lvl=0;
2738 while (*c && (*c!='\n' || lvl>0)) {
2739 if (*c=='\\') {
2740 c++;
2741 if (*c=='}') lvl--; else if (*c=='{') lvl++;
2743 c++;
2745 if (*c) c++;
2746 if (lvl<0 && newline_for_fun) {
2747 newline_for_fun = newline_for_fun+lvl;
2748 if (newline_for_fun<0) newline_for_fun=0;
2750 return c;
2753 static bool s_whileloop = false;
2755 /// Processing the .while request
2756 static void request_while( char*& c, int j, bool mdoc )
2758 // ### TODO: .break and .continue
2759 kDebug(7107) << "Entering .while";
2760 c += j;
2761 char* newline = skip_till_newline( c );
2762 const char oldchar = *newline;
2763 *newline = 0;
2764 // We store the full .while stuff into a QCString as if it would be a macro
2765 const QByteArray macro = c ;
2766 kDebug(7107) << "'Macro' of .while"<< BYTEARRAY( macro );
2767 // Prepare for continuing after .while loop end
2768 *newline = oldchar;
2769 c = newline;
2770 // Process -while loop
2771 const bool oldwhileloop = s_whileloop;
2772 s_whileloop = true;
2773 int result = true; // It must be an int due to the call to scan_expression
2774 while ( result )
2776 // Unlike for a normal macro, we have the condition at start, so we do not need to prepend extra bytes
2777 char* liveloop = qstrdup( macro.data() );
2778 kDebug(7107) << "Scanning .while condition";
2779 kDebug(7101) << "Loop macro " << liveloop;
2780 char* end_expression = scan_expression( liveloop, &result );
2781 kDebug(7101) << "After " << end_expression;
2782 if ( result )
2784 kDebug(7107) << "New .while iteration";
2785 // The condition is true, so call the .while's content
2786 char* help = end_expression + 1;
2787 while ( *help && ( *help == ' ' || *help == '\t' ) )
2788 ++help;
2789 if ( ! *help )
2791 // We have a problem, so stop .while
2792 result = false;
2793 break;
2795 if ( mdoc )
2796 scan_troff_mandoc( help, false, 0 );
2797 else
2798 scan_troff( help, false, 0 );
2800 delete[] liveloop;
2804 s_whileloop = oldwhileloop;
2805 kDebug(7107) << "Ending .while";
2808 const int max_wordlist = 100;
2810 /// Processing mixed fonts reqiests like .BI
2811 static void request_mixed_fonts( char*& c, int j, const char* font1, const char* font2, const bool mode, const bool inFMode )
2813 c += j;
2814 if (*c=='\n') c++;
2815 int words;
2816 char *wordlist[max_wordlist];
2817 fill_words(c, wordlist, &words, true, &c);
2818 for (int i=0; i<words; i++)
2820 if ((mode) || (inFMode))
2822 out_html(" ");
2823 curpos++;
2825 wordlist[i][-1]=' ';
2826 out_html( set_font( (i&1) ? font2 : font1 ) );
2827 scan_troff(wordlist[i],1,NULL);
2829 out_html(set_font("R"));
2830 if (mode)
2832 out_html(" ]");
2833 curpos++;
2835 out_html(NEWLINE);
2836 if (!fillout)
2837 curpos=0;
2838 else
2839 curpos++;
2842 // Some known missing requests from man(7):
2843 // - see "safe subset": .tr
2845 // Some known missing requests from mdoc(7):
2846 // - start or end of quotings
2848 // Some of the requests are from mdoc.
2849 // On Linux see the man pages mdoc(7), mdoc.samples(7) and groff_mdoc(7)
2850 // See also the online man pages of FreeBSD: mdoc(7)
// Request identifiers.
// Each value (except REQ_UNKNOWN) is the index of the request's name in the
// requests[] table of get_request(), so both lists must be kept in the same order.
#define REQ_UNKNOWN   -1
#define REQ_ab         0
#define REQ_di         1
#define REQ_ds         2
#define REQ_as         3
#define REQ_br         4
#define REQ_c2         5
#define REQ_cc         6
#define REQ_ce         7
#define REQ_ec         8
#define REQ_eo         9
#define REQ_ex        10
#define REQ_fc        11
#define REQ_fi        12
#define REQ_ft        13 // groff(7) "FonT"
#define REQ_el        14
#define REQ_ie        15
#define REQ_if        16
#define REQ_ig        17
#define REQ_nf        18
#define REQ_ps        19
#define REQ_sp        20
#define REQ_so        21
#define REQ_ta        22
#define REQ_ti        23
#define REQ_tm        24
#define REQ_B         25
#define REQ_I         26
#define REQ_Fd        27
#define REQ_Fn        28
#define REQ_Fo        29
#define REQ_Fc        30
#define REQ_OP        31
#define REQ_Ft        32
#define REQ_Fa        33
#define REQ_BR        34
#define REQ_BI        35
#define REQ_IB        36
#define REQ_IR        37
#define REQ_RB        38
#define REQ_RI        39
#define REQ_DT        40
#define REQ_IP        41 // man(7) "Indent Paragraph"
#define REQ_TP        42
#define REQ_IX        43
#define REQ_P         44
#define REQ_LP        45
#define REQ_PP        46
#define REQ_HP        47
#define REQ_PD        48
#define REQ_Rs        49
#define REQ_RS        50
#define REQ_Re        51
#define REQ_RE        52
#define REQ_SB        53
#define REQ_SM        54
#define REQ_Ss        55
#define REQ_SS        56
#define REQ_Sh        57
#define REQ_SH        58 // man(7) "Section Header"
#define REQ_Sx        59
#define REQ_TS        60
#define REQ_Dt        61
#define REQ_TH        62
#define REQ_TX        63
#define REQ_rm        64
#define REQ_rn        65
#define REQ_nx        66
#define REQ_in        67
#define REQ_nr        68 // groff(7) "Number Register"
#define REQ_am        69
#define REQ_de        70
#define REQ_Bl        71 // mdoc(7) "Begin List"
#define REQ_El        72 // mdoc(7) "End List"
#define REQ_It        73 // mdoc(7) "ITem"
#define REQ_Bk        74
#define REQ_Ek        75
#define REQ_Dd        76
#define REQ_Os        77 // mdoc(7)
#define REQ_Bt        78
#define REQ_At        79 // mdoc(7) "AT&t" (not parsable, not callable)
#define REQ_Fx        80 // mdoc(7) "Freebsd" (not parsable, not callable)
#define REQ_Nx        81
#define REQ_Ox        82
#define REQ_Bx        83 // mdoc(7) "Bsd"
#define REQ_Ux        84 // mdoc(7) "UniX"
#define REQ_Dl        85
#define REQ_Bd        86
#define REQ_Ed        87
#define REQ_Be        88
#define REQ_Xr        89 // mdoc(7) "eXternal Reference"
#define REQ_Fl        90 // mdoc(7) "FLag"
#define REQ_Pa        91
#define REQ_Pf        92
#define REQ_Pp        93
#define REQ_Dq        94 // mdoc(7) "Double Quote"
#define REQ_Op        95
#define REQ_Oo        96
#define REQ_Oc        97
#define REQ_Pq        98 // mdoc(7) "Parenthese Quote"
#define REQ_Ql        99
#define REQ_Sq       100 // mdoc(7) "Single Quote"
#define REQ_Ar       101
#define REQ_Ad       102
#define REQ_Em       103 // mdoc(7) "EMphasis"
#define REQ_Va       104
#define REQ_Xc       105
#define REQ_Nd       106
#define REQ_Nm       107
#define REQ_Cd       108
#define REQ_Cm       109
#define REQ_Ic       110
#define REQ_Ms       111
#define REQ_Or       112
#define REQ_Sy       113
#define REQ_Dv       114
#define REQ_Ev       115
#define REQ_Fr       116
#define REQ_Li       117
#define REQ_No       118
#define REQ_Ns       119
#define REQ_Tn       120
#define REQ_nN       121
#define REQ_perc_A   122
#define REQ_perc_D   123
#define REQ_perc_N   124
#define REQ_perc_O   125
#define REQ_perc_P   126
#define REQ_perc_Q   127
#define REQ_perc_V   128
#define REQ_perc_B   129
#define REQ_perc_J   130
#define REQ_perc_R   131
#define REQ_perc_T   132
#define REQ_An       133 // mdoc(7) "Author Name"
#define REQ_Aq       134 // mdoc(7) "Angle bracket Quote"
#define REQ_Bq       135 // mdoc(7) "Bracket Quote"
#define REQ_Qq       136 // mdoc(7) "straight double Quote"
#define REQ_UR       137 // man(7) "URl"
#define REQ_UE       138 // man(7) "Url End"
#define REQ_UN       139 // man(7) "Url Name" (a.k.a. anchors)
#define REQ_troff    140 // groff(7) "TROFF mode"
#define REQ_nroff    141 // groff(7) "NROFF mode"
#define REQ_als      142 // groff(7) "ALias String"
#define REQ_rr       143 // groff(7) "Remove number Register"
#define REQ_rnn      144 // groff(7) "ReName Number register"
#define REQ_aln      145 // groff(7) "ALias Number register"
#define REQ_shift    146 // groff(7) "SHIFT parameter"
#define REQ_while    147 // groff(7) "WHILE loop"
#define REQ_do       148 // groff(7) "DO command"
#define REQ_Dx       149 // mdoc(7) "DragonFly" macro
3004 static int get_request(char *req, int len)
3006 static const char *requests[] = {
3007 "ab", "di", "ds", "as", "br", "c2", "cc", "ce", "ec", "eo", "ex", "fc",
3008 "fi", "ft", "el", "ie", "if", "ig", "nf", "ps", "sp", "so", "ta", "ti",
3009 "tm", "B", "I", "Fd", "Fn", "Fo", "Fc", "OP", "Ft", "Fa", "BR", "BI",
3010 "IB", "IR", "RB", "RI", "DT", "IP", "TP", "IX", "P", "LP", "PP", "HP",
3011 "PD", "Rs", "RS", "Re", "RE", "SB", "SM", "Ss", "SS", "Sh", "SH", "Sx",
3012 "TS", "Dt", "TH", "TX", "rm", "rn", "nx", "in", "nr", "am", "de", "Bl",
3013 "El", "It", "Bk", "Ek", "Dd", "Os", "Bt", "At", "Fx", "Nx", "Ox", "Bx",
3014 "Ux", "Dl", "Bd", "Ed", "Be", "Xr", "Fl", "Pa", "Pf", "Pp", "Dq", "Op",
3015 "Oo", "Oc", "Pq", "Ql", "Sq", "Ar", "Ad", "Em", "Va", "Xc", "Nd", "Nm",
3016 "Cd", "Cm", "Ic", "Ms", "Or", "Sy", "Dv", "Ev", "Fr", "Li", "No", "Ns",
3017 "Tn", "nN", "%A", "%D", "%N", "%O", "%P", "%Q", "%V", "%B", "%J", "%R",
3018 "%T", "An", "Aq", "Bq", "Qq", "UR", "UE", "UN", "troff", "nroff", "als",
3019 "rr", "rnn", "aln", "shift", "while", "do", "Dx", 0 };
3020 int r = 0;
3021 while (requests[r] && qstrncmp(req, requests[r], len)) r++;
3022 return requests[r] ? r : REQ_UNKNOWN;
3025 // &%(#@ c programs !!!
3026 //static int ifelseval=0;
3027 // If/else can be nested!
// Stack of pending .ie results: the .ie request pushes the negated value of
// its condition and the matching .el request pops it to decide whether the
// else part has to be processed.
3028 static QStack<int> s_ifelseval;
3030 // Process a (mdoc) request involving quotes
3031 static char* process_quote(char* c, int j, const char* open, const char* close)
3033 trans_char(c,'"','\a');
3034 c+=j;
3035 if (*c=='\n') c++; // ### TODO: why? Quote requests cannot be empty!
3036 out_html(open);
3037 c=scan_troff_mandoc(c,1,0);
3038 out_html(close);
3039 out_html(NEWLINE);
3040 if (fillout)
3041 curpos++;
3042 else
3043 curpos=0;
3044 return c;
/**
 * Is the char \p ch a punctuation character in the sense of mdoc(7)?
 * Only the characters . , ; : ( ) [ ] are accepted; letters, digits and
 * every other character are not.
 */
static bool is_mdoc_punctuation( const char ch )
{
    switch ( ch )
    {
        case '.': case ',': case ';': case ':':
        case '(': case ')': case '[': case ']':
            return true;
        default:
            return false;
    }
}
/**
 * Can the char \p c be part of an identifier?
 * \note For groff, an identifier can consist of nearly all ASCII printable non-white-space characters
 * See info:/groff/Identifiers
 *
 * Accepted are the characters from '!' to '~', except the backslash.
 */
static bool is_identifier_char( const char c )
{
    // ### TODO: the backslash should be treated as escape instead!
    if ( c == '\\' )
        return false;
    return c >= '!' && c <= '~';
}
3077 static QByteArray scan_identifier( char*& c )
3079 char* h = c; // help pointer
3080 // ### TODO Groff seems to eat nearly everything as identifier name (info:/groff/Identifiers)
3081 while ( *h && *h != '\a' && *h != '\n' && is_identifier_char( *h ) )
3082 ++h;
3083 const char tempchar = *h;
3084 *h = 0;
3085 const QByteArray name = c;
3086 *h = tempchar;
3087 if ( name.isEmpty() )
3089 kDebug(7107) << "EXCEPTION: identifier empty!";
3091 c = h;
3092 return name;
3095 static char *scan_request(char *c)
3097 // mdoc(7) stuff
3098 static bool mandoc_synopsis=false; /* True if we are in the synopsis section */
3099 static bool mandoc_command=false; /* True if this is mdoc(7) page */
3100 static int mandoc_bd_options; /* Only copes with non-nested Bd's */
3101 static int function_argument=0; // Number of function argument (.Fo, .Fa, .Fc)
3102 // man(7) stuff
3103 static bool ur_ignore=false; // Has .UR a parameter : (for .UE to know if or not to write </a>)
3105 int i=0;
3106 bool mode=false;
3107 char *h=0;
3108 char *wordlist[max_wordlist];
3109 int words;
3110 char *sl;
3111 while (*c==' ' || *c=='\t') c++; // Spaces or tabs allowed between control character and request
3112 if (c[0]=='\n') return c+1;
3113 if (c[0]==escapesym)
3115 /* some pages use .\" .\$1 .\} */
3116 /* .\$1 is too difficult/stuppid */
3117 if (c[1]=='$')
3119 kDebug(7107) << "Found .\\$";
3120 c=skip_till_newline(c); // ### TODO
3122 else
3124 c = scan_escape(c+1);
3126 else
3128 int nlen = 0;
3129 QByteArray macroName;
3130 while (c[nlen] && (c[nlen] != ' ') && (c[nlen] != '\t') && (c[nlen] != '\n') && (c[nlen] != escapesym))
3132 macroName+=c[nlen];
3133 nlen++;
3135 int j = nlen;
3136 while (c[j]==' ' || c[j]=='\t') j++;
3137 /* search macro database of self-defined macros */
3138 QMap<QByteArray,StringDefinition>::const_iterator it=s_stringDefinitionMap.find(macroName);
3139 if (it!=s_stringDefinitionMap.end())
3141 kDebug(7107) << "CALLING MACRO: " << BYTEARRAY( macroName );
3142 const QByteArray oldDollarZero = s_dollarZero; // Previous value of $0
3143 s_dollarZero = macroName;
3144 sl=fill_words(c+j, wordlist, &words, true, &c);
3145 *sl='\0';
3146 for (i=1;i<words; i++) wordlist[i][-1]='\0';
3147 for (i=0; i<words; i++)
3149 char *h=NULL;
3150 if (mandoc_command)
3151 scan_troff_mandoc(wordlist[i],1,&h);
3152 else
3153 scan_troff(wordlist[i],1,&h);
3154 wordlist[i] = qstrdup(h);
3155 delete [] h;
3157 for ( i=words; i<max_wordlist; i++ ) wordlist[i]=NULL;
3158 if ( !(*it).m_output.isEmpty() )
3160 //kDebug(7107) << "Macro content is: "<< BYTEARRAY( (*it).m_output );
3161 const unsigned int length = (*it).m_output.length();
3162 char* work = new char [length+2];
3163 work[0] = '\n'; // The macro must start after an end of line to allow a request on first line
3164 qstrncpy(work+1,(*it).m_output.data(),length+1);
3165 const QList<char*> oldArgumentList( s_argumentList );
3166 s_argumentList.clear();
3167 for ( i = 0 ; i < max_wordlist; i++ )
3169 if (!wordlist[i])
3170 break;
3171 s_argumentList.push_back( wordlist[i] );
3173 const int onff=newline_for_fun;
3174 if (mandoc_command)
3175 scan_troff_mandoc( work + 1, 0, NULL );
3176 else
3177 scan_troff( work + 1, 0, NULL);
3178 delete[] work;
3179 newline_for_fun=onff;
3180 s_argumentList = oldArgumentList;
3182 for (i=0; i<words; i++) delete [] wordlist[i];
3183 *sl='\n';
3184 s_dollarZero = oldDollarZero;
3185 kDebug(7107) << "ENDING MACRO: " << BYTEARRAY( macroName );
3187 else
3189 kDebug(7107) << "REQUEST: " << BYTEARRAY( macroName );
3190 switch (int request = get_request(c, nlen))
3192 case REQ_ab: // groff(7) "ABort"
3194 h=c+j;
3195 while (*h && *h !='\n') h++;
3196 *h='\0';
3197 if (scaninbuff && buffpos)
3199 buffer[buffpos]='\0';
3200 kDebug(7107) << "ABORT: " << buffer;
3202 // ### TODO find a way to display it to the user
3203 kDebug(7107) << "Aborting: .ab " << (c+j);
3204 return 0;
3205 break;
3207 case REQ_An: // mdoc(7) "Author Name"
3209 c+=j;
3210 c=scan_troff_mandoc(c,1,0);
3211 break;
3213 case REQ_di: // groff(7) "end current DIversion"
3215 kDebug(7107) << "Start .di";
3216 c+=j;
3217 if (*c=='\n')
3219 ++c;
3220 break;
3222 const QByteArray name ( scan_identifier( c ) );
3223 while (*c && *c!='\n') c++;
3224 c++;
3225 h=c;
3226 while (*c && qstrncmp(c,".di",3)) while (*c && *c++!='\n');
3227 *c='\0';
3228 char* result=0;
3229 scan_troff(h,0,&result);
3230 QMap<QByteArray,StringDefinition>::iterator it=s_stringDefinitionMap.find(name);
3231 if (it==s_stringDefinitionMap.end())
3233 StringDefinition def;
3234 def.m_length=0;
3235 def.m_output=result;
3236 s_stringDefinitionMap.insert(name,def);
3238 else
3240 (*it).m_length=0;
3241 (*it).m_output=result;
3243 delete[] result;
3244 if (*c) *c='.';
3245 c=skip_till_newline(c);
3246 kDebug(7107) << "end .di";
3247 break;
3249 case REQ_ds: // groff(7) "Define String variable"
3250 mode=true;
3251 case REQ_as: // groff (7) "Append String variable"
3253 kDebug(7107) << "start .ds/.as";
3254 int oldcurpos=curpos;
3255 c+=j;
3256 const QByteArray name( scan_identifier( c) );
3257 if ( name.isEmpty() )
3258 break;
3259 while (*c && isspace(*c)) c++;
3260 if (*c && *c=='"') c++;
3261 single_escape=true;
3262 curpos=0;
3263 char* result=0;
3264 c=scan_troff(c,1,&result);
3265 QMap<QByteArray,StringDefinition>::iterator it=s_stringDefinitionMap.find(name);
3266 if (it==s_stringDefinitionMap.end())
3268 StringDefinition def;
3269 def.m_length=curpos;
3270 def.m_output=result;
3271 s_stringDefinitionMap.insert(name,def);
3273 else
3275 if (mode)
3276 { // .ds Defining String
3277 (*it).m_length=curpos;
3278 (*it).m_output=result;
3280 else
3281 { // .as Appending String
3282 (*it).m_length+=curpos;
3283 (*it).m_output+=result;
3286 delete[] result;
3287 single_escape=false;
3288 curpos=oldcurpos;
3289 kDebug(7107) << "end .ds/.as";
3290 break;
3292 case REQ_br: // groff(7) "line BReak"
3294 if (still_dd)
3295 out_html("<DD>"); // ### VERIFY (does not look like generating good HTML)
3296 else
3297 out_html("<BR>\n");
3298 curpos=0;
3299 c=c+j;
3300 if (c[0]==escapesym) c=scan_escape(c+1);
3301 c=skip_till_newline(c);
3302 break;
3304 case REQ_c2: // groff(7) "reset non-break Control character" (2 means non-break)
3306 c=c+j;
3307 if (*c!='\n')
3308 nobreaksym=*c;
3309 else
3310 nobreaksym='\'';
3311 c=skip_till_newline(c);
3312 break;
3314 case REQ_cc: // groff(7) "reset Control Character"
3316 c=c+j;
3317 if (*c!='\n')
3318 controlsym=*c;
3319 else
3320 controlsym='.';
3321 c=skip_till_newline(c);
3322 break;
3324 case REQ_ce: // groff (7) "CEnter"
3326 c=c+j;
3327 if (*c=='\n')
3328 i=1;
3329 else
3331 i=0;
3332 while ('0'<=*c && *c<='9')
3334 i=i*10+*c-'0';
3335 c++;
3338 c=skip_till_newline(c);
3339 /* center next i lines */
3340 if (i>0)
3342 out_html("<CENTER>\n");
3343 while (i && *c)
3345 char *line=NULL;
3346 c=scan_troff(c,1, &line);
3347 if (line && qstrncmp(line, "<BR>", 4))
3349 out_html(line);
3350 out_html("<BR>\n");
3351 delete [] line; // ### FIXME: memory leak!
3352 i--;
3355 out_html("</CENTER>\n");
3356 curpos=0;
3358 break;
3360 case REQ_ec: // groff(7) "reset Escape Character"
3362 c=c+j;
3363 if (*c!='\n')
3364 escapesym=*c;
3365 else
3366 escapesym='\\';
3367 break;
3368 c=skip_till_newline(c);
3370 case REQ_eo: // groff(7) "turn Escape character Off"
3372 escapesym='\0';
3373 c=skip_till_newline(c);
3374 break;
3376 case REQ_ex: // groff(7) "EXit"
3378 return 0;
3379 break;
3381 case REQ_fc: // groff(7) "set Field and pad Character"
3383 c=c+j;
3384 if (*c=='\n')
3385 fieldsym=padsym='\0';
3386 else
3388 fieldsym=c[0];
3389 padsym=c[1];
3391 c=skip_till_newline(c);
3392 break;
3394 case REQ_fi: // groff(7) "FIll"
3396 if (!fillout)
3398 out_html(set_font("R"));
3399 out_html(change_to_size('0'));
3400 out_html("</PRE>\n");
3402 curpos=0;
3403 fillout=1;
3404 c=skip_till_newline(c);
3405 break;
3407 case REQ_ft: // groff(7) "FonT"
3409 c += j;
3410 h = skip_till_newline( c );
3411 const char oldChar = *h;
3412 *h = 0;
3413 const QByteArray name = c;
3414 // ### TODO: name might contain a variable
3415 if ( name.isEmpty() )
3416 out_html( set_font( "P" ) ); // Previous font
3417 else
3418 out_html( set_font( name ) );
3419 *h = oldChar;
3420 c = h;
3421 break;
3423 case REQ_el: // groff(7) "ELse"
3425 int ifelseval = s_ifelseval.pop();
3426 /* .el anything : else part of if else */
3427 if (ifelseval)
3429 c=c+j;
3430 c[-1]='\n';
3431 c=scan_troff(c,1,NULL);
3433 else
3434 c=skip_till_newline(c+j);
3435 break;
3437 case REQ_ie: // groff(7) "If with Else"
3438 /* .ie c anything : then part of if else */
3439 case REQ_if: // groff(7) "IF"
3441 /* .if c anything
3442 * .if !c anything
3443 * .if N anything
3444 * .if !N anything
3445 * .if 'string1'string2' anything
3446 * .if !'string1'string2' anything
3448 c=c+j;
3449 c=scan_expression(c, &i);
3450 if (request == REQ_ie)
3452 int ifelseval=!i;
3453 s_ifelseval.push( ifelseval );
3455 if (i)
3457 *c='\n';
3458 c++;
3459 c=scan_troff(c,1,NULL);
3461 else
3462 c=skip_till_newline(c);
3463 break;
3465 case REQ_ig: // groff(7) "IGnore"
3467 const char *endwith="..\n";
3468 i=3;
3469 c=c+j;
3470 if (*c!='\n' && *c != '\\')
3472 /* Not newline or comment */
3473 endwith=c-1;i=1;
3474 c[-1]='.';
3475 while (*c && *c!='\n') c++,i++;
3477 c++;
3478 while (*c && qstrncmp(c,endwith,i)) while (*c++!='\n');
3479 while (*c && *c++!='\n');
3480 break;
3482 case REQ_nf: // groff(7) "No Filling"
3484 if (fillout)
3486 out_html(set_font("R"));
3487 out_html(change_to_size('0'));
3488 out_html("<PRE>\n");
3490 curpos=0;
3491 fillout=0;
3492 c=skip_till_newline(c);
3493 break;
3495 case REQ_ps: // groff(7) "previous Point Size"
3497 c=c+j;
3498 if (*c=='\n')
3499 out_html(change_to_size('0'));
3500 else
3502 j=0; i=0;
3503 if (*c=='-')
3505 j= -1;
3506 c++;
3508 else if (*c=='+')
3509 j=1;c++;
3510 c=scan_expression(c, &i);
3511 if (!j)
3513 j=1;
3514 if (i>5) i=i-10;
3516 out_html(change_to_size(i*j));
3518 c=skip_till_newline(c);
3519 break;
3521 case REQ_sp: // groff(7) "SKip one line"
3523 c=c+j;
3524 if (fillout)
3525 out_html("<br><br>");
3526 else
3528 out_html(NEWLINE);
3530 curpos=0;
3531 c=skip_till_newline(c);
3532 break;
3534 case REQ_so: // groff(7) "Include SOurce file"
3536 char *buf;
3537 char *name=NULL;
3538 curpos=0;
3539 c=c+j;
3540 if (*c=='/')
3541 h=c;
3542 else
3544 h=c-3;
3545 h[0]='.';
3546 h[1]='.';
3547 h[2]='/';
3549 while (*c!='\n') c++;
3550 *c='\0';
3551 scan_troff(h,1, &name);
3552 if (name[3]=='/')
3553 h=name+3;
3554 else
3555 h=name;
3556 /* this works alright, except for section 3 */
3557 buf=read_man_page(h);
3558 if (!buf)
3560 kDebug(7107) << "Unable to open or read file: .so " << (h);
3561 out_html("<BLOCKQUOTE>"
3562 "man2html: unable to open or read file.\n");
3563 out_html(h);
3564 out_html("</BLOCKQUOTE>\n");
3566 else
3567 scan_troff(buf+1,0,NULL);
3568 delete [] buf;
3569 delete [] name;
3571 *c++='\n';
3572 break;
3574 case REQ_ta: // gorff(7) "set TAbulators"
3576 c=c+j;
3577 j=0;
3578 while (*c!='\n')
3580 sl=scan_expression(c, &tabstops[j]);
3581 if (j>0 && (*c=='-' || *c=='+')) tabstops[j]+=tabstops[j-1];
3582 c=sl;
3583 while (*c==' ' || *c=='\t') c++;
3584 j++;
3586 maxtstop=j;
3587 curpos=0;
3588 break;
3590 case REQ_ti: // groff(7) "Temporary Indent"
3592 /*while (itemdepth || dl_set[itemdepth]) {
3593 out_html("</DL>\n");
3594 if (dl_set[itemdepth]) dl_set[itemdepth]=0;
3595 else itemdepth--;
3597 out_html("<BR>\n");
3598 c=c+j;
3599 c=scan_expression(c, &j);
3600 for (i=0; i<j; i++) out_html("&nbsp;");
3601 curpos=j;
3602 c=skip_till_newline(c);
3603 break;
3605 case REQ_tm: // groff(7) "TerMinal" ### TODO: what are useful uses for it
3607 c=c+j;
3608 h=c;
3609 while (*c!='\n') c++;
3610 *c='\0';
3611 kDebug(7107) << ".tm " << (h);
3612 *c='\n';
3613 break;
3615 case REQ_B: // man(7) "Bold"
3616 mode=1;
3617 case REQ_I: // man(7) "Italic"
3619 /* parse one line in a certain font */
3620 out_html( set_font( mode?"B":"I" ) );
3621 fill_words(c, wordlist, &words, false, 0);
3622 c=c+j;
3623 if (*c=='\n') c++;
3624 c=scan_troff(c, 1, NULL);
3625 out_html(set_font("R"));
3626 out_html(NEWLINE);
3627 if (fillout)
3628 curpos++;
3629 else
3630 curpos=0;
3631 break;
3633 case REQ_Fd: // mdoc(7) "Function Definition"
3635 // Normal text must be printed in bold, punctuation in regular font
3636 c+=j;
3637 if (*c=='\n') c++; // ### TODO: verify
3638 sl=fill_words(c, wordlist, &words, true, &c);
3639 for (i=0; i<words; i++)
3641 wordlist[i][-1]=' ';
3642 // ### FIXME In theory, only a single punctuation character is recognized as punctuation
3643 if ( is_mdoc_punctuation ( *wordlist[i] ) )
3644 out_html( set_font ( "R" ) );
3645 else
3646 out_html( set_font ( "B" ) );
3647 scan_troff(wordlist[i],1,NULL);
3648 out_html(" ");
3650 // In the mdoc synopsis, there are automatical line breaks (### TODO: before or after?)
3651 if (mandoc_synopsis)
3653 out_html("<br>");
3655 out_html(set_font("R"));
3656 out_html(NEWLINE);
3657 if (!fillout)
3658 curpos=0;
3659 else
3660 curpos++;
3661 break;
3663 case REQ_Fn: // mdoc(7) for "Function calls"
3665 // brackets and commas have to be inserted automatically
3666 c+=j;
3667 if (*c=='\n') c++;
3668 sl=fill_words(c, wordlist, &words, true, &c);
3669 if ( words )
3671 for (i=0; i<words; i++)
3673 wordlist[i][-1]=' ';
3674 if ( i )
3675 out_html( set_font( "I" ) );
3676 else
3677 out_html( set_font( "B" ) );
3678 scan_troff(wordlist[i],1,NULL);
3679 out_html( set_font( "R" ) );
3680 if (i==0)
3682 out_html(" (");
3684 else if (i<words-1)
3685 out_html(", ");
3687 out_html(")");
3689 out_html(set_font("R"));
3690 if (mandoc_synopsis)
3691 out_html("<br>");
3692 out_html(NEWLINE);
3693 if (!fillout)
3694 curpos=0;
3695 else
3696 curpos++;
3697 break;
3699 case REQ_Fo: // mdoc(7) "Function definition Opening"
3701 char* font[2] = { (char*)"B", (char*)"R" };
3702 c+=j;
3703 if (*c=='\n') c++;
3704 char *eol=strchr(c,'\n');
3705 char *semicolon=strchr(c,';');
3706 if ((semicolon!=0) && (semicolon<eol)) *semicolon=' ';
3708 sl=fill_words(c, wordlist, &words, true, &c);
3709 // Normally a .Fo has only one parameter
3710 for (i=0; i<words; i++)
3712 wordlist[i][-1]=' ';
3713 out_html(set_font(font[i&1]));
3714 scan_troff(wordlist[i],1,NULL);
3715 if (i==0)
3717 out_html(" (");
3719 // ### TODO What should happen if there is more than one argument
3720 // else if (i<words-1) out_html(", ");
3722 function_argument=1; // Must be > 0
3723 out_html(set_font("R"));
3724 out_html(NEWLINE);
3725 if (!fillout)
3726 curpos=0;
3727 else
3728 curpos++;
3729 break;
3731 case REQ_Fc:// mdoc(7) "Function definition Close"
3733 // .Fc has no parameter
3734 c+=j;
3735 c=skip_till_newline(c);
3736 char* font[2] = { (char*)"B", (char*)"R" };
3737 out_html(set_font(font[i&1]));
3738 out_html(")");
3739 out_html(set_font("R"));
3740 if (mandoc_synopsis)
3741 out_html("<br>");
3742 out_html(NEWLINE);
3743 if (!fillout)
3744 curpos=0;
3745 else
3746 curpos++;
3747 function_argument=0; // Reset the count variable
3748 break;
3750 case REQ_Fa: // mdoc(7) "Function definition argument"
3752 char* font[2] = { (char*)"B", (char*)"R" };
3753 c+=j;
3754 if (*c=='\n') c++;
3755 sl=fill_words(c, wordlist, &words, true, &c);
3756 out_html(set_font(font[i&1]));
3757 // function_argument==0 means that we had no .Fo before, e.g. in mdoc.samples(7)
3758 if (function_argument > 1)
3760 out_html(", ");
3761 curpos+=2;
3762 function_argument++;
3764 else if (function_argument==1)
3766 // We are only at the first parameter
3767 function_argument++;
3769 for (i=0; i<words; i++)
3771 wordlist[i][-1]=' ';
3772 scan_troff(wordlist[i],1,NULL);
3774 out_html(set_font("R"));
3775 if (!fillout)
3776 curpos=0;
3777 else
3778 curpos++;
3779 break;
3782 case REQ_OP: /* groff manpages use this construction */
3784 /* .OP a b : [ <B>a</B> <I>b</I> ] */
3785 mode=true;
3786 out_html(set_font("R"));
3787 out_html("[");
3788 curpos++;
3789 request_mixed_fonts( c, j, "B", "I", true, false );
3790 break;
3791 // Do not break!
3793 case REQ_Ft: //perhaps "Function return type"
3795 request_mixed_fonts( c, j, "B", "I", false, true );
3796 break;
3798 case REQ_BR:
3800 request_mixed_fonts( c, j, "B", "R", false, false );
3801 break;
3803 case REQ_BI:
3805 request_mixed_fonts( c, j, "B", "I", false, false );
3806 break;
3808 case REQ_IB:
3810 request_mixed_fonts( c, j, "I", "B", false, false );
3811 break;
3813 case REQ_IR:
3815 request_mixed_fonts( c, j, "I", "R", false, false );
3816 break;
3818 case REQ_RB:
3820 request_mixed_fonts( c, j, "R", "B", false, false );
3821 break;
3823 case REQ_RI:
3825 request_mixed_fonts( c, j, "R", "I", false, false );
3826 break;
3828 case REQ_DT: // man(7) "Default Tabulators"
3830 for (j=0;j<20; j++) tabstops[j]=(j+1)*8;
3831 maxtstop=20;
3832 c=skip_till_newline(c);
3833 break;
3835 case REQ_IP: // man(7) "Ident Paragraph"
3837 sl=fill_words(c+j, wordlist, &words, true, &c);
3838 if (!dl_set[itemdepth])
3840 out_html("<DL>\n");
3841 dl_set[itemdepth]=1;
3843 out_html("<DT>");
3844 if (words)
3845 scan_troff(wordlist[0], 1,NULL);
3846 out_html("<DD>");
3847 curpos=0;
3848 break;
3850 case REQ_TP: // man(7) "hanging Tag Paragraph"
3852 if (!dl_set[itemdepth])
3854 out_html("<br><br><DL>\n");
3855 dl_set[itemdepth]=1;
3857 out_html("<DT>");
3858 c=skip_till_newline(c);
3859 /* somewhere a definition ends with '.TP' */
3860 if (!*c)
3861 still_dd=true;
3862 else
3864 // HACK for proc(5)
3865 while (c[0]=='.' && c[1]=='\\' && c[2]=='\"')
3867 // We have a comment, so skip the line
3868 c=skip_till_newline(c);
3870 c=scan_troff(c,1,NULL);
3871 out_html("<DD>");
3873 curpos=0;
3874 break;
3876 case REQ_IX: // "INdex" ### TODO: where is it defined?
3878 /* general index */
3879 c=skip_till_newline(c);
3880 break;
3882 case REQ_P: // man(7) "Paragraph"
3883 case REQ_LP:// man(7) "Paragraph"
3884 case REQ_PP:// man(7) "Paragraph; reset Prevailing indent"
3886 if (dl_set[itemdepth])
3888 out_html("</DL>\n");
3889 dl_set[itemdepth]=0;
3891 if (fillout)
3892 out_html("<br><br>\n");
3893 else
3895 out_html(NEWLINE);
3897 curpos=0;
3898 c=skip_till_newline(c);
3899 break;
3901 case REQ_HP: // man(7) "Hanging indent Paragraph"
3903 if (!dl_set[itemdepth])
3905 out_html("<DL>");
3906 dl_set[itemdepth]=1;
3908 out_html("<DT>\n");
3909 still_dd=true;
3910 c=skip_till_newline(c);
3911 curpos=0;
3912 break;
3914 case REQ_PD: // man(7) "Paragraph Distance"
3916 c=skip_till_newline(c);
3917 break;
3919 case REQ_Rs: // mdoc(7) "Relative margin Start"
3920 case REQ_RS: // man(7) "Relative margin Start"
3922 sl=fill_words(c+j, wordlist, &words, true, 0);
3923 j=1;
3924 if (words>0) scan_expression(wordlist[0], &j);
3925 if (j>=0)
3927 itemdepth++;
3928 dl_set[itemdepth]=0;
3929 out_html("<DL><DT><DD>");
3930 c=skip_till_newline(c);
3931 curpos=0;
3932 break;
3935 case REQ_Re: // mdoc(7) "Relative margin End"
3936 case REQ_RE: // man(7) "Relative margin End"
3938 if (itemdepth > 0)
3940 if (dl_set[itemdepth]) out_html("</DL>");
3941 out_html("</DL>\n");
3942 itemdepth--;
3944 c=skip_till_newline(c);
3945 curpos=0;
3946 break;
3948 case REQ_SB: // man(7) "Small; Bold"
3950 out_html(set_font("B"));
3951 out_html("<small>");
3952 trans_char(c,'"','\a'); // ### VERIFY
3953 c=scan_troff(c+j, 1, NULL);
3954 out_html("</small>");
3955 out_html(set_font("R"));
3956 break;
3958 case REQ_SM: // man(7) "SMall"
3960 c=c+j;
3961 if (*c=='\n') c++;
3962 out_html("<small>");
3963 trans_char(c,'"','\a'); // ### VERIFY
3964 c=scan_troff(c,1,NULL);
3965 out_html("</small>");
3966 break;
3968 case REQ_Ss: // mdoc(7) "Sub Section"
3969 mandoc_command = 1;
3970 case REQ_SS: // mdoc(7) "Sub Section"
3971 mode=true;
3972 case REQ_Sh: // mdoc(7) "Sub Header"
3973 /* hack for fallthru from above */
3974 mandoc_command = !mode || mandoc_command;
3975 case REQ_SH: // man(7) "Sub Header"
3977 c=c+j;
3978 if (*c=='\n') c++;
3979 while (itemdepth || dl_set[itemdepth])
3981 out_html("</DL>\n");
3982 if (dl_set[itemdepth])
3983 dl_set[itemdepth]=0;
3984 else if (itemdepth > 0)
3985 itemdepth--;
3987 out_html(set_font("R"));
3988 out_html(change_to_size(0));
3989 if (!fillout)
3991 fillout=1;
3992 out_html("</PRE>");
3994 trans_char(c,'"', '\a');
3995 if (section)
3997 out_html("</div>\n");
3998 section=0;
4000 if (mode)
4001 out_html("\n<H3>");
4002 else
4003 out_html("\n<H2>");
4004 mandoc_synopsis = qstrncmp(c, "SYNOPSIS", 8) == 0;
4005 c = mandoc_command ? scan_troff_mandoc(c,1,NULL) : scan_troff(c,1,NULL);
4006 if (mode)
4007 out_html("</H3>\n");
4008 else
4009 out_html("</H2>\n");
4010 out_html("<div>\n");
4012 section=1;
4013 curpos=0;
4014 break;
4016 case REQ_Sx: // mdoc(7)
4018 // reference to a section header
4019 out_html(set_font("B"));
4020 trans_char(c,'"','\a');
4021 c=c+j;
4022 if (*c=='\n') c++;
4023 c=scan_troff(c, 1, NULL);
4024 out_html(set_font("R"));
4025 out_html(NEWLINE);
4026 if (fillout)
4027 curpos++;
4028 else
4029 curpos=0;
4030 break;
4032 case REQ_TS: // Table Start tbl(1)
4034 c=scan_table(c);
4035 break;
4037 case REQ_Dt: /* mdoc(7) */
4038 mandoc_command = true;
4039 case REQ_TH: // man(7) "Title Header"
4041 if (!output_possible)
4043 sl = fill_words(c+j, wordlist, &words, true, &c);
4044 // ### TODO: the page should be displayed even if it is "anonymous" (words==0)
4045 if (words>=1)
4047 for (i=1; i<words; i++) wordlist[i][-1]='\0';
4048 *sl='\0';
4049 for (i=0; i<words; i++)
4051 if (wordlist[i][0] == '\007')
4052 wordlist[i]++;
4053 if (wordlist[i][qstrlen(wordlist[i])-1] == '\007')
4054 wordlist[i][qstrlen(wordlist[i])-1] = 0;
4056 output_possible=true;
4057 out_html( DOCTYPE"<HTML>\n<HEAD>\n");
4058 #ifdef SIMPLE_MAN2HTML
4059 // Most English man pages are in ISO-8859-1
4060 out_html("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-1\">\n");
4061 #else
4062 //let KEncodingDetector decide. (it should be better then charset="System")
4063 //TODO can we check if the charset could be determined from path? like share/man/ru.UTF8
4064 // kio_man transforms from local to UTF-8
4065 // out_html("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=");
4066 // out_html(QTextCodec::codecForLocale()->name());
4067 // out_html("\">\n");
4068 #endif
4069 out_html("<TITLE>");
4070 out_html(scan_troff(wordlist[0], 0, NULL));
4071 out_html( " Manpage</TITLE>\n");
4072 out_html( "<link rel=\"stylesheet\" href=\"");
4073 out_html(htmlPath);
4074 out_html("/kde-default.css\" type=\"text/css\">\n" );
4075 out_html( "<meta name=\"ROFF Type\" content=\"");
4076 if (mandoc_command)
4077 out_html("mdoc");
4078 else
4079 out_html("man");
4080 out_html("\">\n");
4081 out_html( "</HEAD>\n\n" );
4082 out_html("<BODY BGCOLOR=\"#FFFFFF\">\n\n" );
4083 out_html("<div style=\"background-image: url(");
4084 out_html(cssPath);
4085 out_html("/top-middle.png); width: 100%; height: 131pt;\">\n" );
4086 out_html("<div style=\"position: absolute; right: 0pt;\">\n");
4087 out_html("<img src=\"");
4088 out_html(htmlPath);
4089 out_html("/top-right-konqueror.png\" style=\"margin: 0pt\" alt=\"Top right\">\n");
4090 out_html("</div>\n");
4092 out_html("<div style=\"position: absolute; left: 0pt;\">\n");
4093 out_html("<img src=\"");
4094 out_html(htmlPath);
4095 out_html("/top-left.png\" style=\"margin: 0pt\" alt=\"Top left\">\n");
4096 out_html("</div>\n");
4097 out_html("<div style=\"position: absolute; top: 25pt; right: 100pt; text-align: right; font-size: xx-large; font-weight: bold; text-shadow: #fff 0pt 0pt 5pt; color: #444\">\n");
4098 out_html( scan_troff(wordlist[0], 0, NULL ) );
4099 out_html("</div>\n");
4100 out_html("</div>\n");
4101 out_html("<div style=\"margin-left: 5em; margin-right: 5em;\">\n");
4102 out_html("<h1>" );
4103 out_html( scan_troff(wordlist[0], 0, NULL ) );
4104 out_html( "</h1>\n" );
4105 if (words>1)
4107 out_html("Section: " );
4108 if (!mandoc_command && words>4)
4109 out_html(scan_troff(wordlist[4], 0, NULL) );
4110 else
4111 out_html(section_name(wordlist[1]));
4112 out_html(" (");
4113 out_html(scan_troff(wordlist[1], 0, NULL));
4114 out_html(")\n");
4116 else
4118 out_html("Section not specified");
4120 *sl='\n';
4123 else
4125 kWarning(7107) << ".TH found but output not possible" ;
4126 c=skip_till_newline(c);
4128 curpos=0;
4129 break;
4131 case REQ_TX: // mdoc(7)
4133 sl=fill_words(c+j, wordlist, &words, true, &c);
4134 *sl='\0';
4135 out_html(set_font("I"));
4136 if (words>1) wordlist[1][-1]='\0';
4137 const char *c2=lookup_abbrev(wordlist[0]);
4138 curpos+=qstrlen(c2);
4139 out_html(c2);
4140 out_html(set_font("R"));
4141 if (words>1)
4142 out_html(wordlist[1]);
4143 *sl='\n';
4144 break;
4146 case REQ_rm: // groff(7) "ReMove"
4147 /* .rm xx : Remove request, macro or string */
4148 mode=true;
4149 case REQ_rn: // groff(7) "ReName"
4150 /* .rn xx yy : Rename request, macro or string xx to yy */
4152 kDebug(7107) << "start .rm/.rn";
4153 c+=j;
4154 const QByteArray name( scan_identifier( c ) );
4155 if ( name.isEmpty() )
4157 kDebug(7107) << "EXCEPTION: empty origin string to remove/rename";
4158 break;
4160 QByteArray name2;
4161 if ( !mode )
4163 while (*c && isspace(*c) && *c!='\n') ++c;
4164 name2 = scan_identifier( c );
4165 if ( name2.isEmpty() )
4167 kDebug(7107) << "EXCEPTION: empty destination string to rename";
4168 break;
4171 c=skip_till_newline(c);
4172 QMap<QByteArray,StringDefinition>::iterator it=s_stringDefinitionMap.find(name);
4173 if (it==s_stringDefinitionMap.end())
4175 kDebug(7107) << "EXCEPTION: cannot find string to rename or remove: " << BYTEARRAY( name );
4177 else
4179 if (mode)
4181 // .rm ReMove
4182 s_stringDefinitionMap.remove(name); // ### QT4: removeAll
4184 else
4186 // .rn ReName
4187 StringDefinition def=(*it);
4188 s_stringDefinitionMap.remove(name); // ### QT4: removeAll
4189 s_stringDefinitionMap.insert(name2,def);
4192 kDebug(7107) << "end .rm/.rn";
4193 break;
4195 case REQ_nx:
4196 case REQ_in: // groff(7) "INdent"
4198 /* .in +-N : Indent */
4199 c=skip_till_newline(c);
4200 break;
4202 case REQ_nr: // groff(7) "Number Register"
4204 kDebug(7107) << "start .nr";
4205 c += j;
4206 const QByteArray name( scan_identifier( c ) );
4207 if ( name.isEmpty() )
4209 kDebug(7107) << "EXCEPTION: empty name for register variable";
4210 break;
4212 while ( *c && ( *c==' ' || *c=='\t' ) ) c++;
4213 int sign = 0;
4214 if ( *c && ( *c == '+' || *c == '-' ) )
4216 if ( *c == '+' )
4217 sign = 1;
4218 else if ( *c == '-' )
4219 sign = -1;
4221 int value = 0;
4222 int increment = 0;
4223 c=scan_expression( c, &value );
4224 if ( *c && *c!='\n')
4226 while ( *c && ( *c==' ' || *c=='\t' ) ) c++;
4227 c=scan_expression( c, &increment );
4229 c = skip_till_newline( c );
4230 QMap <QByteArray, NumberDefinition>::iterator it = s_numberDefinitionMap.find( name );
4231 if ( it == s_numberDefinitionMap.end() )
4233 if ( sign < 1 )
4234 value = -value;
4235 NumberDefinition def( value, increment );
4236 s_numberDefinitionMap.insert( name, def );
4238 else
4240 if ( sign > 0 )
4241 (*it).m_value += value;
4242 else if ( sign < 0 )
4243 (*it).m_value += - value;
4244 else
4245 (*it).m_value = value;
4246 (*it).m_increment = increment;
4248 kDebug(7107) << "end .nr";
4249 break;
4251 case REQ_am: // groff(7) "Append Macro"
4252 /* .am xx yy : append to a macro. */
4253 /* define or handle as .ig yy */
4254 mode=true;
4255 case REQ_de: // groff(7) "DEfine macro"
4256 /* .de xx yy : define or redefine macro xx; end at .yy (..) */
4257 /* define or handle as .ig yy */
4259 kDebug(7107) << "Start .am/.de";
4260 c+=j;
4261 char *next_line;
4262 sl = fill_words(c, wordlist, &words, true, &next_line);
4263 char *nameStart = wordlist[0];
4264 c = nameStart;
4265 while (*c && (*c != ' ') && (*c != '\n')) c++;
4266 *c = '\0';
4267 const QByteArray name(nameStart);
4269 QByteArray endmacro;
4270 if (words == 1)
4272 endmacro="..";
4274 else
4276 endmacro=".";
4277 c = wordlist[1];
4278 while (*c && (*c != ' ') && (*c != '\n'))
4279 endmacro+=*c++;
4281 c = next_line;
4282 sl=c;
4283 const int length=qstrlen(endmacro);
4284 while (*c && qstrncmp(c,endmacro,length))
4285 c=skip_till_newline(c);
4287 QByteArray macro;
4288 while (sl!=c)
4290 if (sl[0]=='\\' && sl[1]=='\\')
4292 macro+='\\';
4293 sl++;
4295 else
4296 macro+=*sl;
4297 sl++;
4300 QMap<QByteArray,StringDefinition>::iterator it=s_stringDefinitionMap.find(name);
4301 if (it==s_stringDefinitionMap.end())
4303 StringDefinition def;
4304 def.m_length=0;
4305 def.m_output=macro;
4306 s_stringDefinitionMap.insert(name,def);
4308 else if (mode)
4310 // .am Append Macro
4311 (*it).m_length=0; // It could be formerly a string
4312 if ( ! (*it).m_output.endsWith( '\n' ) )
4313 (*it).m_output+='\n';
4314 (*it).m_output+=macro;
4316 else
4318 // .de DEfine macro
4319 (*it).m_length=0; // It could be formerly a string
4320 (*it).m_output=macro;
4322 c=skip_till_newline(c);
4323 kDebug(7107) << "End .am/.de";
4324 break;
4326 case REQ_Bl: // mdoc(7) "Begin List"
4328 char list_options[NULL_TERMINATED(MED_STR_MAX)];
4329 char *nl = strchr(c,'\n');
4330 c=c+j;
4331 if (dl_set[itemdepth])
4332 /* These things can nest. */
4333 itemdepth++;
4334 if (nl)
4336 /* Parse list options */
4337 strlimitcpy(list_options, c, nl - c, MED_STR_MAX);
4339 if (strstr(list_options, "-bullet"))
4341 /* HTML Unnumbered List */
4342 dl_set[itemdepth] = BL_BULLET_LIST;
4343 out_html("<UL>\n");
4345 else if (strstr(list_options, "-enum"))
4347 /* HTML Ordered List */
4348 dl_set[itemdepth] = BL_ENUM_LIST;
4349 out_html("<OL>\n");
4351 else
4353 /* HTML Descriptive List */
4354 dl_set[itemdepth] = BL_DESC_LIST;
4355 out_html("<DL>\n");
4357 if (fillout)
4358 out_html("<br><br>\n");
4359 else
4361 out_html(NEWLINE);
4363 curpos=0;
4364 c=skip_till_newline(c);
4365 break;
4367 case REQ_El: // mdoc(7) "End List"
4369 c=c+j;
4370 if (dl_set[itemdepth] & BL_DESC_LIST)
4371 out_html("</DL>\n");
4372 else if (dl_set[itemdepth] & BL_BULLET_LIST)
4373 out_html("</UL>\n");
4374 else if (dl_set[itemdepth] & BL_ENUM_LIST)
4375 out_html("</OL>\n");
4376 dl_set[itemdepth]=0;
4377 if (itemdepth > 0) itemdepth--;
4378 if (fillout)
4379 out_html("<br><br>\n");
4380 else
4382 out_html(NEWLINE);
4384 curpos=0;
4385 c=skip_till_newline(c);
4386 break;
4388 case REQ_It: // mdoc(7) "list ITem"
4390 c=c+j;
4391 if (qstrncmp(c, "Xo", 2) == 0 && isspace(*(c+2)))
4392 c = skip_till_newline(c);
4393 if (dl_set[itemdepth] & BL_DESC_LIST)
4395 out_html("<DT>");
4396 out_html(set_font("B"));
4397 if (*c=='\n')
4399 /* Don't allow embedded comms after a newline */
4400 c++;
4401 c=scan_troff(c,1,NULL);
4403 else
4405 /* Do allow embedded comms on the same line. */
4406 c=scan_troff_mandoc(c,1,NULL);
4408 out_html(set_font("R"));
4409 out_html(NEWLINE);
4410 out_html("<DD>");
4412 else if (dl_set[itemdepth] & (BL_BULLET_LIST | BL_ENUM_LIST))
4414 out_html("<LI>");
4415 c=scan_troff_mandoc(c,1,NULL);
4416 out_html(NEWLINE);
4418 if (fillout)
4419 curpos++;
4420 else
4421 curpos=0;
4422 break;
4424 case REQ_Bk: /* mdoc(7) */
4425 case REQ_Ek: /* mdoc(7) */
4426 case REQ_Dd: /* mdoc(7) */
4427 case REQ_Os: // mdoc(7) "Operating System"
4429 trans_char(c,'"','\a');
4430 c=c+j;
4431 if (*c=='\n') c++;
4432 c=scan_troff_mandoc(c, 1, NULL);
4433 out_html(NEWLINE);
4434 if (fillout)
4435 curpos++;
4436 else
4437 curpos=0;
4438 break;
4440 case REQ_Bt: // mdoc(7) "Beta Test"
4442 trans_char(c,'"','\a');
4443 c=c+j;
4444 out_html(" is currently in beta test.");
4445 if (fillout)
4446 curpos++;
4447 else
4448 curpos=0;
4449 break;
4451 case REQ_At: /* mdoc(7) */
4452 case REQ_Fx: /* mdoc(7) */
4453 case REQ_Nx: /* mdoc(7) */
4454 case REQ_Ox: /* mdoc(7) */
4455 case REQ_Bx: /* mdoc(7) */
4456 case REQ_Ux: /* mdoc(7) */
4457 case REQ_Dx: /* mdoc(7) */
4459 bool parsable=true;
4460 trans_char(c,'"','\a');
4461 c=c+j;
4462 if (*c=='\n') c++;
4463 if (request==REQ_At)
4465 out_html("AT&amp;T UNIX ");
4466 parsable=false;
4468 else if (request==REQ_Fx)
4470 out_html("FreeBSD ");
4471 parsable=false;
4473 else if (request==REQ_Nx)
4474 out_html("NetBSD ");
4475 else if (request==REQ_Ox)
4476 out_html("OpenBSD ");
4477 else if (request==REQ_Bx)
4478 out_html("BSD ");
4479 else if (request==REQ_Ux)
4480 out_html("UNIX ");
4481 else if (request==REQ_Dx)
4482 out_html("DragonFly ");
4483 if (parsable)
4484 c=scan_troff_mandoc(c,1,0);
4485 else
4486 c=scan_troff(c,1,0);
4487 if (fillout)
4488 curpos++;
4489 else
4490 curpos=0;
4491 break;
4493 case REQ_Dl: /* mdoc(7) */
4495 c=c+j;
4496 out_html(NEWLINE);
4497 out_html("<BLOCKQUOTE>");
4498 if (*c=='\n') c++;
4499 c=scan_troff_mandoc(c, 1, NULL);
4500 out_html("</BLOCKQUOTE>");
4501 if (fillout)
4502 curpos++;
4503 else
4504 curpos=0;
4505 break;
4507 case REQ_Bd: /* mdoc(7) */
4508 { /* Seems like a kind of example/literal mode */
4509 char bd_options[NULL_TERMINATED(MED_STR_MAX)];
4510 char *nl = strchr(c,'\n');
4511 c=c+j;
4512 if (nl)
4513 strlimitcpy(bd_options, c, nl - c, MED_STR_MAX);
4514 out_html(NEWLINE);
4515 mandoc_bd_options = 0; /* Remember options for terminating Bl */
4516 if (strstr(bd_options, "-offset indent"))
4518 mandoc_bd_options |= BD_INDENT;
4519 out_html("<BLOCKQUOTE>\n");
4521 if ( strstr(bd_options, "-literal") || strstr(bd_options, "-unfilled"))
4523 if (fillout)
4525 mandoc_bd_options |= BD_LITERAL;
4526 out_html(set_font("R"));
4527 out_html(change_to_size('0'));
4528 out_html("<PRE>\n");
4530 curpos=0;
4531 fillout=0;
4533 c=skip_till_newline(c);
4534 break;
4536 case REQ_Ed: /* mdoc(7) */
4538 if (mandoc_bd_options & BD_LITERAL)
4540 if (!fillout)
4542 out_html(set_font("R"));
4543 out_html(change_to_size('0'));
4544 out_html("</PRE>\n");
4547 if (mandoc_bd_options & BD_INDENT)
4548 out_html("</BLOCKQUOTE>\n");
4549 curpos=0;
4550 fillout=1;
4551 c=skip_till_newline(c);
4552 break;
4554 case REQ_Be: /* mdoc(7) */
4556 c=c+j;
4557 if (fillout)
4558 out_html("<br><br>");
4559 else
4561 out_html(NEWLINE);
4563 curpos=0;
4564 c=skip_till_newline(c);
4565 break;
4567 case REQ_Xr: /* mdoc(7) */ // ### FIXME: it should issue a <a href="man:somewhere(x)"> directly
4569 /* Translate xyz 1 to xyz(1)
4570 * Allow for multiple spaces. Allow the section to be missing.
4572 char buff[NULL_TERMINATED(MED_STR_MAX)];
4573 char *bufptr;
4574 trans_char(c,'"','\a');
4575 bufptr = buff;
4576 c = c+j;
4577 if (*c == '\n') c++; /* Skip spaces */
4578 while (isspace(*c) && *c != '\n') c++;
4579 while (isalnum(*c) || *c == '.' || *c == ':' || *c == '_' || *c == '-')
4581 /* Copy the xyz part */
4582 *bufptr = *c;
4583 bufptr++;
4584 if (bufptr >= buff + MED_STR_MAX) break;
4585 c++;
4587 while (isspace(*c) && *c != '\n') c++; /* Skip spaces */
4588 if (isdigit(*c))
4590 /* Convert the number if there is one */
4591 *bufptr = '(';
4592 bufptr++;
4593 if (bufptr < buff + MED_STR_MAX)
4595 while (isalnum(*c))
4597 *bufptr = *c;
4598 bufptr++;
4599 if (bufptr >= buff + MED_STR_MAX) break;
4600 c++;
4602 if (bufptr < buff + MED_STR_MAX)
4604 *bufptr = ')';
4605 bufptr++;
4609 while (*c != '\n')
4611 /* Copy the remainder */
4612 if (!isspace(*c))
4614 *bufptr = *c;
4615 bufptr++;
4616 if (bufptr >= buff + MED_STR_MAX) break;
4618 c++;
4620 *bufptr = '\n';
4621 bufptr[1] = 0;
4622 scan_troff_mandoc(buff, 1, NULL);
4623 out_html(NEWLINE);
4624 if (fillout)
4625 curpos++;
4626 else
4627 curpos=0;
4628 break;
4630 case REQ_Fl: // mdoc(7) "FLags"
4632 trans_char(c,'"','\a');
4633 c+=j;
4634 sl=fill_words(c, wordlist, &words, true, &c);
4635 out_html(set_font("B"));
4636 if (!words)
4638 out_html("-"); // stdin or stdout
4640 else
4642 for (i=0;i<words;++i)
4644 if (ispunct(wordlist[i][0]) && wordlist[i][0]!='-')
4646 scan_troff_mandoc(wordlist[i], 1, NULL);
4648 else
4650 if (i>0)
4651 out_html(" "); // Put a space between flags
4652 out_html("-");
4653 scan_troff_mandoc(wordlist[i], 1, NULL);
4657 out_html(set_font("R"));
4658 out_html(NEWLINE);
4659 if (fillout)
4660 curpos++;
4661 else
4662 curpos=0;
4663 break;
4665 case REQ_Pa: /* mdoc(7) */
4666 case REQ_Pf: /* mdoc(7) */
4668 trans_char(c,'"','\a');
4669 c=c+j;
4670 if (*c=='\n') c++;
4671 c=scan_troff_mandoc(c, 1, NULL);
4672 out_html(NEWLINE);
4673 if (fillout)
4674 curpos++;
4675 else
4676 curpos=0;
4677 break;
4679 case REQ_Pp: /* mdoc(7) */
4681 if (fillout)
4682 out_html("<br><br>\n");
4683 else
4685 out_html(NEWLINE);
4687 curpos=0;
4688 c=skip_till_newline(c);
4689 break;
4691 case REQ_Aq: // mdoc(7) "Angle bracket Quote"
4692 c=process_quote(c,j,"&lt;","&gt;");
4693 break;
4694 case REQ_Bq: // mdoc(7) "Bracket Quote"
4695 c=process_quote(c,j,"[","]");
4696 break;
4697 case REQ_Dq: // mdoc(7) "Double Quote"
4698 c=process_quote(c,j,"&ldquo;","&rdquo;");
4699 break;
4700 case REQ_Pq: // mdoc(7) "Parenthese Quote"
4701 c=process_quote(c,j,"(",")");
4702 break;
4703 case REQ_Qq: // mdoc(7) "straight double Quote"
4704 c=process_quote(c,j,"&quot;","&quot;");
4705 break;
4706 case REQ_Sq: // mdoc(7) "Single Quote"
4707 c=process_quote(c,j,"&lsquo;","&rsquo;");
4708 break;
4709 case REQ_Op: /* mdoc(7) */
4711 trans_char(c,'"','\a');
4712 c=c+j;
4713 if (*c=='\n') c++;
4714 out_html(set_font("R"));
4715 out_html("[");
4716 c=scan_troff_mandoc(c, 1, NULL);
4717 out_html(set_font("R"));
4718 out_html("]");
4719 out_html(NEWLINE);
4720 if (fillout)
4721 curpos++;
4722 else
4723 curpos=0;
4724 break;
4726 case REQ_Oo: /* mdoc(7) */
4728 trans_char(c,'"','\a');
4729 c=c+j;
4730 if (*c=='\n') c++;
4731 out_html(set_font("R"));
4732 out_html("[");
4733 c=scan_troff_mandoc(c, 1, NULL);
4734 if (fillout)
4735 curpos++;
4736 else
4737 curpos=0;
4738 break;
4740 case REQ_Oc: /* mdoc(7) */
4742 trans_char(c,'"','\a');
4743 c=c+j;
4744 c=scan_troff_mandoc(c, 1, NULL);
4745 out_html(set_font("R"));
4746 out_html("]");
4747 if (fillout)
4748 curpos++;
4749 else
4750 curpos=0;
4751 break;
4753 case REQ_Ql: /* mdoc(7) */
4755 /* Single quote first word in the line */
4756 char *sp;
4757 trans_char(c,'"','\a');
4758 c=c+j;
4759 if (*c=='\n') c++;
4760 sp = c;
4763 /* Find first whitespace after the
4764 * first word that isn't a mandoc macro
4766 while (*sp && isspace(*sp)) sp++;
4767 while (*sp && !isspace(*sp)) sp++;
4768 } while (*sp && isupper(*(sp-2)) && islower(*(sp-1)));
4770 /* Use a newline to mark the end of text to
4771 * be quoted
4773 if (*sp) *sp = '\n';
4774 out_html("`"); /* Quote the text */
4775 c=scan_troff_mandoc(c, 1, NULL);
4776 out_html("'");
4777 out_html(NEWLINE);
4778 if (fillout)
4779 curpos++;
4780 else
4781 curpos=0;
4782 break;
4784 case REQ_Ar: /* mdoc(7) */
4786 /* parse one line in italics */
4787 out_html(set_font("I"));
4788 trans_char(c,'"','\a');
4789 c=c+j;
4790 if (*c=='\n')
4792 /* An empty Ar means "file ..." */
4793 out_html("file ...");
4795 else
4796 c=scan_troff_mandoc(c, 1, NULL);
4797 out_html(set_font("R"));
4798 out_html(NEWLINE);
4799 if (fillout)
4800 curpos++;
4801 else
4802 curpos=0;
4803 break;
4805 case REQ_Em: /* mdoc(7) */
4807 out_html("<em>");
4808 trans_char(c,'"','\a');
4809 c+=j;
4810 if (*c=='\n') c++;
4811 c=scan_troff_mandoc(c, 1, NULL);
4812 out_html("</em>");
4813 out_html(NEWLINE);
4814 if (fillout)
4815 curpos++;
4816 else
4817 curpos=0;
4818 break;
4820 case REQ_Ad: /* mdoc(7) */
4821 case REQ_Va: /* mdoc(7) */
4822 case REQ_Xc: /* mdoc(7) */
4824 /* parse one line in italics */
4825 out_html(set_font("I"));
4826 trans_char(c,'"','\a');
4827 c=c+j;
4828 if (*c=='\n') c++;
4829 c=scan_troff_mandoc(c, 1, NULL);
4830 out_html(set_font("R"));
4831 out_html(NEWLINE);
4832 if (fillout)
4833 curpos++;
4834 else
4835 curpos=0;
4836 break;
4838 case REQ_Nd: /* mdoc(7) */
4840 trans_char(c,'"','\a');
4841 c=c+j;
4842 if (*c=='\n') c++;
4843 out_html(" - ");
4844 c=scan_troff_mandoc(c, 1, NULL);
4845 out_html(NEWLINE);
4846 if (fillout)
4847 curpos++;
4848 else
4849 curpos=0;
4850 break;
4852 case REQ_Nm: // mdoc(7) "Name Macro" ### FIXME
4854 static char mandoc_name[NULL_TERMINATED(SMALL_STR_MAX)] = ""; // ### TODO Use QByteArray
4855 trans_char(c,'"','\a');
4856 c=c+j;
4858 if (mandoc_synopsis && mandoc_name_count)
4860 /* Break lines only in the Synopsis.
4861 * The Synopsis section seems to be treated
4862 * as a special case - Bummer!
4864 out_html("<BR>");
4866 else if (!mandoc_name_count)
4868 const char *nextbreak = strchr(c, '\n');
4869 const char *nextspace = strchr(c, ' ');
4870 if (nextspace < nextbreak)
4871 nextbreak = nextspace;
4873 if (nextbreak)
4875 /* Remember the name for later. */
4876 strlimitcpy(mandoc_name, c, nextbreak - c, SMALL_STR_MAX);
4879 mandoc_name_count++;
4881 out_html(set_font("B"));
4882 // ### FIXME: fill_words must be used
4883 while (*c == ' '|| *c == '\t') c++;
4884 if ((tolower(*c) >= 'a' && tolower(*c) <= 'z' ) || (*c >= '0' && *c <= '9'))
4886 // alphanumeric argument
4887 c=scan_troff_mandoc(c, 1, NULL);
4888 out_html(set_font("R"));
4889 out_html(NEWLINE);
4891 else
4893 /* If Nm has no argument, use one from an earlier
4894 * Nm command that did have one. Hope there aren't
4895 * too many commands that do this.
4897 out_html(mandoc_name);
4898 out_html(set_font("R"));
4901 if (fillout)
4902 curpos++;
4903 else
4904 curpos=0;
4905 break;
4907 case REQ_Cd: /* mdoc(7) */
4908 case REQ_Cm: /* mdoc(7) */
4909 case REQ_Ic: /* mdoc(7) */
4910 case REQ_Ms: /* mdoc(7) */
4911 case REQ_Or: /* mdoc(7) */
4912 case REQ_Sy: /* mdoc(7) */
4914 /* parse one line in bold */
4915 out_html(set_font("B"));
4916 trans_char(c,'"','\a');
4917 c=c+j;
4918 if (*c=='\n') c++;
4919 c=scan_troff_mandoc(c, 1, NULL);
4920 out_html(set_font("R"));
4921 out_html(NEWLINE);
4922 if (fillout)
4923 curpos++;
4924 else
4925 curpos=0;
4926 break;
4928 // ### FIXME: punctuation is handled badly!
4929 case REQ_Dv: /* mdoc(7) */
4930 case REQ_Ev: /* mdoc(7) */
4931 case REQ_Fr: /* mdoc(7) */
4932 case REQ_Li: /* mdoc(7) */
4933 case REQ_No: /* mdoc(7) */
4934 case REQ_Ns: /* mdoc(7) */
4935 case REQ_Tn: /* mdoc(7) */
4936 case REQ_nN: /* mdoc(7) */
4938 trans_char(c,'"','\a');
4939 c=c+j;
4940 if (*c=='\n') c++;
4941 out_html(set_font("B"));
4942 c=scan_troff_mandoc(c, 1, NULL);
4943 out_html(set_font("R"));
4944 out_html(NEWLINE);
4945 if (fillout)
4946 curpos++;
4947 else
4948 curpos=0;
4949 break;
4951 case REQ_perc_A: /* mdoc(7) biblio stuff */
4952 case REQ_perc_D:
4953 case REQ_perc_N:
4954 case REQ_perc_O:
4955 case REQ_perc_P:
4956 case REQ_perc_Q:
4957 case REQ_perc_V:
4959 c=c+j;
4960 if (*c=='\n') c++;
4961 c=scan_troff(c, 1, NULL); /* Don't allow embedded mandoc coms */
4962 if (fillout)
4963 curpos++;
4964 else
4965 curpos=0;
4966 break;
4968 case REQ_perc_B:
4969 case REQ_perc_J:
4970 case REQ_perc_R:
4971 case REQ_perc_T:
4973 c=c+j;
4974 out_html(set_font("I"));
4975 if (*c=='\n') c++;
4976 c=scan_troff(c, 1, NULL); /* Don't allow embedded mandoc coms */
4977 out_html(set_font("R"));
4978 if (fillout)
4979 curpos++;
4980 else
4981 curpos=0;
4982 break;
4984 case REQ_UR: // ### FIXME man(7) "URl"
4986 ignore_links=true;
4987 c+=j;
4988 char* newc;
4989 h=fill_words(c, wordlist, &words, false, &newc);
4990 *h=0;
4991 if (words>0)
4993 h=wordlist[0];
4994 // A parameter : means that we do not want an URL, not here and not until .UE
4995 ur_ignore=(!qstrcmp(h,":"));
4997 else
4999 // We cannot find the URL, assume :
5000 ur_ignore=true;
5001 h=0;
5003 if (!ur_ignore && words>0)
5005 out_html("<a href=\"");
5006 out_html(h);
5007 out_html("\">");
5009 c=newc; // Go to next line
5010 break;
5012 case REQ_UE: // ### FIXME man(7) "Url End"
5014 c+=j;
5015 c = skip_till_newline(c);
5016 if (!ur_ignore)
5018 out_html("</a>");
5020 ur_ignore=false;
5021 ignore_links=false;
5022 break;
5024 case REQ_UN: // ### FIXME man(7) "Url Named anchor"
5026 c+=j;
5027 char* newc;
5028 h=fill_words(c, wordlist, &words, false, &newc);
5029 *h=0;
5030 if (words>0)
5032 h=wordlist[0];
5033 out_html("<a name=\">");
5034 out_html(h);
5035 out_html("\" id=\"");
5036 out_html(h);
5037 out_html("\"></a>");
5039 c=newc;
5040 break;
5042 case REQ_nroff: // groff(7) "NROFF mode"
5043 mode = true;
5044 case REQ_troff: // groff(7) "TROFF mode"
5046 s_nroff = mode;
5047 c+=j;
5048 c = skip_till_newline(c);
5050 case REQ_als: // groff(7) "ALias String"
5053 * Note an alias is supposed to be something like a hard link
5054 * However to make it simplier, we only copy the string.
5056 // Be careful: unlike .rn, the destination is first, origin is second
5057 kDebug(7107) << "start .als";
5058 c+=j;
5059 const QByteArray name ( scan_identifier( c ) );
5060 if ( name.isEmpty() )
5062 kDebug(7107) << "EXCEPTION: empty destination string to alias";
5063 break;
5065 while (*c && isspace(*c) && *c!='\n') ++c;
5066 const QByteArray name2 ( scan_identifier ( c ) );
5067 if ( name2.isEmpty() )
5069 kDebug(7107) << "EXCEPTION: empty origin string to alias";
5070 break;
5072 kDebug(7107) << "Alias " << BYTEARRAY( name2 ) << " to " << BYTEARRAY( name );
5073 c=skip_till_newline(c);
5074 if ( name == name2 )
5076 kDebug(7107) << "EXCEPTION: same origin and destination string to alias: " << BYTEARRAY( name );
5077 break;
5079 // Second parameter is origin (unlike in .rn)
5080 QMap<QByteArray,StringDefinition>::iterator it=s_stringDefinitionMap.find(name2);
5081 if (it==s_stringDefinitionMap.end())
5083 kDebug(7107) << "EXCEPTION: cannot find string to make alias of " << BYTEARRAY( name2 );
5085 else
5087 StringDefinition def=(*it);
5088 s_stringDefinitionMap.insert(name,def);
5090 kDebug(7107) << "end .als";
5091 break;
5093 case REQ_rr: // groff(7) "Remove number Register"
5095 kDebug(7107) << "start .rr";
5096 c += j;
5097 const QByteArray name ( scan_identifier( c ) );
5098 if ( name.isEmpty() )
5100 kDebug(7107) << "EXCEPTION: empty origin string to remove/rename: ";
5101 break;
5103 c = skip_till_newline( c );
5104 QMap <QByteArray, NumberDefinition>::iterator it = s_numberDefinitionMap.find( name );
5105 if ( it == s_numberDefinitionMap.end() )
5107 kDebug(7107) << "EXCEPTION: trying to remove inexistant number register: ";
5109 else
5111 s_numberDefinitionMap.remove( name );
5113 kDebug(7107) << "end .rr";
5114 break;
5116 case REQ_rnn: // groff(7) "ReName Number register"
5118 kDebug(7107) << "start .rnn";
5119 c+=j;
5120 const QByteArray name ( scan_identifier ( c ) );
5121 if ( name.isEmpty() )
5123 kDebug(7107) << "EXCEPTION: empty origin to remove/rename number register";
5124 break;
5126 while (*c && isspace(*c) && *c!='\n') ++c;
5127 const QByteArray name2 ( scan_identifier ( c ) );
5128 if ( name2.isEmpty() )
5130 kDebug(7107) << "EXCEPTION: empty destination to rename number register";
5131 break;
5133 c = skip_till_newline( c );
5134 QMap<QByteArray,NumberDefinition>::iterator it=s_numberDefinitionMap.find(name);
5135 if (it==s_numberDefinitionMap.end())
5137 kDebug(7107) << "EXCEPTION: cannot find number register to rename" << BYTEARRAY( name );
5139 else
5141 NumberDefinition def=(*it);
5142 s_numberDefinitionMap.remove(name); // ### QT4: removeAll
5143 s_numberDefinitionMap.insert(name2,def);
5145 kDebug(7107) << "end .rnn";
5146 break;
5148 case REQ_aln: // groff(7) "ALias Number Register"
5151 * Note an alias is supposed to be something like a hard link
5152 * However to make it simplier, we only copy the string.
5154 // Be careful: unlike .rnn, the destination is first, origin is second
5155 kDebug(7107) << "start .aln";
5156 c+=j;
5157 const QByteArray name ( scan_identifier( c ) );
5158 if ( name.isEmpty() )
5160 kDebug(7107) << "EXCEPTION: empty destination number register to alias";
5161 break;
5163 while (*c && isspace(*c) && *c!='\n') ++c;
5164 const QByteArray name2 ( scan_identifier( c ) );
5165 if ( name2.isEmpty() )
5167 kDebug(7107) << "EXCEPTION: empty origin number register to alias";
5168 break;
5170 kDebug(7107) << "Alias " << BYTEARRAY( name2 ) << " to " << BYTEARRAY( name );
5171 c = skip_till_newline( c );
5172 if ( name == name2 )
5174 kDebug(7107) << "EXCEPTION: same origin and destination number register to alias: " << BYTEARRAY( name );
5175 break;
5177 // Second parameter is origin (unlike in .rnn)
5178 QMap<QByteArray,NumberDefinition>::iterator it=s_numberDefinitionMap.find(name2);
5179 if (it==s_numberDefinitionMap.end())
5181 kDebug(7107) << "EXCEPTION: cannot find string to make alias: " << BYTEARRAY( name2 );
5183 else
5185 NumberDefinition def=(*it);
5186 s_numberDefinitionMap.insert(name,def);
5188 kDebug(7107) << "end .aln";
5189 break;
5191 case REQ_shift: // groff(7) "SHIFT parameter"
5193 c+=j;
5194 h=c;
5195 while (*h && *h!='\n' && isdigit(*h) ) ++h;
5196 const char tempchar = *h;
5197 *h = 0;
5198 const QByteArray number( c );
5199 *h = tempchar;
5200 c = skip_till_newline( h );
5201 unsigned int result = 1; // Numbers of shifts to do
5202 if ( !number.isEmpty() )
5204 bool ok = false;
5205 result = number.toUInt(&ok);
5206 if ( !ok || result < 1 )
5207 result = 1;
5209 for ( unsigned int num = 0; num < result; ++num )
5211 if ( !s_argumentList.isEmpty() )
5212 s_argumentList.pop_front();
5214 break;
5216 case REQ_while: // groff(7) "WHILE loop"
5218 request_while( c, j, mandoc_command );
5219 break;
5221 case REQ_do: // groff(7) "DO command"
5223 // ### HACK: we just replace do by a \n and a .
5224 *c = '\n';
5225 c++;
5226 *c = '.';
5227 // The . will be treated as next character
5228 break;
5230 default:
5232 if (mandoc_command &&
5233 ((isupper(*c) && islower(*(c+1)))
5234 || (islower(*c) && isupper(*(c+1)))) )
5236 /* Let through any mdoc(7) commands that haven't
5237 * been delt with.
5238 * I don't want to miss anything out of the text.
5240 char buf[4] = { c[0], c[1], ' ', 0 };
5241 out_html(buf); /* Print the command (it might just be text). */
5242 c=c+j;
5243 trans_char(c,'"','\a');
5244 if (*c=='\n') c++;
5245 out_html(set_font("R"));
5246 c=scan_troff(c, 1, NULL);
5247 out_html(NEWLINE);
5248 if (fillout)
5249 curpos++;
5250 else
5251 curpos=0;
5253 else
5254 c=skip_till_newline(c);
5255 break;
5260 if (fillout)
5262 out_html(NEWLINE);
5263 curpos++;
5265 return c;
// Set to 1 by scan_troff() when it processes a tab character and cleared
// again at the next newline; while set (and in fill mode) that newline
// also emits a <BR>.
static int contained_tab = 0;

// Signals whether scan_troff() should look for embedded mandoc commands.
// scan_troff_mandoc() switches it on for the duration of one call.
static bool mandoc_line = false;
5273 static char *scan_troff(char *c, bool san, char **result)
5274 { /* san : stop at newline */
5275 char *h;
5276 char intbuff[NULL_TERMINATED(MED_STR_MAX)];
5277 int ibp=0;
5278 #define FLUSHIBP if (ibp) { intbuff[ibp]=0; out_html(intbuff); ibp=0; }
5279 char *exbuffer;
5280 int exbuffpos, exbuffmax, exnewline_for_fun;
5281 bool exscaninbuff;
5282 int usenbsp=0;
5284 exbuffer=buffer;
5285 exbuffpos=buffpos;
5286 exbuffmax=buffmax;
5287 exnewline_for_fun=newline_for_fun;
5288 exscaninbuff=scaninbuff;
5289 newline_for_fun=0;
5290 if (result) {
5291 if (*result) {
5292 buffer=*result;
5293 buffpos=qstrlen(buffer);
5294 buffmax=buffpos;
5295 } else {
5296 buffer = stralloc(LARGE_STR_MAX);
5297 buffpos=0;
5298 buffmax=LARGE_STR_MAX;
5300 scaninbuff=true;
5302 h=c; // ### FIXME below are too many tests that may go before the position of c
5303 /* start scanning */
5305 // ### VERIFY: a dot must be at first position, we cannot add newlines or it would allow spaces before a dot
5306 while (*h == ' ')
5308 #if 1
5309 ++h;
5310 #else
5311 *h++ = '\n';
5312 #endif
5315 while (h && *h && (!san || newline_for_fun || *h!='\n')) {
5317 if (*h==escapesym) {
5318 h++;
5319 FLUSHIBP;
5320 h = scan_escape(h);
5321 } else if (*h==controlsym && h[-1]=='\n') {
5322 h++;
5323 FLUSHIBP;
5324 h = scan_request(h);
5325 if (h && san && h[-1]=='\n') h--;
5326 } else if (mandoc_line
5327 && ((*(h-1)) && (isspace(*(h-1)) || (*(h-1))=='\n'))
5328 && *(h) && isupper(*(h))
5329 && *(h+1) && islower(*(h+1))
5330 && *(h+2) && isspace(*(h+2))) {
5331 // mdoc(7) embedded command eg ".It Fl Ar arg1 Fl Ar arg2"
5332 FLUSHIBP;
5333 h = scan_request(h);
5334 if (san && h[-1]=='\n') h--;
5335 } else if (*h==nobreaksym && h[-1]=='\n') {
5336 h++;
5337 FLUSHIBP;
5338 h = scan_request(h);
5339 if (san && h[-1]=='\n') h--;
5340 } else {
5341 /* int mx; */
5342 if (still_dd && isalnum(*h) && h[-1]=='\n') {
5343 /* sometimes a .HP request is not followed by a .br request */
5344 FLUSHIBP;
5345 out_html("<DD>");
5346 curpos=0;
5347 still_dd=false;
5349 switch (*h) {
5350 case '&':
5351 intbuff[ibp++]='&';
5352 intbuff[ibp++]='a';
5353 intbuff[ibp++]='m';
5354 intbuff[ibp++]='p';
5355 intbuff[ibp++]=';';
5356 curpos++;
5357 break;
5358 case '<':
5359 intbuff[ibp++]='&';
5360 intbuff[ibp++]='l';
5361 intbuff[ibp++]='t';
5362 intbuff[ibp++]=';';
5363 curpos++;
5364 break;
5365 case '>':
5366 intbuff[ibp++]='&';
5367 intbuff[ibp++]='g';
5368 intbuff[ibp++]='t';
5369 intbuff[ibp++]=';';
5370 curpos++;
5371 break;
5372 case '"':
5373 intbuff[ibp++]='&';
5374 intbuff[ibp++]='q';
5375 intbuff[ibp++]='u';
5376 intbuff[ibp++]='o';
5377 intbuff[ibp++]='t';
5378 intbuff[ibp++]=';';
5379 curpos++;
5380 break;
5381 case '\n':
5382 if (h != c && h[-1]=='\n' && fillout) {
5383 intbuff[ibp++]='<';
5384 intbuff[ibp++]='P';
5385 intbuff[ibp++]='>';
5387 if (contained_tab && fillout) {
5388 intbuff[ibp++]='<';
5389 intbuff[ibp++]='B';
5390 intbuff[ibp++]='R';
5391 intbuff[ibp++]='>';
5393 contained_tab=0;
5394 curpos=0;
5395 usenbsp=0;
5396 intbuff[ibp++]='\n';
5397 break;
5398 case '\t':
5400 int curtab=0;
5401 contained_tab=1;
5402 FLUSHIBP;
5403 /* like a typewriter, not like TeX */
5404 tabstops[19]=curpos+1;
5405 while (curtab<maxtstop && tabstops[curtab]<=curpos)
5406 curtab++;
5407 if (curtab<maxtstop) {
5408 if (!fillout) {
5409 while (curpos<tabstops[curtab]) {
5410 intbuff[ibp++]=' ';
5411 if (ibp>480) { FLUSHIBP; }
5412 curpos++;
5414 } else {
5415 out_html("<TT>");
5416 while (curpos<tabstops[curtab]) {
5417 out_html("&nbsp;");
5418 curpos++;
5420 out_html("</TT>");
5424 break;
5425 default:
5426 if (*h==' ' && (h[-1]=='\n' || usenbsp)) {
5427 FLUSHIBP;
5428 if (!usenbsp && fillout) {
5429 out_html("<BR>");
5430 curpos=0;
5432 usenbsp=fillout;
5433 if (usenbsp) out_html("&nbsp;"); else intbuff[ibp++]=' ';
5434 } else if (*h>31 && *h<127) intbuff[ibp++]=*h;
5435 else if (((unsigned char)(*h))>127) {
5436 intbuff[ibp++]=*h;
5438 curpos++;
5439 break;
5441 if (ibp > (MED_STR_MAX - 20)) FLUSHIBP;
5442 h++;
5445 FLUSHIBP;
5446 if (buffer) buffer[buffpos]='\0';
5447 if (san && h && *h) h++;
5448 newline_for_fun=exnewline_for_fun;
5449 if (result) {
5450 *result = buffer;
5451 buffer=exbuffer;
5452 buffpos=exbuffpos;
5453 buffmax=exbuffmax;
5454 scaninbuff=exscaninbuff;
5457 return h;
5461 static char *scan_troff_mandoc(char *c, bool san, char **result)
5463 char *ret;
5464 char *end = c;
5465 bool oldval = mandoc_line;
5466 mandoc_line = true;
5467 while (*end && *end != '\n') {
5468 end++;
5471 if (end > c + 2
5472 && ispunct(*(end - 1))
5473 && isspace(*(end - 2)) && *(end - 2) != '\n') {
5474 /* Don't format lonely punctuation E.g. in "xyz ," format
5475 * the xyz and then append the comma removing the space.
5477 *(end - 2) = '\n';
5478 ret = scan_troff(c, san, result);
5479 *(end - 2) = *(end - 1);
5480 *(end - 1) = ' ';
5482 else {
5483 ret = scan_troff(c, san, result);
5485 mandoc_line = oldval;
5486 return ret;
5489 // Entry point
5490 void scan_man_page(const char *man_page)
5492 if (!man_page)
5493 return;
5495 kDebug(7107) << "Start scanning man page";
5497 // ### Do more init
5498 // Unlike man2html, we actually call this several times, hence the need to
5499 // properly cleanup all those static vars
5500 s_ifelseval.clear();
5502 s_characterDefinitionMap.clear();
5503 InitCharacterDefinitions();
5505 s_stringDefinitionMap.clear();
5506 InitStringDefinitions();
5508 s_numberDefinitionMap.clear();
5509 InitNumberDefinitions();
5511 s_argumentList.clear();
5513 section = 0;
5515 s_dollarZero = ""; // No macro called yet!
5517 output_possible = false;
5518 int strLength = qstrlen(man_page);
5519 char *buf = new char[strLength + 2];
5520 qstrcpy(buf+1, man_page);
5521 buf[0] = '\n';
5523 kDebug(7107) << "Parse man page";
5525 scan_troff(buf+1,0,NULL);
5527 kDebug(7107) << "Man page parsed!";
5529 while (itemdepth || dl_set[itemdepth]) {
5530 out_html("</DL>\n");
5531 if (dl_set[itemdepth]) dl_set[itemdepth]=0;
5532 else if (itemdepth > 0) itemdepth--;
5535 out_html(set_font("R"));
5536 out_html(change_to_size(0));
5537 if (!fillout) {
5538 fillout=1;
5539 out_html("</PRE>");
5541 out_html(NEWLINE);
5543 if (section) {
5544 output_real("<div style=\"margin-left: 2cm\">\n");
5545 section = 0;
5548 if (output_possible) {
5549 output_real("</div>\n");
5550 output_real("<div class=\"bannerBottom\" style=\"background-image: url(");
5551 output_real(cssPath);
5552 output_real("/bottom-middle.png); background-repeat: x-repeat; width: 100%; height: 100px; bottom:0pt;\">\n");
5553 output_real("<div class=\"bannerBottomLeft\">\n");
5554 output_real("<img src=\"");
5555 output_real(cssPath);
5556 output_real("/bottom-left.png\" style=\"margin: 0pt;\" alt=\"Bottom left of the banner\">\n");
5557 output_real("</div>\n");
5558 output_real("<div class=\"bannerBottomRight\">\n");
5559 output_real("<img src=\"");
5560 output_real(cssPath);
5561 output_real("/bottom-right.png\" style=\"margin: 0pt\" alt=\"Bottom right of the banner\">\n");
5562 output_real("</div>\n");
5563 output_real("</div>\n");
5565 output_real("</BODY>\n</HTML>\n");
5567 delete [] buf;
5569 // Release memory
5570 s_characterDefinitionMap.clear();
5571 s_stringDefinitionMap.clear();
5572 s_numberDefinitionMap.clear();
5573 s_argumentList.clear();
5575 // reinit static variables for reuse
5576 delete [] buffer;
5577 buffer = 0;
5579 escapesym='\\';
5580 nobreaksym='\'';
5581 controlsym='.';
5582 fieldsym=0;
5583 padsym=0;
5585 buffpos=0;
5586 buffmax=0;
5587 scaninbuff=false;
5588 itemdepth=0;
5589 for (int i = 0; i < 20; i++)
5590 dl_set[i] = 0;
5591 still_dd=false;
5592 for (int i = 0; i < 12; i++)
5593 tabstops[i] = (i+1)*8;
5594 maxtstop=12;
5595 curpos=0;
5597 mandoc_name_count = 0;
5600 #ifdef SIMPLE_MAN2HTML
/**
 * Stand-alone (SIMPLE_MAN2HTML) output sink: writes the generated HTML
 * fragment @p insert to standard output.
 */
void output_real(const char *insert)
{
    std::cout << insert;
}
5606 char *read_man_page(const char *filename)
5608 int man_pipe = 0;
5609 char *man_buf = NULL;
5611 FILE *man_stream = NULL;
5612 struct stat stbuf;
5613 size_t buf_size;
5614 if (stat(filename, &stbuf) == -1) {
5615 std::cerr << "read_man_page: can not find " << filename << endl;
5616 return NULL;
5618 if (!S_ISREG(stbuf.st_mode)) {
5619 std::cerr << "read_man_page: no file " << filename << endl;
5620 return NULL;
5622 buf_size = stbuf.st_size;
5623 man_buf = stralloc(buf_size+5);
5624 man_pipe = 0;
5625 man_stream = fopen(filename, "r");
5626 if (man_stream) {
5627 man_buf[0] = '\n';
5628 if (fread(man_buf+1, 1, buf_size, man_stream) == buf_size) {
5629 man_buf[buf_size] = '\n';
5630 man_buf[buf_size + 1] = man_buf[buf_size + 2] = '\0';
5632 else {
5633 man_buf = NULL;
5635 fclose(man_stream);
5637 return man_buf;
5640 #ifndef KIO_MAN_TEST
5641 int main(int argc, char **argv)
5643 htmlPath = ".";
5644 cssPath = ".";
5645 if (argc < 2) {
5646 std::cerr << "call: " << argv[0] << " <filename>\n";
5647 return 1;
5649 if (chdir(argv[1])) {
5650 char *buf = read_man_page(argv[1]);
5651 if (buf) {
5652 scan_man_page(buf);
5653 delete [] buf;
5655 } else {
5656 DIR *dir = opendir(".");
5657 struct dirent *ent;
5658 while ((ent = readdir(dir)) != NULL) {
5659 cerr << "converting " << ent->d_name << endl;
5660 char *buf = read_man_page(ent->d_name);
5661 if (buf) {
5662 scan_man_page(buf);
5663 delete [] buf;
5666 closedir(dir);
5668 return 0;
5670 #endif
5673 #endif
5675 // kate: space-indent on; indent-width 4; replace-tabs on;