2 * ttman - text to man converter
4 * Copyright 2006 Timo Hirvonen <tihirvon@gmail.com>
6 * This file is licensed under the GPLv2.
8 * changes by Ketmar // Invisible Vector
18 #include <sys/types.h>
23 #define BUG() die("BUG in %s\n", __FUNCTION__)
26 # define __NORETURN __attribute__((__noreturn__))
27 # define __PRINTF(fmt,nvar) __attribute__((format(printf, fmt, nvar)))
30 # define __PRINTF(fmt,nvar)
34 typedef struct token
{
38 TOK_TEXT
, // max one line w/o \n
49 TOK_ENDPRE
, // must be after TOK_PRE
51 TOK_ENDRAW
, // must be after TOK_RAW
52 TOK_TITLE
, // WRITE 2 2001-12-13 "Linux 2.0.32" "Linux Programmer's Manual"
62 static const char *filename
;
63 static char tmp_file
[1024];
65 static int cur_line
= 1;
66 static Token head
= { &head
, &head
, TOK_TEXT
, 0, NULL
, 0 };
68 static int bold
= 0; // bool
69 static int italic
= 0; // bool
70 static size_t indent
= 0;
73 static void reset (void) {
75 head
.next
= head
.prev
= &head
;
85 #define CONST_STR(str) { str, sizeof(str)-1 }
106 #define NR_TOKEN_NAMES (sizeof(token_names)/sizeof(token_names[0]))
109 static __NORETURN
void quit (void) {
110 if (tmp_file
[0]) unlink(tmp_file
);
115 static __NORETURN
__PRINTF(1, 2) void die (const char *format
, ...) {
117 fprintf(stderr
, "GENMAN: ");
118 va_start(ap
, format
);
119 vfprintf(stderr
, format
, ap
);
125 static __NORETURN
__PRINTF(2, 3) void syntax (int line
, const char *format
, ...) {
127 fprintf(stderr
, "%s:%d: error: ", filename
, line
);
128 va_start(ap
, format
);
129 vfprintf(stderr
, format
, ap
);
135 static inline const char *keyword_name (int type
) {
136 if (type
< TOK_H1
|| type
> TOK_TITLE
) die("BUG: no keyword name for type %d\n", type
);
137 return token_names
[type
].str
;
141 static void *xmalloc (size_t size
) {
142 void *ret
= malloc(size
);
143 #if defined(__x86_64__) || defined(__ppc64__)
144 if (!ret
) die("OOM when allocating %lu bytes\n", size
);
146 if (!ret
) die("OOM when allocating %u bytes\n", size
);
151 static char *memdup (const char *str
, int len
) {
152 char *s
= xmalloc(len
+1);
153 if (len
> 0) memcpy(s
, str
, len
);
159 static Token
*new_token (int type
) {
160 Token
*tok
= xmalloc(sizeof(Token
));
164 tok
->line
= cur_line
;
169 static void free_token (Token
*tok
) {
170 Token
*prev
= tok
->prev
, *next
= tok
->next
;
171 if (tok
== &head
) BUG();
178 static void emit_token (Token
*tok
) {
179 tok
->prev
= head
.prev
;
181 head
.prev
->next
= tok
;
186 static void emit (int type
) {
187 Token
*tok
= new_token(type
);
194 static int emit_keyword (const char *buf
, size_t size
) {
196 for (len
= 0; len
< size
; ++len
) if (!isalnum((unsigned char)buf
[len
])) break;
197 if (!len
) syntax(cur_line
, "keyword expected\n");
198 for (size_t i
= TOK_H1
; i
< NR_TOKEN_NAMES
; ++i
) {
199 if (len
!= token_names
[i
].len
) continue;
200 if (!strncmp(buf
, token_names
[i
].str
, len
)) {
205 syntax(cur_line
, "invalid keyword '@%s'\n", memdup(buf
, len
));
209 static int emit_text (const char *buf
, size_t size
) {
212 for (i
= 0; i
< size
; ++i
) {
213 int c
= (unsigned char)buf
[i
];
214 if (c
== '@' || c
== '`' || c
== '*' || c
== '\n' || c
== '\\' || c
== '\t') break;
216 tok
= new_token(TOK_TEXT
);
224 static void tokenize (const char *buf
, size_t size
) {
229 ch
= (unsigned char)buf
[pos
++];
232 pos
+= emit_keyword(buf
+pos
, size
-pos
);
247 case ' ': // this can be space or indent
248 if (pos
== 1 || pos
>= size
|| buf
[pos
-2] != '\n' || buf
[pos
] != ' ') goto normal_text
;
250 --pos
; // first space
252 if (pos
+1 >= size
) syntax(cur_line
, "invalid indentation\n");
253 if (buf
[pos
] != ' ') break; // done
254 if (buf
[pos
+1] != ' ') syntax(cur_line
, "invalid indentation\n");
260 tok
= new_token(TOK_TEXT
);
264 if (pos
== size
|| buf
[pos
] == '\n') {
268 if (tok
->text
[0] == '\\') {
277 pos
+= emit_text(buf
+pos
, size
-pos
);
284 static int is_empty_line (const Token
*tok
) {
285 while (tok
!= &head
) {
288 for (size_t i
= 0; i
< tok
->len
; ++i
) if (tok
->text
[i
] != ' ') return 0;
303 static Token
*remove_line (Token
*tok
) {
304 while (tok
!= &head
) {
305 Token
*next
= tok
->next
;
306 int type
= tok
->type
;
309 if (type
== TOK_NL
) break;
315 static Token
*skip_after (Token
*tok
, int type
) {
317 while (tok
!= &head
) {
318 if ((int)tok
->type
== type
) {
320 if (tok
->type
!= TOK_NL
) syntax(tok
->line
, "newline expected after @%s\n", keyword_name(type
));
323 if (tok
->type
>= TOK_H1
) syntax(tok
->line
, "keywords not allowed betweed @%s and @%s\n", keyword_name(type
-1), keyword_name(type
));
326 syntax(save
->prev
->line
, "missing @%s\n", keyword_name(type
));
330 static Token
*get_next_line (Token
*tok
) {
331 while (tok
!= &head
) {
332 int type
= tok
->type
;
334 if (type
== TOK_NL
) break;
340 static Token
*get_indent (Token
*tok
, size_t *ip
) {
342 for (i
= 0; tok
!= &head
&& tok
->type
== TOK_INDENT
; ++i
) tok
= tok
->next
;
348 // line must be non-empty
349 static Token
*check_line (Token
*tok
, size_t *ip
) {
352 start
= tok
= get_indent(tok
, ip
);
353 tok_type
= tok
->type
;
360 while (tok
!= &head
) {
371 syntax(tok
->line
, "@%s not allowed inside paragraph\n", keyword_name(tok
->type
));
379 if (*ip
) goto indentation
;
382 while (tok
!= &head
) {
390 syntax(tok
->line
, "@%s can contain only text\n", keyword_name(tok_type
));
398 while (tok
!= &head
) {
408 syntax(tok
->line
, "@%s not allowed inside @li\n",
409 keyword_name(tok
->type
));
418 if (*ip
) goto indentation
;
423 syntax(tok
->line
, "@%s not expected\n", keyword_name(tok
->type
));
432 syntax(tok
->line
, "indentation before @%s\n", keyword_name(tok
->type
));
436 static void insert_nl_before (Token
*next
) {
437 Token
*prev
= next
->prev
;
438 Token
*new = new_token(TOK_NL
);
446 static void normalize (void) {
447 Token
*tok
= head
.next
;
449 * >= 0 if previous line was text (== amount of indent)
450 * -1 if previous block was @pre (amount of indent doesn't matter)
451 * -2 otherwise (@h1 etc., indent was 0)
453 long prev_indent
= -2;
454 while (tok
!= &head
) {
457 int new_para
= 0; // bool
458 // remove empty lines
459 while (is_empty_line(tok
)) {
460 tok
= remove_line(tok
);
462 if (tok
== &head
) return;
466 tok
= check_line(tok
, &i
);
473 if (new_para
&& prev_indent
>= -1) {
474 // previous line/block was text or @pre
475 // and there was an empty line after it
476 insert_nl_before(start
);
478 if (!new_para
&& prev_indent
== (long)i
) {
479 // join with previous line
480 Token
*nl
= start
->prev
;
481 if (nl
->type
!= TOK_NL
) BUG();
482 if ((nl
->prev
!= &head
&& nl
->prev
->type
== TOK_BR
) || tok
->type
== TOK_BR
) {
483 // don't convert \n after/before @br to ' '
486 // convert "\n" to " "
492 while (start
->type
== TOK_INDENT
) {
493 Token
*next
= start
->next
;
498 prev_indent
= (long)i
;
499 tok
= get_next_line(tok
);
503 // these can be directly after normal text
504 // but not joined with the previous line
505 if (new_para
&& prev_indent
>= -1) {
506 // previous line/block was text or @pre
507 // and there was an empty line after it
508 insert_nl_before(start
);
510 tok
= skip_after(tok
->next
, tok
->type
+1);
517 // remove white space after H1, H2, LI and TITLE
519 while (tok
!= &head
) {
520 int type
= tok
->type
;
522 if (type
== TOK_TEXT
) {
523 while (tok
->len
&& *tok
->text
== ' ') {
529 if (type
!= TOK_INDENT
) break;
530 // empty TOK_TEXT or TOK_INDENT
535 // not normal text. can't be joined
537 tok
= get_next_line(tok
);
550 #define output(...) fprintf(outfile, __VA_ARGS__)
552 static void output_buf (const char *buf
, int len
) {
553 fwrite(buf
, 1, len
, outfile
);
557 static void output_text (Token
*tok
) {
559 const char *str
= tok
->text
;
564 if (pos
>= (int)sizeof(buf
)-1) {
565 output_buf(buf
, pos
);
568 if (c
== '-') buf
[pos
++] = '\\';
572 if (pos
) output_buf(buf
, pos
);
576 static Token
*output_pre (Token
*tok
) {
578 if (tok
->type
!= TOK_NL
) syntax(tok
->line
, "newline expected after @pre\n");
581 while (tok
!= &head
) {
584 tok
= get_indent(tok
, &i
);
585 if (i
!= indent
&& tok
->type
!= TOK_NL
) syntax(tok
->line
, "indent changed in @pre\n");
589 if (bol
&& tok
->len
&& tok
->text
[0] == '.') output("\\&");
610 if (tok
!= &head
&& tok
->type
== TOK_NL
) tok
= tok
->next
;
623 static Token
*output_raw (Token
*tok
) {
624 if (tok
->type
!= TOK_NL
) syntax(tok
->line
, "newline expected after @raw\n");
626 while (tok
!= &head
) {
629 if (tok
->len
== 2 && !strncmp(tok
->text
, "\\\\", 2)) {
631 * "\\" (\) was converted to "\\\\" (\\) because
632 * nroff does escaping too.
636 output_buf(tok
->text
, tok
->len
);
653 if (tok
!= &head
&& tok
->type
== TOK_NL
) tok
= tok
->next
;
665 static Token
*output_para (Token
*tok
) {
667 while (tok
!= &head
) {
714 static Token
*title (Token
*tok
, const char *cmd
) {
716 return output_para(tok
->next
);
720 static Token
*dump_one (Token
*tok
) {
722 tok
= get_indent(tok
, &i
);
723 if (tok
->type
!= TOK_RAW
) {
738 if (tok
->type
== TOK_TEXT
&& tok
->len
&& tok
->text
[0] == '.') output("\\&");
739 tok
= output_para(tok
);
742 tok
= title(tok
, ".SH ");
745 tok
= title(tok
, ".SS ");
748 tok
= title(tok
, ".TP\n");
751 tok
= output_pre(tok
->next
);
754 tok
= output_raw(tok
->next
);
757 tok
= title(tok
, ".TH ");
759 // no hyphenation, adjust left
760 output(".nh\n.ad l\n");
776 static void dump (void) {
777 Token
*tok
= head
.next
;
778 while (tok
!= &head
) tok
= dump_one(tok
);
782 static void process (void) {
785 int fd
= open(filename
, O_RDONLY
);
786 if (fd
== -1) die("opening `%s' for reading: %s\n", filename
, strerror(errno
));
789 buf
= mmap(NULL
, s
.st_size
, PROT_READ
, MAP_PRIVATE
, fd
, 0);
790 if (buf
== MAP_FAILED
) die("mmap: %s\n", strerror(errno
));
791 tokenize(buf
, s
.st_size
);
798 void generate_man (const char *infname
, const char *outfname
) {
804 snprintf(tmp_file
, sizeof(tmp_file
), "%s.XXXXXX", outfname
);
805 fd
= mkstemp(tmp_file
);
806 if (fd
< 0) die("creating %s: %s\n", tmp_file
, strerror(errno
));
808 outfile
= fdopen(fd
, "w");
809 if (!outfile
) die("opening %s: %s\n", tmp_file
, strerror(errno
));
813 if (rename(tmp_file
, outfname
)) die("renaming %s to %s: %s\n", tmp_file
, outfname
, strerror(errno
));