2 * ttman - text to man converter
4 * Copyright 2006 Timo Hirvonen <tihirvon@gmail.com>
6 * This file is licensed under the GPLv2.
13 #include <sys/types.h>
24 TOK_TEXT
, // max one line w/o \n
36 TOK_ENDPRE
, // must be after TOK_PRE
38 TOK_ENDRAW
, // must be after TOK_RAW
39 TOK_TITLE
, // WRITE 2 2001-12-13 "Linux 2.0.32" "Linux Programmer's Manual"
49 static const char *program
;
50 static const char *filename
;
51 static char tmp_file
[1024];
53 static int cur_line
= 1;
54 static struct token head
= { &head
, &head
, TOK_TEXT
, 0, NULL
, 0 };
56 #define CONST_STR(str) { str, sizeof(str) - 1 }
78 #define NR_TOKEN_NAMES (sizeof(token_names) / sizeof(token_names[0]))
79 #define BUG() die("BUG in %s\n", __FUNCTION__)
82 #define __NORETURN __attribute__((__noreturn__))
87 static __NORETURN
void quit(void)
94 static __NORETURN
void die(const char *format
, ...)
98 fprintf(stderr
, "%s: ", program
);
100 vfprintf(stderr
, format
, ap
);
105 static __NORETURN
void syntax(int line
, const char *format
, ...)
109 fprintf(stderr
, "%s:%d: error: ", filename
, line
);
110 va_start(ap
, format
);
111 vfprintf(stderr
, format
, ap
);
116 static inline const char *keyword_name(int type
)
118 if (type
< TOK_H1
|| type
> TOK_TITLE
)
119 die("BUG: no keyword name for type %d\n", type
);
120 return token_names
[type
].str
;
123 static void *xmalloc(size_t size
)
125 void *ret
= malloc(size
);
128 die("OOM when allocating %ul bytes\n", size
);
132 static char *memdup(const char *str
, int len
)
134 char *s
= xmalloc(len
+ 1);
140 static struct token
*new_token(int type
)
142 struct token
*tok
= xmalloc(sizeof(struct token
));
147 tok
->line
= cur_line
;
151 static void free_token(struct token
*tok
)
153 struct token
*prev
= tok
->prev
;
154 struct token
*next
= tok
->next
;
164 static void emit_token(struct token
*tok
)
166 tok
->prev
= head
.prev
;
168 head
.prev
->next
= tok
;
172 static void emit(int type
)
174 struct token
*tok
= new_token(type
);
180 static int emit_keyword(const char *buf
, int size
)
184 for (len
= 0; len
< size
; len
++) {
185 if (!isalnum(buf
[len
]))
190 syntax(cur_line
, "keyword expected\n");
192 for (i
= TOK_H1
; i
< NR_TOKEN_NAMES
; i
++) {
193 if (len
!= token_names
[i
].len
)
195 if (!strncmp(buf
, token_names
[i
].str
, len
)) {
200 syntax(cur_line
, "invalid keyword '@%s'\n", memdup(buf
, len
));
203 static int emit_text(const char *buf
, int size
)
208 for (i
= 0; i
< size
; i
++) {
210 if (c
== '@' || c
== '`' || c
== '*' || c
== '\n' || c
== '\\' || c
== '\t')
213 tok
= new_token(TOK_TEXT
);
220 static void tokenize(const char *buf
, int size
)
231 pos
+= emit_keyword(buf
+ pos
, size
- pos
);
247 tok
= new_token(TOK_TEXT
);
248 tok
->text
= buf
+ pos
;
251 if (pos
== size
|| buf
[pos
] == '\n') {
256 if (tok
->text
[0] == '\\') {
265 pos
+= emit_text(buf
+ pos
, size
- pos
);
271 static int is_empty_line(const struct token
*tok
)
273 while (tok
!= &head
) {
278 for (i
= 0; i
< tok
->len
; i
++) {
279 if (tok
->text
[i
] != ' ')
295 static struct token
*remove_line(struct token
*tok
)
297 while (tok
!= &head
) {
298 struct token
*next
= tok
->next
;
299 int type
= tok
->type
;
309 static struct token
*skip_after(struct token
*tok
, int type
)
311 struct token
*save
= tok
;
313 while (tok
!= &head
) {
314 if (tok
->type
== type
) {
316 if (tok
->type
!= TOK_NL
)
317 syntax(tok
->line
, "newline expected after @%s\n",
321 if (tok
->type
>= TOK_H1
)
322 syntax(tok
->line
, "keywords not allowed betweed @%s and @%s\n",
323 keyword_name(type
-1), keyword_name(type
));
326 syntax(save
->prev
->line
, "missing @%s\n", keyword_name(type
));
329 static struct token
*get_next_line(struct token
*tok
)
331 while (tok
!= &head
) {
332 int type
= tok
->type
;
341 static struct token
*get_indent(struct token
*tok
, int *ip
)
345 while (tok
!= &head
&& tok
->type
== TOK_INDENT
) {
353 // line must be non-empty
354 static struct token
*check_line(struct token
*tok
, int *ip
)
359 start
= tok
= get_indent(tok
, ip
);
361 tok_type
= tok
->type
;
368 while (tok
!= &head
) {
379 syntax(tok
->line
, "@%s not allowed inside paragraph\n",
380 keyword_name(tok
->type
));
393 while (tok
!= &head
) {
401 syntax(tok
->line
, "@%s can contain only text\n",
402 keyword_name(tok_type
));
410 while (tok
!= &head
) {
420 syntax(tok
->line
, "@%s not allowed inside @li\n",
421 keyword_name(tok
->type
));
436 syntax(tok
->line
, "@%s not expected\n", keyword_name(tok
->type
));
445 syntax(tok
->line
, "indentation before @%s\n", keyword_name(tok
->type
));
448 static void insert_nl_before(struct token
*next
)
450 struct token
*prev
= next
->prev
;
451 struct token
*new = new_token(TOK_NL
);
459 static void normalize(void)
461 struct token
*tok
= head
.next
;
463 * >= 0 if previous line was text (== amount of indent)
464 * -1 if previous block was @pre (amount of indent doesn't matter)
465 * -2 otherwise (@h1 etc., indent was 0)
467 int prev_indent
= -2;
469 while (tok
!= &head
) {
473 // remove empty lines
474 while (is_empty_line(tok
)) {
475 tok
= remove_line(tok
);
483 tok
= check_line(tok
, &i
);
491 if (new_para
&& prev_indent
>= -1) {
492 // previous line/block was text or @pre
493 // and there was a empty line after it
494 insert_nl_before(start
);
497 if (!new_para
&& prev_indent
== i
) {
498 // join with previous line
499 struct token
*nl
= start
->prev
;
501 if (nl
->type
!= TOK_NL
)
504 if ((nl
->prev
!= &head
&& nl
->prev
->type
== TOK_BR
) ||
505 tok
->type
== TOK_BR
) {
506 // don't convert \n after/before @br to ' '
509 // convert "\n" to " "
516 while (start
->type
== TOK_INDENT
) {
517 struct token
*next
= start
->next
;
524 tok
= get_next_line(tok
);
528 // these can be directly after normal text
529 // but not joined with the previous line
530 if (new_para
&& prev_indent
>= -1) {
531 // previous line/block was text or @pre
532 // and there was an empty line after it
533 insert_nl_before(start
);
535 tok
= skip_after(tok
->next
, tok
->type
+ 1);
542 // remove white space after H1, H2, L1 and TITLE
544 while (tok
!= &head
) {
545 int type
= tok
->type
;
548 if (type
== TOK_TEXT
) {
549 while (tok
->len
&& *tok
->text
== ' ') {
556 if (type
!= TOK_INDENT
)
559 // empty TOK_TEXT or TOK_INDENT
564 // not normal text. can't be joined
566 tok
= get_next_line(tok
);
578 #define output(...) fprintf(outfile, __VA_ARGS__)
580 static void output_buf(const char *buf
, int len
)
582 fwrite(buf
, 1, len
, outfile
);
585 static void output_text(struct token
*tok
)
588 const char *str
= tok
->text
;
595 if (pos
>= sizeof(buf
) - 1) {
596 output_buf(buf
, pos
);
606 output_buf(buf
, pos
);
610 static int italic
= 0;
611 static int indent
= 0;
613 static struct token
*output_pre(struct token
*tok
)
617 if (tok
->type
!= TOK_NL
)
618 syntax(tok
->line
, "newline expected after @pre\n");
622 while (tok
!= &head
) {
626 tok
= get_indent(tok
, &i
);
627 if (i
!= indent
&& tok
->type
!= TOK_NL
)
628 syntax(tok
->line
, "indent changed in @pre\n");
633 if (bol
&& tok
->len
&& tok
->text
[0] == '.')
655 if (tok
!= &head
&& tok
->type
== TOK_NL
)
668 static struct token
*output_raw(struct token
*tok
)
670 if (tok
->type
!= TOK_NL
)
671 syntax(tok
->line
, "newline expected after @raw\n");
674 while (tok
!= &head
) {
677 if (tok
->len
== 2 && !strncmp(tok
->text
, "\\\\", 2)) {
679 * "\\" (\) was converted to "\\\\" (\\) because
680 * nroff does escaping too.
684 output_buf(tok
->text
, tok
->len
);
701 if (tok
!= &head
&& tok
->type
== TOK_NL
)
713 static struct token
*output_para(struct token
*tok
)
717 while (tok
!= &head
) {
763 static struct token
*title(struct token
*tok
, const char *cmd
)
766 return output_para(tok
->next
);
769 static struct token
*dump_one(struct token
*tok
)
773 tok
= get_indent(tok
, &i
);
774 if (tok
->type
!= TOK_RAW
) {
790 if (tok
->type
== TOK_TEXT
&& tok
->len
&& tok
->text
[0] == '.')
792 tok
= output_para(tok
);
795 tok
= title(tok
, ".SH ");
798 tok
= title(tok
, ".SS ");
801 tok
= title(tok
, ".TP\n");
804 tok
= output_pre(tok
->next
);
807 tok
= output_raw(tok
->next
);
810 tok
= title(tok
, ".TH ");
812 // no hyphenation, adjust left
813 output(".nh\n.ad l\n");
828 static void dump(void)
830 struct token
*tok
= head
.next
;
836 static void process(void)
842 fd
= open(filename
, O_RDONLY
);
844 die("opening `%s' for reading: %s\n", filename
, strerror(errno
));
847 buf
= mmap(NULL
, s
.st_size
, PROT_READ
, MAP_PRIVATE
, fd
, 0);
848 if (buf
== MAP_FAILED
)
849 die("mmap: %s\n", strerror(errno
));
851 tokenize(buf
, s
.st_size
);
857 int main(int argc
, char *argv
[])
864 fprintf(stderr
, "Usage: %s <in> <out>\n", program
);
870 snprintf(tmp_file
, sizeof(tmp_file
), "%s.XXXXXX", dest
);
871 fd
= mkstemp(tmp_file
);
873 die("creating %s: %s\n", tmp_file
, strerror(errno
));
874 outfile
= fdopen(fd
, "w");
876 die("opening %s: %s\n", tmp_file
, strerror(errno
));
879 if (rename(tmp_file
, dest
))
880 die("renaming %s to %s: %s\n", tmp_file
, dest
, strerror(errno
));