2 * ttman - text to man converter
4 * Copyright 2006 Timo Hirvonen <tihirvon@gmail.com>
6 * This file is licensed under the GPLv2.
13 #include <sys/types.h>
24 TOK_TEXT
, // max one line w/o \n
36 TOK_ENDPRE
, // must be after TOK_PRE
38 TOK_ENDRAW
, // must be after TOK_RAW
39 TOK_TITLE
, // WRITE 2 2001-12-13 "Linux 2.0.32" "Linux Programmer's Manual"
// Program name; printed as the prefix of die() messages and in the usage text.
49 static const char *program
;
// Input file path: opened read-only by process() and shown as the location
// prefix of syntax() error messages.
50 static const char *filename
;
// Temporary output path; main() fills it with "<dest>.XXXXXX" for mkstemp()
// and finally renames it to dest.
51 static char tmp_file
[1024];
// Line number copied into each token by new_token() and reported by
// emit_keyword() diagnostics; starts at 1.
53 static int cur_line
= 1;
// Sentinel of the token list: it initially points at itself, emit_token()
// links new tokens in at head.prev, and list walks stop when they reach &head.
54 static struct token head
= { &head
, &head
, TOK_TEXT
, 0, NULL
, 0 };
// Expands to a { string, length } initializer. sizeof(str) - 1 excludes the
// terminating NUL, so str must be a string literal or char array, not a pointer.
56 #define CONST_STR(str) { str, sizeof(str) - 1 }
57 static const struct keyword
{
// Number of entries in keywords[]; dividing by the element size (rather than
// a named type) keeps the count right if the element type ever changes.
71 #define NR_KEYWORDS (sizeof(keywords) / sizeof(keywords[0]))
// Report an internal error through die(), naming the enclosing function.
// __func__ is the standard C99 spelling of the GNU __FUNCTION__ extension.
72 #define BUG() die("BUG in %s\n", __func__)
// Function attribute telling the compiler the function never returns; applied
// to quit(), die() and syntax().
// NOTE(review): identifiers beginning with a double underscore are reserved
// for the implementation.
75 #define __NORETURN __attribute__((__noreturn__))
80 static __NORETURN
void quit(void)
87 static __NORETURN
void die(const char *format
, ...)
91 fprintf(stderr
, "%s: ", program
);
93 vfprintf(stderr
, format
, ap
);
98 static __NORETURN
void syntax(int line
, const char *format
, ...)
102 fprintf(stderr
, "%s:%d: error: ", filename
, line
);
103 va_start(ap
, format
);
104 vfprintf(stderr
, format
, ap
);
109 static inline const char *keyword_name(int type
)
111 if (type
< TOK_H1
|| type
> TOK_TITLE
)
112 die("BUG: no keyword name for type %d\n", type
);
113 return keywords
[type
- TOK_H1
].str
;
116 static void *xmalloc(size_t size
)
118 void *ret
= malloc(size
);
121 die("OOM when allocating %ul bytes\n", size
);
125 static char *memdup(const char *str
, int len
)
127 char *s
= xmalloc(len
+ 1);
133 static struct token
*new_token(int type
)
135 struct token
*tok
= xmalloc(sizeof(struct token
));
140 tok
->line
= cur_line
;
144 static void free_token(struct token
*tok
)
146 struct token
*prev
= tok
->prev
;
147 struct token
*next
= tok
->next
;
157 static void emit_token(struct token
*tok
)
159 tok
->prev
= head
.prev
;
161 head
.prev
->next
= tok
;
165 static void emit(int type
)
167 struct token
*tok
= new_token(type
);
173 static int emit_keyword(const char *buf
, int size
)
177 for (len
= 0; len
< size
; len
++) {
178 if (!isalnum(buf
[len
]))
183 syntax(cur_line
, "keyword expected\n");
185 for (i
= 0; i
< NR_KEYWORDS
; i
++) {
186 if (len
!= keywords
[i
].len
)
188 if (!strncmp(buf
, keywords
[i
].str
, len
)) {
193 syntax(cur_line
, "invalid keyword '@%s'\n", memdup(buf
, len
));
196 static int emit_text(const char *buf
, int size
)
201 for (i
= 0; i
< size
; i
++) {
203 if (c
== '@' || c
== '`' || c
== '*' || c
== '\n' || c
== '\\' || c
== '\t')
206 tok
= new_token(TOK_TEXT
);
213 static void tokenize(const char *buf
, int size
)
224 pos
+= emit_keyword(buf
+ pos
, size
- pos
);
240 tok
= new_token(TOK_TEXT
);
241 tok
->text
= buf
+ pos
;
244 if (pos
== size
|| buf
[pos
] == '\n') {
249 if (tok
->text
[0] == '\\') {
258 pos
+= emit_text(buf
+ pos
, size
- pos
);
264 static int is_empty_line(const struct token
*tok
)
266 while (tok
!= &head
) {
271 for (i
= 0; i
< tok
->len
; i
++) {
272 if (tok
->text
[i
] != ' ')
288 static struct token
*remove_line(struct token
*tok
)
290 while (tok
!= &head
) {
291 struct token
*next
= tok
->next
;
292 int type
= tok
->type
;
302 static struct token
*skip_after(struct token
*tok
, int type
)
304 struct token
*save
= tok
;
306 while (tok
!= &head
) {
307 if (tok
->type
== type
) {
309 if (tok
->type
!= TOK_NL
)
310 syntax(tok
->line
, "newline expected after @%s\n",
314 if (tok
->type
>= TOK_H1
)
315 syntax(tok
->line
, "keywords not allowed betweed @%s and @%s\n",
316 keyword_name(type
-1), keyword_name(type
));
319 syntax(save
->prev
->line
, "missing @%s\n", keyword_name(type
));
322 static struct token
*get_next_line(struct token
*tok
)
324 while (tok
!= &head
) {
325 int type
= tok
->type
;
334 static struct token
*get_indent(struct token
*tok
, int *ip
)
338 while (tok
!= &head
&& tok
->type
== TOK_INDENT
) {
346 // line must be non-empty
347 static struct token
*check_line(struct token
*tok
, int *ip
)
351 start
= tok
= get_indent(tok
, ip
);
359 while (tok
!= &head
) {
370 syntax(tok
->line
, "@%s not allowed inside paragraph\n",
371 keyword_name(tok
->type
));
384 while (tok
!= &head
) {
392 syntax(tok
->line
, "@%s can contain only text\n",
393 keyword_name(tok
->type
));
401 while (tok
!= &head
) {
411 syntax(tok
->line
, "@%s not allowed inside @li\n",
412 keyword_name(tok
->type
));
427 syntax(tok
->line
, "@%s not expected\n", keyword_name(tok
->type
));
436 syntax(tok
->line
, "indentation before @%s\n", keyword_name(tok
->type
));
439 static void insert_nl_before(struct token
*next
)
441 struct token
*prev
= next
->prev
;
442 struct token
*new = new_token(TOK_NL
);
450 static void normalize(void)
452 struct token
*tok
= head
.next
;
454 * >= 0 if previous line was text (== amount of indent)
455 * -1 if previous block was @pre (amount of indent doesn't matter)
456 * -2 otherwise (@h1 etc., indent was 0)
458 int prev_indent
= -2;
460 while (tok
!= &head
) {
464 // remove empty lines
465 while (is_empty_line(tok
)) {
466 tok
= remove_line(tok
);
474 tok
= check_line(tok
, &i
);
482 if (new_para
&& prev_indent
>= -1) {
483 // previous line/block was text or @pre
484 // and there was an empty line after it
485 insert_nl_before(start
);
488 if (!new_para
&& prev_indent
== i
) {
489 // join with previous line
490 struct token
*nl
= start
->prev
;
492 if (nl
->type
!= TOK_NL
)
495 if ((nl
->prev
!= &head
&& nl
->prev
->type
== TOK_BR
) ||
496 tok
->type
== TOK_BR
) {
497 // don't convert \n after/before @br to ' '
500 // convert "\n" to " "
507 while (start
->type
== TOK_INDENT
) {
508 struct token
*next
= start
->next
;
515 tok
= get_next_line(tok
);
519 // these can be directly after normal text
520 // but not joined with the previous line
521 if (new_para
&& prev_indent
>= -1) {
522 // previous line/block was text or @pre
523 // and there was an empty line after it
524 insert_nl_before(start
);
526 tok
= skip_after(tok
->next
, tok
->type
+ 1);
533 // remove white space after H1, H2, L1 and TITLE
535 while (tok
!= &head
) {
536 int type
= tok
->type
;
539 if (type
== TOK_TEXT
) {
540 while (tok
->len
&& *tok
->text
== ' ') {
547 if (type
!= TOK_INDENT
)
550 // empty TOK_TEXT or TOK_INDENT
555 // not normal text. can't be joined
557 tok
= get_next_line(tok
);
// printf-style formatted write to the output stream `outfile`.
569 #define output(...) fprintf(outfile, __VA_ARGS__)
571 static void output_buf(const char *buf
, int len
)
573 fwrite(buf
, 1, len
, outfile
);
576 static void output_text(struct token
*tok
)
579 const char *str
= tok
->text
;
586 if (pos
>= sizeof(buf
) - 1) {
587 output_buf(buf
, pos
);
597 output_buf(buf
, pos
);
// NOTE(review): presumably tracks whether italic markup is currently open in
// the output -- confirm against the code that toggles it.
601 static int italic
= 0;
// Indent level that output_pre() compares each line's indent against; a
// mismatch on a non-newline token is reported as "indent changed in @pre".
602 static int indent
= 0;
604 static struct token
*output_pre(struct token
*tok
)
608 if (tok
->type
!= TOK_NL
)
609 syntax(tok
->line
, "newline expected after @pre\n");
613 while (tok
!= &head
) {
617 tok
= get_indent(tok
, &i
);
618 if (i
!= indent
&& tok
->type
!= TOK_NL
)
619 syntax(tok
->line
, "indent changed in @pre\n");
624 if (bol
&& tok
->len
&& tok
->text
[0] == '.')
646 if (tok
!= &head
&& tok
->type
== TOK_NL
)
659 static struct token
*output_raw(struct token
*tok
)
661 if (tok
->type
!= TOK_NL
)
662 syntax(tok
->line
, "newline expected after @raw\n");
665 while (tok
!= &head
) {
668 if (tok
->len
== 2 && !strncmp(tok
->text
, "\\\\", 2)) {
670 * "\\" (\) was converted to "\\\\" (\\) because
671 * nroff does escaping too.
675 output_buf(tok
->text
, tok
->len
);
692 if (tok
!= &head
&& tok
->type
== TOK_NL
)
704 static struct token
*output_para(struct token
*tok
)
708 while (tok
!= &head
) {
754 static struct token
*title(struct token
*tok
, const char *cmd
)
757 return output_para(tok
->next
);
760 static struct token
*dump_one(struct token
*tok
)
764 tok
= get_indent(tok
, &i
);
765 if (tok
->type
!= TOK_RAW
) {
781 if (tok
->type
== TOK_TEXT
&& tok
->len
&& tok
->text
[0] == '.')
783 tok
= output_para(tok
);
786 tok
= title(tok
, ".SH ");
789 tok
= title(tok
, ".SS ");
792 tok
= title(tok
, ".TP\n");
795 tok
= output_pre(tok
->next
);
798 tok
= output_raw(tok
->next
);
801 tok
= title(tok
, ".TH ");
803 // no hyphenation, adjust left
804 output(".nh\n.ad l\n");
819 static void dump(void)
821 struct token
*tok
= head
.next
;
827 static void process(void)
833 fd
= open(filename
, O_RDONLY
);
835 die("opening `%s' for reading: %s\n", filename
, strerror(errno
));
838 buf
= mmap(NULL
, s
.st_size
, PROT_READ
, MAP_PRIVATE
, fd
, 0);
839 if (buf
== MAP_FAILED
)
840 die("mmap: %s\n", strerror(errno
));
842 tokenize(buf
, s
.st_size
);
848 int main(int argc
, char *argv
[])
855 fprintf(stderr
, "Usage: %s <in> <out>\n", program
);
861 snprintf(tmp_file
, sizeof(tmp_file
), "%s.XXXXXX", dest
);
862 fd
= mkstemp(tmp_file
);
864 die("creating %s: %s\n", tmp_file
, strerror(errno
));
865 outfile
= fdopen(fd
, "w");
867 die("opening %s: %s\n", tmp_file
, strerror(errno
));
870 if (rename(tmp_file
, dest
))
871 die("renaming %s to %s: %s\n", tmp_file
, dest
, strerror(errno
));