Document Replay Gain
[cmus.git] / Doc / ttman.c
blob3c9fdb6c1c5f89e66aeef2001e717246234422ce
1 /*
2 * ttman - text to man converter
4 * Copyright 2006 Timo Hirvonen <tihirvon@gmail.com>
6 * This file is licensed under the GPLv2.
7 */
8 #include <stdlib.h>
9 #include <stdarg.h>
10 #include <stdio.h>
11 #include <string.h>
12 #include <ctype.h>
13 #include <sys/types.h>
14 #include <sys/stat.h>
15 #include <sys/mman.h>
16 #include <unistd.h>
17 #include <fcntl.h>
18 #include <errno.h>
/*
 * One lexical unit of the input.  Tokens are linked into a circular,
 * doubly linked list whose sentinel is the file-scope `head`.
 */
struct token {
	struct token *next;
	struct token *prev;
	enum {
		TOK_TEXT,	/* plain text, at most one line, never holds '\n' */
		TOK_NL,		/* '\n' */
		TOK_ITALIC,	/* '`' */
		TOK_BOLD,	/* '*' */
		TOK_INDENT,	/* '\t' */

		/* keywords (@...); same order as the keywords[] table */
		TOK_H1,
		TOK_H2,
		TOK_LI,
		TOK_BR,
		TOK_PRE,
		TOK_ENDPRE,	/* must directly follow TOK_PRE */
		TOK_RAW,
		TOK_ENDRAW,	/* must directly follow TOK_RAW */
		TOK_TITLE,	/* WRITE 2 2001-12-13 "Linux 2.0.32" "Linux Programmer's Manual" */
	} type;
	/* input line on which the token was created */
	int line;

	/* token bytes; NOT NUL-terminated */
	const char *text;
	/* number of bytes in text */
	int len;
};
49 static const char *program;
50 static const char *filename;
51 static char tmp_file[1024];
52 static FILE *outfile;
53 static int cur_line = 1;
54 static struct token head = { &head, &head, TOK_TEXT, 0, NULL, 0 };
/* Expands a string literal into a { pointer, length } initializer. */
#define CONST_STR(str) { str, sizeof(str) - 1 }

/*
 * Spellings of the @keywords.  Entry i corresponds to token type
 * TOK_H1 + i, so the order must follow the keyword part of the enum.
 */
static const struct keyword {
	const char *str;	/* keyword without the leading '@' */
	int len;		/* length of str, terminator excluded */
} keywords[] = {
	CONST_STR("h1"),
	CONST_STR("h2"),
	CONST_STR("li"),
	CONST_STR("br"),
	CONST_STR("pre"),
	CONST_STR("endpre"),
	CONST_STR("raw"),
	CONST_STR("endraw"),
	CONST_STR("title")
};
/* number of entries in keywords[] */
#define NR_KEYWORDS (sizeof(keywords) / sizeof(keywords[0]))

/* fatal error naming the function in which an impossible state was hit */
#define BUG() die("BUG in %s\n", __FUNCTION__)

/* annotation for functions that never return to their caller */
#if defined(__GNUC__)
#define __NORETURN __attribute__((__noreturn__))
#else
#define __NORETURN
#endif
80 static __NORETURN void quit(void)
82 if (tmp_file[0])
83 unlink(tmp_file);
84 exit(1);
87 static __NORETURN void die(const char *format, ...)
89 va_list ap;
91 fprintf(stderr, "%s: ", program);
92 va_start(ap, format);
93 vfprintf(stderr, format, ap);
94 va_end(ap);
95 quit();
98 static __NORETURN void syntax(int line, const char *format, ...)
100 va_list ap;
102 fprintf(stderr, "%s:%d: error: ", filename, line);
103 va_start(ap, format);
104 vfprintf(stderr, format, ap);
105 va_end(ap);
106 quit();
109 static inline const char *keyword_name(int type)
111 if (type < TOK_H1 || type > TOK_TITLE)
112 die("BUG: no keyword name for type %d\n", type);
113 return keywords[type - TOK_H1].str;
/*
 * malloc() wrapper that never returns NULL: an allocation failure is
 * reported through die(), which terminates the program.
 */
static void *xmalloc(size_t size)
{
	void *ret = malloc(size);

	if (!ret) {
		// %zu is the conversion for size_t; the former "%ul" was "%u"
		// followed by a literal 'l' and mismatched the argument type
		die("OOM when allocating %zu bytes\n", size);
	}
	return ret;
}
/*
 * Returns a freshly allocated, NUL-terminated copy of the first len
 * bytes of str.
 */
static char *memdup(const char *str, int len)
{
	char *copy = xmalloc(len + 1);

	memcpy(copy, str, len);
	copy[len] = '\0';
	return copy;
}
133 static struct token *new_token(int type)
135 struct token *tok = xmalloc(sizeof(struct token));
137 tok->prev = NULL;
138 tok->next = NULL;
139 tok->type = type;
140 tok->line = cur_line;
141 return tok;
144 static void free_token(struct token *tok)
146 struct token *prev = tok->prev;
147 struct token *next = tok->next;
149 if (tok == &head)
150 BUG();
152 prev->next = next;
153 next->prev = prev;
154 free(tok);
157 static void emit_token(struct token *tok)
159 tok->prev = head.prev;
160 tok->next = &head;
161 head.prev->next = tok;
162 head.prev = tok;
165 static void emit(int type)
167 struct token *tok = new_token(type);
168 tok->len = 0;
169 tok->text = NULL;
170 emit_token(tok);
173 static int emit_keyword(const char *buf, int size)
175 int i, len;
177 for (len = 0; len < size; len++) {
178 if (!isalnum(buf[len]))
179 break;
182 if (!len)
183 syntax(cur_line, "keyword expected\n");
185 for (i = 0; i < NR_KEYWORDS; i++) {
186 if (len != keywords[i].len)
187 continue;
188 if (!strncmp(buf, keywords[i].str, len)) {
189 emit(TOK_H1 + i);
190 return len;
193 syntax(cur_line, "invalid keyword '@%s'\n", memdup(buf, len));
196 static int emit_text(const char *buf, int size)
198 struct token *tok;
199 int i;
201 for (i = 0; i < size; i++) {
202 int c = buf[i];
203 if (c == '@' || c == '`' || c == '*' || c == '\n' || c == '\\' || c == '\t')
204 break;
206 tok = new_token(TOK_TEXT);
207 tok->text = buf;
208 tok->len = i;
209 emit_token(tok);
210 return i;
213 static void tokenize(const char *buf, int size)
215 int pos = 0;
217 while (pos < size) {
218 struct token *tok;
219 int ch;
221 ch = buf[pos++];
222 switch (ch) {
223 case '@':
224 pos += emit_keyword(buf + pos, size - pos);
225 break;
226 case '`':
227 emit(TOK_ITALIC);
228 break;
229 case '*':
230 emit(TOK_BOLD);
231 break;
232 case '\n':
233 emit(TOK_NL);
234 cur_line++;
235 break;
236 case '\t':
237 emit(TOK_INDENT);
238 break;
239 case '\\':
240 tok = new_token(TOK_TEXT);
241 tok->text = buf + pos;
242 tok->len = 1;
243 pos++;
244 if (pos == size || buf[pos] == '\n') {
245 // just one '\\'
246 tok->text--;
249 if (tok->text[0] == '\\') {
250 tok->text = "\\\\";
251 tok->len = 2;
254 emit_token(tok);
255 break;
256 default:
257 pos--;
258 pos += emit_text(buf + pos, size - pos);
259 break;
264 static int is_empty_line(const struct token *tok)
266 while (tok != &head) {
267 int i;
269 switch (tok->type) {
270 case TOK_TEXT:
271 for (i = 0; i < tok->len; i++) {
272 if (tok->text[i] != ' ')
273 return 0;
275 break;
276 case TOK_INDENT:
277 break;
278 case TOK_NL:
279 return 1;
280 default:
281 return 0;
283 tok = tok->next;
285 return 1;
288 static struct token *remove_line(struct token *tok)
290 while (tok != &head) {
291 struct token *next = tok->next;
292 int type = tok->type;
294 free_token(tok);
295 tok = next;
296 if (type == TOK_NL)
297 break;
299 return tok;
302 static struct token *skip_after(struct token *tok, int type)
304 struct token *save = tok;
306 while (tok != &head) {
307 if (tok->type == type) {
308 tok = tok->next;
309 if (tok->type != TOK_NL)
310 syntax(tok->line, "newline expected after @%s\n",
311 keyword_name(type));
312 return tok->next;
314 if (tok->type >= TOK_H1)
315 syntax(tok->line, "keywords not allowed betweed @%s and @%s\n",
316 keyword_name(type-1), keyword_name(type));
317 tok = tok->next;
319 syntax(save->prev->line, "missing @%s\n", keyword_name(type));
322 static struct token *get_next_line(struct token *tok)
324 while (tok != &head) {
325 int type = tok->type;
327 tok = tok->next;
328 if (type == TOK_NL)
329 break;
331 return tok;
334 static struct token *get_indent(struct token *tok, int *ip)
336 int i = 0;
338 while (tok != &head && tok->type == TOK_INDENT) {
339 tok = tok->next;
340 i++;
342 *ip = i;
343 return tok;
346 // line must be non-empty
347 static struct token *check_line(struct token *tok, int *ip)
349 struct token *start;
351 start = tok = get_indent(tok, ip);
353 switch (tok->type) {
354 case TOK_TEXT:
355 case TOK_BOLD:
356 case TOK_ITALIC:
357 case TOK_BR:
358 tok = tok->next;
359 while (tok != &head) {
360 switch (tok->type) {
361 case TOK_TEXT:
362 case TOK_BOLD:
363 case TOK_ITALIC:
364 case TOK_BR:
365 case TOK_INDENT:
366 break;
367 case TOK_NL:
368 return start;
369 default:
370 syntax(tok->line, "@%s not allowed inside paragraph\n",
371 keyword_name(tok->type));
373 tok = tok->next;
375 break;
376 case TOK_H1:
377 case TOK_H2:
378 case TOK_TITLE:
379 if (*ip)
380 goto indentation;
382 // check arguments
383 tok = tok->next;
384 while (tok != &head) {
385 switch (tok->type) {
386 case TOK_TEXT:
387 case TOK_INDENT:
388 break;
389 case TOK_NL:
390 return start;
391 default:
392 syntax(tok->line, "@%s can contain only text\n",
393 keyword_name(tok->type));
395 tok = tok->next;
397 break;
398 case TOK_LI:
399 // check arguments
400 tok = tok->next;
401 while (tok != &head) {
402 switch (tok->type) {
403 case TOK_TEXT:
404 case TOK_BOLD:
405 case TOK_ITALIC:
406 case TOK_INDENT:
407 break;
408 case TOK_NL:
409 return start;
410 default:
411 syntax(tok->line, "@%s not allowed inside @li\n",
412 keyword_name(tok->type));
414 tok = tok->next;
416 break;
417 case TOK_PRE:
418 // checked later
419 break;
420 case TOK_RAW:
421 if (*ip)
422 goto indentation;
423 // checked later
424 break;
425 case TOK_ENDPRE:
426 case TOK_ENDRAW:
427 syntax(tok->line, "@%s not expected\n", keyword_name(tok->type));
428 break;
429 case TOK_NL:
430 case TOK_INDENT:
431 BUG();
432 break;
434 return start;
435 indentation:
436 syntax(tok->line, "indentation before @%s\n", keyword_name(tok->type));
439 static void insert_nl_before(struct token *next)
441 struct token *prev = next->prev;
442 struct token *new = new_token(TOK_NL);
444 new->prev = prev;
445 new->next = next;
446 prev->next = new;
447 next->prev = new;
450 static void normalize(void)
452 struct token *tok = head.next;
454 * >= 0 if previous line was text (== amount of indent)
455 * -1 if previous block was @pre (amount of indent doesn't matter)
456 * -2 otherwise (@h1 etc., indent was 0)
458 int prev_indent = -2;
460 while (tok != &head) {
461 struct token *start;
462 int i, new_para = 0;
464 // remove empty lines
465 while (is_empty_line(tok)) {
466 tok = remove_line(tok);
467 new_para = 1;
468 if (tok == &head)
469 return;
472 // skips indent
473 start = tok;
474 tok = check_line(tok, &i);
476 switch (tok->type) {
477 case TOK_TEXT:
478 case TOK_ITALIC:
479 case TOK_BOLD:
480 case TOK_BR:
481 // normal text
482 if (new_para && prev_indent >= -1) {
483 // previous line/block was text or @pre
484 // and there was a empty line after it
485 insert_nl_before(start);
488 if (!new_para && prev_indent == i) {
489 // join with previous line
490 struct token *nl = start->prev;
492 if (nl->type != TOK_NL)
493 BUG();
495 if ((nl->prev != &head && nl->prev->type == TOK_BR) ||
496 tok->type == TOK_BR) {
497 // don't convert \n after/before @br to ' '
498 free_token(nl);
499 } else {
500 // convert "\n" to " "
501 nl->type = TOK_TEXT;
502 nl->text = " ";
503 nl->len = 1;
506 // remove indent
507 while (start->type == TOK_INDENT) {
508 struct token *next = start->next;
509 free_token(start);
510 start = next;
514 prev_indent = i;
515 tok = get_next_line(tok);
516 break;
517 case TOK_PRE:
518 case TOK_RAW:
519 // these can be directly after normal text
520 // but not joined with the previous line
521 if (new_para && prev_indent >= -1) {
522 // previous line/block was text or @pre
523 // and there was a empty line after it
524 insert_nl_before(start);
526 tok = skip_after(tok->next, tok->type + 1);
527 prev_indent = -1;
528 break;
529 case TOK_H1:
530 case TOK_H2:
531 case TOK_LI:
532 case TOK_TITLE:
533 // remove white space after H1, H2, L1 and TITLE
534 tok = tok->next;
535 while (tok != &head) {
536 int type = tok->type;
537 struct token *next;
539 if (type == TOK_TEXT) {
540 while (tok->len && *tok->text == ' ') {
541 tok->text++;
542 tok->len--;
544 if (tok->len)
545 break;
547 if (type != TOK_INDENT)
548 break;
550 // empty TOK_TEXT or TOK_INDENT
551 next = tok->next;
552 free_token(tok);
553 tok = next;
555 // not normal text. can't be joined
556 prev_indent = -2;
557 tok = get_next_line(tok);
558 break;
559 case TOK_NL:
560 case TOK_INDENT:
561 case TOK_ENDPRE:
562 case TOK_ENDRAW:
563 BUG();
564 break;
569 #define output(...) fprintf(outfile, __VA_ARGS__)
571 static void output_buf(const char *buf, int len)
573 fwrite(buf, 1, len, outfile);
576 static void output_text(struct token *tok)
578 char buf[1024];
579 const char *str = tok->text;
580 int len = tok->len;
581 int pos = 0;
583 while (len) {
584 int c = *str++;
586 if (pos >= sizeof(buf) - 1) {
587 output_buf(buf, pos);
588 pos = 0;
590 if (c == '-')
591 buf[pos++] = '\\';
592 buf[pos++] = c;
593 len--;
596 if (pos)
597 output_buf(buf, pos);
/* output state: 1 while bold text is switched on */
static int bold;
/* output state: 1 while italic text is switched on */
static int italic;
/* output state: number of .RS levels currently open */
static int indent;
604 static struct token *output_pre(struct token *tok)
606 int bol = 1;
608 if (tok->type != TOK_NL)
609 syntax(tok->line, "newline expected after @pre\n");
611 output(".nf\n");
612 tok = tok->next;
613 while (tok != &head) {
614 if (bol) {
615 int i;
617 tok = get_indent(tok, &i);
618 if (i != indent && tok->type != TOK_NL)
619 syntax(tok->line, "indent changed in @pre\n");
622 switch (tok->type) {
623 case TOK_TEXT:
624 if (bol && tok->len && tok->text[0] == '.')
625 output("\\&");
626 output_text(tok);
627 break;
628 case TOK_NL:
629 output("\n");
630 bol = 1;
631 tok = tok->next;
632 continue;
633 case TOK_ITALIC:
634 output("`");
635 break;
636 case TOK_BOLD:
637 output("*");
638 break;
639 case TOK_INDENT:
640 // FIXME: warn
641 output(" ");
642 break;
643 case TOK_ENDPRE:
644 output(".fi\n");
645 tok = tok->next;
646 if (tok != &head && tok->type == TOK_NL)
647 tok = tok->next;
648 return tok;
649 default:
650 BUG();
651 break;
653 bol = 0;
654 tok = tok->next;
656 return tok;
659 static struct token *output_raw(struct token *tok)
661 if (tok->type != TOK_NL)
662 syntax(tok->line, "newline expected after @raw\n");
664 tok = tok->next;
665 while (tok != &head) {
666 switch (tok->type) {
667 case TOK_TEXT:
668 if (tok->len == 2 && !strncmp(tok->text, "\\\\", 2)) {
669 /* ugly special case
670 * "\\" (\) was converted to "\\\\" (\\) because
671 * nroff does escaping too.
673 output("\\");
674 } else {
675 output_buf(tok->text, tok->len);
677 break;
678 case TOK_NL:
679 output("\n");
680 break;
681 case TOK_ITALIC:
682 output("`");
683 break;
684 case TOK_BOLD:
685 output("*");
686 break;
687 case TOK_INDENT:
688 output("\t");
689 break;
690 case TOK_ENDRAW:
691 tok = tok->next;
692 if (tok != &head && tok->type == TOK_NL)
693 tok = tok->next;
694 return tok;
695 default:
696 BUG();
697 break;
699 tok = tok->next;
701 return tok;
704 static struct token *output_para(struct token *tok)
706 int bol = 1;
708 while (tok != &head) {
709 switch (tok->type) {
710 case TOK_TEXT:
711 output_text(tok);
712 break;
713 case TOK_ITALIC:
714 italic ^= 1;
715 if (italic) {
716 output("\\fI");
717 } else {
718 output("\\fR");
720 break;
721 case TOK_BOLD:
722 bold ^= 1;
723 if (bold) {
724 output("\\fB");
725 } else {
726 output("\\fR");
728 break;
729 case TOK_BR:
730 if (bol) {
731 output(".br\n");
732 } else {
733 output("\n.br\n");
735 bol = 1;
736 tok = tok->next;
737 continue;
738 case TOK_NL:
739 output("\n");
740 return tok->next;
741 case TOK_INDENT:
742 output(" ");
743 break;
744 default:
745 BUG();
746 break;
748 bol = 0;
749 tok = tok->next;
751 return tok;
754 static struct token *title(struct token *tok, const char *cmd)
756 output("%s", cmd);
757 return output_para(tok->next);
760 static struct token *dump_one(struct token *tok)
762 int i;
764 tok = get_indent(tok, &i);
765 if (tok->type != TOK_RAW) {
766 while (indent < i) {
767 output(".RS\n");
768 indent++;
770 while (indent > i) {
771 output(".RE\n");
772 indent--;
776 switch (tok->type) {
777 case TOK_TEXT:
778 case TOK_ITALIC:
779 case TOK_BOLD:
780 case TOK_BR:
781 if (tok->type == TOK_TEXT && tok->len && tok->text[0] == '.')
782 output("\\&");
783 tok = output_para(tok);
784 break;
785 case TOK_H1:
786 tok = title(tok, ".SH ");
787 break;
788 case TOK_H2:
789 tok = title(tok, ".SS ");
790 break;
791 case TOK_LI:
792 tok = title(tok, ".TP\n");
793 break;
794 case TOK_PRE:
795 tok = output_pre(tok->next);
796 break;
797 case TOK_RAW:
798 tok = output_raw(tok->next);
799 break;
800 case TOK_TITLE:
801 tok = title(tok, ".TH ");
802 // must be after .TH
803 // no hyphenation, adjust left
804 output(".nh\n.ad l\n");
805 break;
806 case TOK_NL:
807 output("\n");
808 tok = tok->next;
809 break;
810 case TOK_ENDPRE:
811 case TOK_ENDRAW:
812 case TOK_INDENT:
813 BUG();
814 break;
816 return tok;
819 static void dump(void)
821 struct token *tok = head.next;
823 while (tok != &head)
824 tok = dump_one(tok);
827 static void process(void)
829 struct stat s;
830 const char *buf;
831 int fd;
833 fd = open(filename, O_RDONLY);
834 if (fd == -1)
835 die("opening `%s' for reading: %s\n", filename, strerror(errno));
836 fstat(fd, &s);
837 if (s.st_size) {
838 buf = mmap(NULL, s.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
839 if (buf == MAP_FAILED)
840 die("mmap: %s\n", strerror(errno));
842 tokenize(buf, s.st_size);
843 normalize();
845 dump();
848 int main(int argc, char *argv[])
850 const char *dest;
851 int fd;
853 program = argv[0];
854 if (argc != 3) {
855 fprintf(stderr, "Usage: %s <in> <out>\n", program);
856 return 1;
858 filename = argv[1];
859 dest = argv[2];
861 snprintf(tmp_file, sizeof(tmp_file), "%s.XXXXXX", dest);
862 fd = mkstemp(tmp_file);
863 if (fd < 0)
864 die("creating %s: %s\n", tmp_file, strerror(errno));
865 outfile = fdopen(fd, "w");
866 if (!outfile)
867 die("opening %s: %s\n", tmp_file, strerror(errno));
869 process();
870 if (rename(tmp_file, dest))
871 die("renaming %s to %s: %s\n", tmp_file, dest, strerror(errno));
872 return 0;