Document cmus-remote -Q
[cmus.git] / Doc / ttman.c
blob5c27f4ade736f5bceffa9227f385341a07e1e7ad
/*
 * ttman - text to man converter
 *
 * Copyright 2006 Timo Hirvonen <tihirvon@gmail.com>
 *
 * This file is licensed under the GPLv2.
 */
8 #include <stdlib.h>
9 #include <stdarg.h>
10 #include <stdio.h>
11 #include <string.h>
12 #include <ctype.h>
13 #include <sys/types.h>
14 #include <sys/stat.h>
15 #include <sys/mman.h>
16 #include <unistd.h>
17 #include <fcntl.h>
18 #include <errno.h>
/*
 * One lexical element of the input.  Tokens live in a circular
 * doubly-linked list whose sentinel is the file-scope `head`.
 */
struct token {
	struct token *next;
	struct token *prev;
	enum {
		/* plain markup */
		TOK_TEXT,	// at most one line of text, never contains \n
		TOK_NL,		// \n
		TOK_ITALIC,	// `
		TOK_BOLD,	// *
		TOK_INDENT,	// \t

		/* keywords (@...); every value from TOK_H1 on is a keyword */
		TOK_H1,
		TOK_H2,
		TOK_LI,
		TOK_BR,
		TOK_PRE,
		TOK_ENDPRE,	// must directly follow TOK_PRE
		TOK_RAW,
		TOK_ENDRAW,	// must directly follow TOK_RAW
		TOK_TITLE,	// WRITE 2 2001-12-13 "Linux 2.0.32" "Linux Programmer's Manual"
	} type;

	/* input line number recorded when the token was created */
	int line;

	/* token text; NOT NUL-terminated, always use len */
	const char *text;
	/* number of bytes in text */
	int len;
};
49 static const char *program;
50 static const char *filename;
51 static char tmp_file[1024];
52 static FILE *outfile;
53 static int cur_line = 1;
54 static struct token head = { &head, &head, TOK_TEXT, 0, NULL, 0 };
/* Build a { string, length } pair from a string literal. */
#define CONST_STR(str) { str, sizeof(str) - 1 }

/*
 * Printable token names, indexed by token type.  The order must match
 * the token type enum exactly.
 */
static const struct {
	const char *str;
	int len;
} token_names[] = {
	/* plain markup */
	CONST_STR("text"),
	CONST_STR("nl"),
	CONST_STR("italic"),
	CONST_STR("bold"),
	CONST_STR("indent"),

	/* keywords, written as @name in the input */
	CONST_STR("h1"),
	CONST_STR("h2"),
	CONST_STR("li"),
	CONST_STR("br"),
	CONST_STR("pre"),
	CONST_STR("endpre"),
	CONST_STR("raw"),
	CONST_STR("endraw"),
	CONST_STR("title")
};
/* number of entries in token_names[] */
#define NR_TOKEN_NAMES (sizeof(token_names) / sizeof(token_names[0]))

/* Abort on an internal inconsistency; __func__ is the standard C99 spelling. */
#define BUG() die("BUG in %s\n", __func__)

/* Mark functions that never return, when the compiler supports it. */
#ifdef __GNUC__
#define __NORETURN __attribute__((__noreturn__))
#else
#define __NORETURN
#endif
87 static __NORETURN void quit(void)
89 if (tmp_file[0])
90 unlink(tmp_file);
91 exit(1);
94 static __NORETURN void die(const char *format, ...)
96 va_list ap;
98 fprintf(stderr, "%s: ", program);
99 va_start(ap, format);
100 vfprintf(stderr, format, ap);
101 va_end(ap);
102 quit();
105 static __NORETURN void syntax(int line, const char *format, ...)
107 va_list ap;
109 fprintf(stderr, "%s:%d: error: ", filename, line);
110 va_start(ap, format);
111 vfprintf(stderr, format, ap);
112 va_end(ap);
113 quit();
116 static inline const char *keyword_name(int type)
118 if (type < TOK_H1 || type > TOK_TITLE)
119 die("BUG: no keyword name for type %d\n", type);
120 return token_names[type].str;
/*
 * malloc() that never returns NULL: on allocation failure the program
 * dies with an out-of-memory message.
 */
static void *xmalloc(size_t size)
{
	void *ret = malloc(size);

	if (!ret) {
		/* %zu is the conversion matching size_t */
		die("OOM when allocating %zu bytes\n", size);
	}
	return ret;
}
/*
 * Return a freshly allocated, NUL-terminated copy of the first `len`
 * bytes of `str`.  The caller owns the result.
 */
static char *memdup(const char *str, int len)
{
	char *copy = xmalloc(len + 1);

	copy[len] = 0;
	memcpy(copy, str, len);
	return copy;
}
140 static struct token *new_token(int type)
142 struct token *tok = xmalloc(sizeof(struct token));
144 tok->prev = NULL;
145 tok->next = NULL;
146 tok->type = type;
147 tok->line = cur_line;
148 return tok;
151 static void free_token(struct token *tok)
153 struct token *prev = tok->prev;
154 struct token *next = tok->next;
156 if (tok == &head)
157 BUG();
159 prev->next = next;
160 next->prev = prev;
161 free(tok);
164 static void emit_token(struct token *tok)
166 tok->prev = head.prev;
167 tok->next = &head;
168 head.prev->next = tok;
169 head.prev = tok;
172 static void emit(int type)
174 struct token *tok = new_token(type);
175 tok->len = 0;
176 tok->text = NULL;
177 emit_token(tok);
180 static int emit_keyword(const char *buf, int size)
182 int i, len;
184 for (len = 0; len < size; len++) {
185 if (!isalnum(buf[len]))
186 break;
189 if (!len)
190 syntax(cur_line, "keyword expected\n");
192 for (i = TOK_H1; i < NR_TOKEN_NAMES; i++) {
193 if (len != token_names[i].len)
194 continue;
195 if (!strncmp(buf, token_names[i].str, len)) {
196 emit(i);
197 return len;
200 syntax(cur_line, "invalid keyword '@%s'\n", memdup(buf, len));
203 static int emit_text(const char *buf, int size)
205 struct token *tok;
206 int i;
208 for (i = 0; i < size; i++) {
209 int c = buf[i];
210 if (c == '@' || c == '`' || c == '*' || c == '\n' || c == '\\' || c == '\t')
211 break;
213 tok = new_token(TOK_TEXT);
214 tok->text = buf;
215 tok->len = i;
216 emit_token(tok);
217 return i;
220 static void tokenize(const char *buf, int size)
222 int pos = 0;
224 while (pos < size) {
225 struct token *tok;
226 int ch;
228 ch = buf[pos++];
229 switch (ch) {
230 case '@':
231 pos += emit_keyword(buf + pos, size - pos);
232 break;
233 case '`':
234 emit(TOK_ITALIC);
235 break;
236 case '*':
237 emit(TOK_BOLD);
238 break;
239 case '\n':
240 emit(TOK_NL);
241 cur_line++;
242 break;
243 case '\t':
244 emit(TOK_INDENT);
245 break;
246 case '\\':
247 tok = new_token(TOK_TEXT);
248 tok->text = buf + pos;
249 tok->len = 1;
250 pos++;
251 if (pos == size || buf[pos] == '\n') {
252 // just one '\\'
253 tok->text--;
256 if (tok->text[0] == '\\') {
257 tok->text = "\\\\";
258 tok->len = 2;
261 emit_token(tok);
262 break;
263 default:
264 pos--;
265 pos += emit_text(buf + pos, size - pos);
266 break;
271 static int is_empty_line(const struct token *tok)
273 while (tok != &head) {
274 int i;
276 switch (tok->type) {
277 case TOK_TEXT:
278 for (i = 0; i < tok->len; i++) {
279 if (tok->text[i] != ' ')
280 return 0;
282 break;
283 case TOK_INDENT:
284 break;
285 case TOK_NL:
286 return 1;
287 default:
288 return 0;
290 tok = tok->next;
292 return 1;
295 static struct token *remove_line(struct token *tok)
297 while (tok != &head) {
298 struct token *next = tok->next;
299 int type = tok->type;
301 free_token(tok);
302 tok = next;
303 if (type == TOK_NL)
304 break;
306 return tok;
309 static struct token *skip_after(struct token *tok, int type)
311 struct token *save = tok;
313 while (tok != &head) {
314 if (tok->type == type) {
315 tok = tok->next;
316 if (tok->type != TOK_NL)
317 syntax(tok->line, "newline expected after @%s\n",
318 keyword_name(type));
319 return tok->next;
321 if (tok->type >= TOK_H1)
322 syntax(tok->line, "keywords not allowed betweed @%s and @%s\n",
323 keyword_name(type-1), keyword_name(type));
324 tok = tok->next;
326 syntax(save->prev->line, "missing @%s\n", keyword_name(type));
329 static struct token *get_next_line(struct token *tok)
331 while (tok != &head) {
332 int type = tok->type;
334 tok = tok->next;
335 if (type == TOK_NL)
336 break;
338 return tok;
341 static struct token *get_indent(struct token *tok, int *ip)
343 int i = 0;
345 while (tok != &head && tok->type == TOK_INDENT) {
346 tok = tok->next;
347 i++;
349 *ip = i;
350 return tok;
353 // line must be non-empty
354 static struct token *check_line(struct token *tok, int *ip)
356 struct token *start;
357 int tok_type;
359 start = tok = get_indent(tok, ip);
361 tok_type = tok->type;
362 switch (tok_type) {
363 case TOK_TEXT:
364 case TOK_BOLD:
365 case TOK_ITALIC:
366 case TOK_BR:
367 tok = tok->next;
368 while (tok != &head) {
369 switch (tok->type) {
370 case TOK_TEXT:
371 case TOK_BOLD:
372 case TOK_ITALIC:
373 case TOK_BR:
374 case TOK_INDENT:
375 break;
376 case TOK_NL:
377 return start;
378 default:
379 syntax(tok->line, "@%s not allowed inside paragraph\n",
380 keyword_name(tok->type));
382 tok = tok->next;
384 break;
385 case TOK_H1:
386 case TOK_H2:
387 case TOK_TITLE:
388 if (*ip)
389 goto indentation;
391 // check arguments
392 tok = tok->next;
393 while (tok != &head) {
394 switch (tok->type) {
395 case TOK_TEXT:
396 case TOK_INDENT:
397 break;
398 case TOK_NL:
399 return start;
400 default:
401 syntax(tok->line, "@%s can contain only text\n",
402 keyword_name(tok_type));
404 tok = tok->next;
406 break;
407 case TOK_LI:
408 // check arguments
409 tok = tok->next;
410 while (tok != &head) {
411 switch (tok->type) {
412 case TOK_TEXT:
413 case TOK_BOLD:
414 case TOK_ITALIC:
415 case TOK_INDENT:
416 break;
417 case TOK_NL:
418 return start;
419 default:
420 syntax(tok->line, "@%s not allowed inside @li\n",
421 keyword_name(tok->type));
423 tok = tok->next;
425 break;
426 case TOK_PRE:
427 // checked later
428 break;
429 case TOK_RAW:
430 if (*ip)
431 goto indentation;
432 // checked later
433 break;
434 case TOK_ENDPRE:
435 case TOK_ENDRAW:
436 syntax(tok->line, "@%s not expected\n", keyword_name(tok->type));
437 break;
438 case TOK_NL:
439 case TOK_INDENT:
440 BUG();
441 break;
443 return start;
444 indentation:
445 syntax(tok->line, "indentation before @%s\n", keyword_name(tok->type));
448 static void insert_nl_before(struct token *next)
450 struct token *prev = next->prev;
451 struct token *new = new_token(TOK_NL);
453 new->prev = prev;
454 new->next = next;
455 prev->next = new;
456 next->prev = new;
459 static void normalize(void)
461 struct token *tok = head.next;
463 * >= 0 if previous line was text (== amount of indent)
464 * -1 if previous block was @pre (amount of indent doesn't matter)
465 * -2 otherwise (@h1 etc., indent was 0)
467 int prev_indent = -2;
469 while (tok != &head) {
470 struct token *start;
471 int i, new_para = 0;
473 // remove empty lines
474 while (is_empty_line(tok)) {
475 tok = remove_line(tok);
476 new_para = 1;
477 if (tok == &head)
478 return;
481 // skips indent
482 start = tok;
483 tok = check_line(tok, &i);
485 switch (tok->type) {
486 case TOK_TEXT:
487 case TOK_ITALIC:
488 case TOK_BOLD:
489 case TOK_BR:
490 // normal text
491 if (new_para && prev_indent >= -1) {
492 // previous line/block was text or @pre
493 // and there was a empty line after it
494 insert_nl_before(start);
497 if (!new_para && prev_indent == i) {
498 // join with previous line
499 struct token *nl = start->prev;
501 if (nl->type != TOK_NL)
502 BUG();
504 if ((nl->prev != &head && nl->prev->type == TOK_BR) ||
505 tok->type == TOK_BR) {
506 // don't convert \n after/before @br to ' '
507 free_token(nl);
508 } else {
509 // convert "\n" to " "
510 nl->type = TOK_TEXT;
511 nl->text = " ";
512 nl->len = 1;
515 // remove indent
516 while (start->type == TOK_INDENT) {
517 struct token *next = start->next;
518 free_token(start);
519 start = next;
523 prev_indent = i;
524 tok = get_next_line(tok);
525 break;
526 case TOK_PRE:
527 case TOK_RAW:
528 // these can be directly after normal text
529 // but not joined with the previous line
530 if (new_para && prev_indent >= -1) {
531 // previous line/block was text or @pre
532 // and there was a empty line after it
533 insert_nl_before(start);
535 tok = skip_after(tok->next, tok->type + 1);
536 prev_indent = -1;
537 break;
538 case TOK_H1:
539 case TOK_H2:
540 case TOK_LI:
541 case TOK_TITLE:
542 // remove white space after H1, H2, L1 and TITLE
543 tok = tok->next;
544 while (tok != &head) {
545 int type = tok->type;
546 struct token *next;
548 if (type == TOK_TEXT) {
549 while (tok->len && *tok->text == ' ') {
550 tok->text++;
551 tok->len--;
553 if (tok->len)
554 break;
556 if (type != TOK_INDENT)
557 break;
559 // empty TOK_TEXT or TOK_INDENT
560 next = tok->next;
561 free_token(tok);
562 tok = next;
564 // not normal text. can't be joined
565 prev_indent = -2;
566 tok = get_next_line(tok);
567 break;
568 case TOK_NL:
569 case TOK_INDENT:
570 case TOK_ENDPRE:
571 case TOK_ENDRAW:
572 BUG();
573 break;
578 #define output(...) fprintf(outfile, __VA_ARGS__)
580 static void output_buf(const char *buf, int len)
582 fwrite(buf, 1, len, outfile);
585 static void output_text(struct token *tok)
587 char buf[1024];
588 const char *str = tok->text;
589 int len = tok->len;
590 int pos = 0;
592 while (len) {
593 int c = *str++;
595 if (pos >= sizeof(buf) - 1) {
596 output_buf(buf, pos);
597 pos = 0;
599 if (c == '-')
600 buf[pos++] = '\\';
601 buf[pos++] = c;
602 len--;
605 if (pos)
606 output_buf(buf, pos);
/* output state shared by the dump functions */
static int bold;	/* 1 while bold font is switched on */
static int italic;	/* 1 while italic font is switched on */
static int indent;	/* current nesting depth of .RS blocks */
613 static struct token *output_pre(struct token *tok)
615 int bol = 1;
617 if (tok->type != TOK_NL)
618 syntax(tok->line, "newline expected after @pre\n");
620 output(".nf\n");
621 tok = tok->next;
622 while (tok != &head) {
623 if (bol) {
624 int i;
626 tok = get_indent(tok, &i);
627 if (i != indent && tok->type != TOK_NL)
628 syntax(tok->line, "indent changed in @pre\n");
631 switch (tok->type) {
632 case TOK_TEXT:
633 if (bol && tok->len && tok->text[0] == '.')
634 output("\\&");
635 output_text(tok);
636 break;
637 case TOK_NL:
638 output("\n");
639 bol = 1;
640 tok = tok->next;
641 continue;
642 case TOK_ITALIC:
643 output("`");
644 break;
645 case TOK_BOLD:
646 output("*");
647 break;
648 case TOK_INDENT:
649 // FIXME: warn
650 output(" ");
651 break;
652 case TOK_ENDPRE:
653 output(".fi\n");
654 tok = tok->next;
655 if (tok != &head && tok->type == TOK_NL)
656 tok = tok->next;
657 return tok;
658 default:
659 BUG();
660 break;
662 bol = 0;
663 tok = tok->next;
665 return tok;
668 static struct token *output_raw(struct token *tok)
670 if (tok->type != TOK_NL)
671 syntax(tok->line, "newline expected after @raw\n");
673 tok = tok->next;
674 while (tok != &head) {
675 switch (tok->type) {
676 case TOK_TEXT:
677 if (tok->len == 2 && !strncmp(tok->text, "\\\\", 2)) {
678 /* ugly special case
679 * "\\" (\) was converted to "\\\\" (\\) because
680 * nroff does escaping too.
682 output("\\");
683 } else {
684 output_buf(tok->text, tok->len);
686 break;
687 case TOK_NL:
688 output("\n");
689 break;
690 case TOK_ITALIC:
691 output("`");
692 break;
693 case TOK_BOLD:
694 output("*");
695 break;
696 case TOK_INDENT:
697 output("\t");
698 break;
699 case TOK_ENDRAW:
700 tok = tok->next;
701 if (tok != &head && tok->type == TOK_NL)
702 tok = tok->next;
703 return tok;
704 default:
705 BUG();
706 break;
708 tok = tok->next;
710 return tok;
713 static struct token *output_para(struct token *tok)
715 int bol = 1;
717 while (tok != &head) {
718 switch (tok->type) {
719 case TOK_TEXT:
720 output_text(tok);
721 break;
722 case TOK_ITALIC:
723 italic ^= 1;
724 if (italic) {
725 output("\\fI");
726 } else {
727 output("\\fR");
729 break;
730 case TOK_BOLD:
731 bold ^= 1;
732 if (bold) {
733 output("\\fB");
734 } else {
735 output("\\fR");
737 break;
738 case TOK_BR:
739 if (bol) {
740 output(".br\n");
741 } else {
742 output("\n.br\n");
744 bol = 1;
745 tok = tok->next;
746 continue;
747 case TOK_NL:
748 output("\n");
749 return tok->next;
750 case TOK_INDENT:
751 output(" ");
752 break;
753 default:
754 BUG();
755 break;
757 bol = 0;
758 tok = tok->next;
760 return tok;
763 static struct token *title(struct token *tok, const char *cmd)
765 output("%s", cmd);
766 return output_para(tok->next);
769 static struct token *dump_one(struct token *tok)
771 int i;
773 tok = get_indent(tok, &i);
774 if (tok->type != TOK_RAW) {
775 while (indent < i) {
776 output(".RS\n");
777 indent++;
779 while (indent > i) {
780 output(".RE\n");
781 indent--;
785 switch (tok->type) {
786 case TOK_TEXT:
787 case TOK_ITALIC:
788 case TOK_BOLD:
789 case TOK_BR:
790 if (tok->type == TOK_TEXT && tok->len && tok->text[0] == '.')
791 output("\\&");
792 tok = output_para(tok);
793 break;
794 case TOK_H1:
795 tok = title(tok, ".SH ");
796 break;
797 case TOK_H2:
798 tok = title(tok, ".SS ");
799 break;
800 case TOK_LI:
801 tok = title(tok, ".TP\n");
802 break;
803 case TOK_PRE:
804 tok = output_pre(tok->next);
805 break;
806 case TOK_RAW:
807 tok = output_raw(tok->next);
808 break;
809 case TOK_TITLE:
810 tok = title(tok, ".TH ");
811 // must be after .TH
812 // no hyphenation, adjust left
813 output(".nh\n.ad l\n");
814 break;
815 case TOK_NL:
816 output("\n");
817 tok = tok->next;
818 break;
819 case TOK_ENDPRE:
820 case TOK_ENDRAW:
821 case TOK_INDENT:
822 BUG();
823 break;
825 return tok;
828 static void dump(void)
830 struct token *tok = head.next;
832 while (tok != &head)
833 tok = dump_one(tok);
836 static void process(void)
838 struct stat s;
839 const char *buf;
840 int fd;
842 fd = open(filename, O_RDONLY);
843 if (fd == -1)
844 die("opening `%s' for reading: %s\n", filename, strerror(errno));
845 fstat(fd, &s);
846 if (s.st_size) {
847 buf = mmap(NULL, s.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
848 if (buf == MAP_FAILED)
849 die("mmap: %s\n", strerror(errno));
851 tokenize(buf, s.st_size);
852 normalize();
854 dump();
857 int main(int argc, char *argv[])
859 const char *dest;
860 int fd;
862 program = argv[0];
863 if (argc != 3) {
864 fprintf(stderr, "Usage: %s <in> <out>\n", program);
865 return 1;
867 filename = argv[1];
868 dest = argv[2];
870 snprintf(tmp_file, sizeof(tmp_file), "%s.XXXXXX", dest);
871 fd = mkstemp(tmp_file);
872 if (fd < 0)
873 die("creating %s: %s\n", tmp_file, strerror(errno));
874 outfile = fdopen(fd, "w");
875 if (!outfile)
876 die("opening %s: %s\n", tmp_file, strerror(errno));
878 process();
879 if (rename(tmp_file, dest))
880 die("renaming %s to %s: %s\n", tmp_file, dest, strerror(errno));
881 return 0;