texi2txt: add support for the @table command
[wmaker-crm.git] / script / generate-txt-from-texi.sh
blobe56d78a17f26a67f5cdf18a1534f2fd3e536cd44
1 #!/bin/sh
2 ###########################################################################
4 # Window Maker window manager
6 # Copyright (c) 2014-2015 Christophe CURIS
7 # Copyright (c) 2015 Window Maker Team
9 # This program is free software; you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation; either version 2 of the License, or
12 # (at your option) any later version.
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
19 # You should have received a copy of the GNU General Public License along
20 # with this program; if not, write to the Free Software Foundation, Inc.,
21 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
23 ###########################################################################
25 # generate-txt-from-texi.sh:
26 # generate a plain text file from a texinfo documentation
28 # The goal is to achieve a result similar to:
29 # texi2any --plaintext --no-split <filename>
31 # The reason for this script is that we do not want to add a strict
32 # dependency on the 'makeinfo' tool suite (which has its own dependencies)
34 # There is also the problem that we use it to generate some documentation
35 # that should be available before the 'configure' script has been run
36 # (in the 'autogen.sh' script) because some of this documentation provides
37 # information for running the 'configure' script; we distribute these
38 # generated docs so normal users won't have the problem, but people who
39 # want to build from the Git repository will.
41 # This script is not a reference for Texinfo syntax, so if you modified the
42 # texi source, you should really consider running texi2any at least once to
43 # make sure everything is still ok.
45 ###########################################################################
47 # Despite trying to be close to the texi2any output, this script has a few
48 # known differences:
50 # - texi2any does not generate a proper title header, it satisfies itself
51 # with a rudimentary single line; this script generates a better looking
52 # header with all the information provided
54 # - the table of content contains the line number for the section to ease
55 # navigation contrary to texi2any that satisfies itself with a simplistic toc
57 # - the paragraphs are properly justified, contrary to texi2any which only
58 # flushes them left in text outputs
60 # - There are 2 blank lines instead of 1 before chapter/section to make
61 # them stand out more
63 # - the line length is set to 76 instead of 72
65 # - there are some differences in what characters are added when a style is
66 # used for a string (@emph, @file, @sc, ...) because it assumes the text
67 # will be read in a plain text tool with no special "smart" highlighting
69 # - it does not check that the syntax is valid; there are some simple
70 # checks but it may misbehave, it does not replace a quality check with
71 # texi2any
73 # - not all commands are implemented, some because they were not needed
74 # so far, probably a few because they would be too complex to implement
76 ###########################################################################
78 # Please note that this script is written in sh+awk on purpose: this script
79 # is going to be run on the machine of the person who is trying to compile
80 # WindowMaker, and as such we cannot be sure to find any scripting language
81 # in a known version and that works (python/ruby/tcl/perl/php/you-name-it).
83 # So for portability, we stick to the same sh+awk constraint as Autotools
84 # to limit the problem, see for example:
85 # http://www.gnu.org/savannah-checkouts/gnu/autoconf/manual/autoconf-2.69/html_node/Portable-Shell.html
87 ###########################################################################
# Report an error on stderr and exit with status 1 to tell make that the
# script could not do its work.
# Arguments: the words of the message, they are joined with single spaces
arg_error() {
  # "$*" joins all the arguments into a single word; "$@" inside a longer
  # string would attach only the first argument to the "$0: " prefix.
  # printf is used so that the message is never altered by echo variants
  # that interpret backslashes.
  printf '%s\n' "$0: $*" >&2
  exit 1
}
# Print the usage summary on stdout, one line per argument given to printf,
# then exit with success status
print_help() {
  printf '%s\n' \
    "$0: convert a Texinfo file into a plain text file" \
    "Usage: $0 [options...] file.texi" \
    "valid options are:" \
    " -Dvar=val : set variable 'var' to value 'val'" \
    " -e email : set email address in variable 'emailsupport'" \
    " -v version : version of the project" \
    " -o file : name of text file to create"
  exit 0
}
# Extract command line arguments
# The options -e, -o and -v take their value from the next argument; it is
# reported as an error when that argument is missing, instead of relying on
# the shell-dependent behaviour of "shift" with nothing left to shift.
while [ $# -gt 0 ]; do
  case $1 in

    -D*)
      echo "$1" | grep '^-D[a-zA-Z][a-zA-Z]*=' > /dev/null || arg_error "syntax error for '$1', expected -Dname=value"
      # Turn "-Dname=value" into an awk assignment that is appended to the
      # list inserted in the BEGIN block of the awk program
      var_defs="$var_defs
`echo "$1" | sed -e 's/^-D/ variable["/ ; s/=/"] = "/ ; s/$/";/' `"
      ;;

    -e)
      [ $# -ge 2 ] || arg_error "option '$1' requires an argument"
      shift
      # The "@" of the address is doubled because "@" starts a texinfo command
      var_defs="$var_defs
variable[\"emailsupport\"] = \"@email{`echo "$1" | sed -e 's/@/@@/g' `}\";"
      ;;

    -h|-help|--help) print_help ;;

    -o)
      [ $# -ge 2 ] || arg_error "option '$1' requires an argument"
      shift
      output_file="$1"
      ;;

    -v)
      [ $# -ge 2 ] || arg_error "option '$1' requires an argument"
      shift
      project_version="$1"
      ;;

    -*) arg_error "unknown option '$1'" ;;

    *)
      [ "x$input_file" = "x" ] || arg_error "only 1 input file can be specified, not \"$input_file\" and \"$1\""
      input_file="$1"
      ;;
  esac
  shift
done
# Check consistency of command-line: both an input and an output file are
# mandatory
if [ "x$input_file" = "x" ]; then
  arg_error "no input texi file given"
fi
if [ "x$output_file" = "x" ]; then
  arg_error "no output file given"
fi
149 ###########################################################################
150 # The script works in 2 passes, in the first pass we generate an almost
151 # complete text file in the temporary file $temp_file, it also generates
152 # the table of content in $toc_file as a sed script.
153 # The second pass generates the $output_file from that $temp_file and the
154 # $toc_file
155 ###########################################################################
# Create the temp files in the current directory: both names are built from
# the input file name with its directory part and its last extension removed
input_base=`echo "$input_file" | sed -e 's,^.*/\([^/]*\)$,\1, ; s,\.[^.]*$,,' `
temp_file="$input_base.tmp"
toc_file="$input_base.toc"
161 # Run awk for 1st pass, but if it fails stop now without deleting temp files
162 awk '
# Abort the conversion: show the message to the user on stderr and leave awk
# with an error status to tell "make" to not go further
function report_error(message) {
  # When we call "exit", the block "END" is still called, we falsely set
  # this variable to skip a spurious second error message
  bye_marker_found = 1;

  printf "Error: %s\n", message > "/dev/stderr";

  # Code 1 is used when the script is invoked with incorrect arguments
  # Code 2 is used by awk to report problems
  exit 3;
}
# Conditionals for @ifXXX and @ifnotXXX commands
# They are kept in a stack (cond_names[], cond_value[], depth in cond_level)
# so that conditionals can be embedded inside other conditionals.
# The global variable "cond_state" contains the current condition (0 or 1):
# it is true only when the new value and all the enclosing values are true.
function start_conditional(name, value,      local_i) {
  cond_names[++cond_level] = name;
  cond_value[cond_level] = value;

  # Combine the new value with every enclosing level
  cond_state = value;
  local_i = cond_level;
  while (--local_i >= 1) {
    cond_state = cond_state && cond_value[local_i];
  }
}
# Leave the innermost conditional and recompute "cond_state" from all the
# conditionals that are still open.
#   name: name of the conditional being closed (not used for the computation)
function end_conditional(name,      local_i) {
  cond_level--;
  cond_state = 1;
  # After the decrement, levels 1 to cond_level (inclusive) are still open
  # and every one of them must be true for the output to be enabled
  for (local_i = 1; local_i <= cond_level; local_i++) {
    cond_state = cond_state && cond_value[local_i];
  }
}
# Texinfo Variables
# the texinfo standard allows to have variables set with @set and used
# with @value; they can also be defined from command-line (-D)
# they are stored in the global array "variable[name]"
#
# "line" is what follows @set: the name is the first word, the value is the
# rest of the line without its leading blanks (empty when there is no rest)
function set_variable(line,      local_idx, local_name, local_value) {
  sub(/^[ \t]*/, "", line);

  local_idx = match(line, /[ \t]/);
  if (local_idx == 0) {
    # Only a name was given
    variable[ line ] = "";
    return;
  }

  local_name = substr(line, 1, local_idx - 1);
  local_value = substr(line, local_idx + 1);
  sub(/^[ \t]*/, "", local_value);
  variable[ local_name ] = local_value;
}
# Write a single line to the output
# Nothing is written while a false conditional is active; depending on
# "redirect_out" the line is either printed (and counted in "line_number")
# or stored in copyright_lines[] for later use
function write_line(line) {
  if (!cond_state) { return; }

  if (redirect_out == "copyright") {
    copyright_lines[copyright_count++] = line;
    return;
  }

  if (redirect_out != "no") {
    report_error("redirect output mode \"" redirect_out "\" is not supported (line " NR ")");
    return;
  }

  print line;
  line_number++;
}
# Paragraph modes
# the current mode for paragraph handling is a Stack to allow embedding
# modes inside other modes
# the global variable "par_mode" contains the active mode
#
# Save the current mode with its line length, prefix, justification and item
# mark, then make "mode" the active mode
function par_mode_push(mode,      local_i) {
  local_i = ++par_mode_count;
  par_mode_save_previous[local_i] = par_mode;
  par_mode_save_length[local_i]   = line_length;
  par_mode_save_prefix[local_i]   = line_prefix;
  par_mode_save_justify[local_i]  = par_justify;
  par_mode_save_itemmark[local_i] = item_list_mark;
  par_mode = mode;

  # Check for quality of output: keep at least 25 columns after the prefix
  if (line_length < length(line_prefix) + 25) {
    print "Warning: too many paragraph modes imbricated at line " NR " for " mode > "/dev/stderr";
    line_length = length(line_prefix) + 25;
  }
}
# Leave the paragraph mode "mode" and restore everything that was saved by
# the matching par_mode_push; it is an error if "mode" is not the active mode
# or if the stack is empty
function par_mode_pop(mode,      local_i) {
  if ((par_mode_count <= 0) || (par_mode != mode)) {
    report_error("found @end " mode " at line " NR " but not in @" mode " (current state is @" par_mode ")");
  }

  local_i = par_mode_count--;
  par_mode       = par_mode_save_previous[local_i];
  line_length    = par_mode_save_length[local_i];
  line_prefix    = par_mode_save_prefix[local_i];
  par_justify    = par_mode_save_justify[local_i];
  item_list_mark = par_mode_save_itemmark[local_i];
}
# Discard all the lines in the file until the specified "@end" is found on a line by itself
#   name: name of the block, the search stops on the line "@end <name>"
# Stops with an error if the input ends, or cannot be read, before that line
function discard_block(name,      local_start_line, local_status) {
  local_start_line = NR;
  while (1) {
    # getline returns 0 at end of file and -1 on a read error; both must
    # stop the loop otherwise a read error would make it spin forever
    local_status = getline;
    if (local_status < 0) { report_error("read error while searching \"@end " name "\", started at line " local_start_line); }
    if (local_status == 0) { report_error("end of file reached while searching \"@end " name "\", started at line " local_start_line); }
    if ($0 == "@end " name) { break; }
  }
}
# Title Page generation
# Flush the pending paragraph (followed by a separation line) if there is
# one, then enter the "titlepage" paragraph mode with its own prefix and
# line length
function generate_title_page(      local_i) {
  if (!cond_state) { return; }

  if (par_nb_words > 0) {
    generate_paragraph();
    write_line(gen_underline(0, 76));
  }

  # Title page start with 5 blank lines so the "title" coming after will
  # stand out a little bit
  for (local_i = 1; local_i <= 3; local_i++) {
    write_line("");
  }

  par_mode_push("titlepage");
  line_prefix = " ";
  line_length = 76 - 4;
}
# Write the main title of the title page, centered on a width of 76 columns
# and surrounded by 2 blank lines before and after.
#   title: text of the title, commands already expanded by the caller
# A title of 60 characters or more is split at spaces in several lines of
# similar length. Nothing is done inside a false conditional, and it is an
# error to use it outside the "titlepage" mode.
# Note: "sub_length" is not in the parameter list, so it is a global variable
292 function generate_title_page_title(title, local_array, local_count, local_i) {
293 if (!cond_state) { return; }
295 if (par_mode != "titlepage") {
296 report_error("command @title used outside @titlepage, at line " NR);
298 generate_paragraph();
300 # Title deserves more space
301 write_line("");
302 write_line("");
304 # Split long title
305 if (length(title) < 60) {
306 local_count = 1;
307 local_array[1] = title;
308 } else {
# Number of lines needed at 60 characters each, then the length that spreads
# the title evenly over that number of lines
309 local_count = int((length(title) + 59 ) / 60);
310 sub_length = int((length(title) + local_count - 1) / local_count);
312 local_count = 0;
313 while (length(title) > 0) {
314 if (length(title) > sub_length) {
315 # Cut at first space before the length
316 local_i = sub_length + 1;
317 while (local_i > 0) {
318 if (substr(title, local_i, 1) == " ") { break; }
319 local_i--;
321 if (local_i == 0) {
322 # Can not break first word, break at first possible place
323 local_i = index(title, " ");
324 if (local_i == 0) { local_i = length(title) + 1; }
326 } else {
327 local_i = length(title) + 1;
330 local_count++;
331 local_array[local_count] = substr(title, 1, local_i - 1);
# Continue after the character at position local_i (the space of the cut)
333 title = substr(title, local_i + 1);
337 # Center the title
338 for (local_i = 1; local_i <= local_count; local_i++) {
339 write_line(gen_underline(-1, int((76 - length(local_array[local_i])) / 2)) local_array[local_i]);
342 write_line("");
343 write_line("");
# Write a subtitle of the title page: each line is centered on a width of 76
# columns and enclosed between "~ " and " ~".
#   title: text of the subtitle, commands already expanded by the caller
# A subtitle of 65 characters or more is split at spaces in several lines of
# similar length. Nothing is done inside a false conditional, and it is an
# error to use it outside the "titlepage" mode.
# Note: "sub_length" is not in the parameter list, so it is a global variable
346 function generate_title_page_subtitle(title, local_array, local_count, local_i) {
347 if (!cond_state) { return; }
349 if (par_mode != "titlepage") {
350 report_error("command @subtitle used outside @titlepage, at line " NR);
352 generate_paragraph();
354 # Split long lines
355 if (length(title) < 65) {
356 local_count = 1;
357 local_array[1] = title;
358 } else {
# Number of lines needed at 65 characters each, then the length that spreads
# the subtitle evenly over that number of lines
359 local_count = int((length(title) + 64) / 65);
360 sub_length = int((length(title) + local_count - 1) / local_count);
362 local_count = 0;
363 while (length(title) > 0) {
364 if (length(title) > sub_length) {
365 # Cut at first space before the length
366 local_i = sub_length + 1;
367 while (local_i > 0) {
368 if (substr(title, local_i, 1) == " ") { break; }
369 local_i--;
371 if (local_i == 0) {
372 # Can not break first word, break at first possible place
373 local_i = index(title, " ");
374 if (local_i == 0) { local_i = length(title) + 1; }
376 } else {
377 local_i = length(title) + 1;
380 local_count++;
381 local_array[local_count] = substr(title, 1, local_i - 1);
# Continue after the character at position local_i (the space of the cut)
383 title = substr(title, local_i + 1);
387 # Center the title
# The 4 subtracted columns are the "~ " and " ~" decorations
388 for (local_i = 1; local_i <= local_count; local_i++) {
389 write_line(gen_underline(-1, int((76 - length(local_array[local_i]) - 4) / 2)) "~ " local_array[local_i] " ~");
# Generate separation line to simulate page breaks in plain text file
# The pending paragraph is written first; inside the title page the
# separation gets one more blank line before it
function generate_page_break() {
  if (!cond_state) { return; }

  generate_paragraph();
  # This must be a comparison: an assignment here would silently switch the
  # current paragraph mode to "titlepage" at every page break
  if (par_mode == "titlepage") {
    write_line("");
  }
  write_line("");
  write_line(gen_underline(0, 76));
  par_indent = 1;
}
# Handle chapter and section declaration
# take care of the automatic numbering and to put the line in the table of
# content file, then generate the underlined line in output
#   level:       depth of the section, 1 (chapter) to 4
#   title:       text of the title, commands already expanded by the caller
#   is_numbered: true if the section takes part in the numbering
function new_section(level, title, is_numbered,      local_i, local_line) {
  if (!cond_state) { return; }

  # Dump the current paragraph now
  generate_paragraph();

  if (is_numbered) {
    # Update the counters: a new section restarts all the deeper levels
    section[level]++;
    for (local_i = 4; local_i > level; local_i--) {
      section[local_i] = 0;
    }

    # Generate the line to be displayed, with the dotted number in front
    local_line = section[1];
    for (local_i = 2; local_i <= level; local_i++) {
      local_line = local_line "." section[local_i];
    }
    local_line = local_line " " title;
  } else {
    local_line = title;
  }

  # Add the entry to the ToC, indented according to its level
  toc_count++;
  toc_entry_level[toc_count] = level;
  toc_entry_name[toc_count] = local_line;
  for (local_i = level; local_i > 1; local_i--) {
    toc_entry_name[toc_count] = " " toc_entry_name[toc_count];
  }
  # The title is the 3rd line written below
  toc_entry_line[toc_count] = line_number + 3;

  # Print the section description
  write_line("");
  write_line("");
  write_line(local_line);
  write_line(gen_underline(level, length(local_line)));
  par_indent = 0;
}
# List of Items
# Enter the paragraph mode "type" for a list, increase the indentation and
# select the mark of the items
#   mark:         argument given to the list command, may contain commands
#   type:         paragraph mode to push for this list
#   default_mark: mark used when "mark" is empty
function start_item_list(mark, type, default_mark) {
  par_mode_push(type);
  list_is_first_item = 1;
  list_item_wants_sepline = 0;
  par_indent = 1;

  # First level of enumeration get one mode indentation space
  line_prefix = (line_prefix == "") ? " " : line_prefix " ";

  item_list_mark = (mark == "") ? default_mark : execute_commands(mark);

  write_line("");
}
# One item in a Table
#   line: the complete input line holding the @item or @itemx command
# The pending paragraph is written first, then the text of the item is
# written with the style command of the table (item_list_mark) applied to it
function generate_item_in_table(line) {
  if (line !~ /^[ \t]*@itemx?[ \t]/) {
    report_error("bad usage for @item inside a @table, should be at start of line and followed by its value, at line " NR);
  }

  generate_paragraph();
  if (list_item_wants_sepline && !list_is_first_item) {
    write_line("");
  }

  # Apply the global table style to this item
  gsub(/^[ \t]*@itemx?[ \t]*/, "", line);
  line = execute_commands(item_list_mark "{" line "}");

  # Cancel the indentation added for the 2nd column for that line
  line = substr(line_prefix, 1, length(line_prefix)-5) line;
  write_line(line);

  list_item_wants_sepline = 0;
}
# Generate Underline string with the specified length
#   id:  selects the character: -1 for spaces, 1 for "*", 2 for "=",
#        3 for "-", 4 for "."; any other value gives "~"
#   len: number of characters of the returned string
function gen_underline(id, len,      local) {
  local = "~~~~~~~~~~";
  if (id == -1)     { local = " "; }
  else if (id == 1) { local = "**********"; }
  else if (id == 2) { local = "=========="; }
  else if (id == 3) { local = "----------"; }
  else if (id == 4) { local = ".........."; }

  # Double the pattern until it is long enough, then cut the excess
  while (length(local) < len) {
    local = local local;
  }
  return substr(local, 1, len);
}
# Generate text for an URL link
#   args: the comma-separated arguments of @uref
# With 1 argument the URL is returned as-is, with 2 the second argument is
# followed by the URL between parenthesis, with 3 only the third argument
# is returned; any other count is an error
function generate_url_reference(args,      local_nb, local_arr) {
  local_nb = split(args, local_arr, ",");

  if (local_nb == 1) { return local_arr[1]; }
  if (local_nb == 2) { return execute_commands(local_arr[2]) " (" local_arr[1] ")"; }
  if (local_nb == 3) { return execute_commands(local_arr[3]); }

  report_error("bad number of argument " local_nb " for @uref at line " NR);
}
# Generate a line with the name of an author
# note, we assume the name(s) always fit on a line
#   name: the name(s), commands already expanded by the caller
# The command is valid in the title page (centered, between "--") and in a
# quotation (shifted to the right, after "--"); elsewhere it is an error
function generate_author_line(name,      local_offset, local_attach_to_par) {
  if (!cond_state) { return; }

  # Remember if there was a paragraph to attach to before it gets flushed
  local_attach_to_par = (par_nb_words > 0);

  generate_paragraph();

  if (par_mode == "titlepage") {
    name = "-- " name " --";
    local_offset = int((76 - length(name)) / 2);
    if (local_offset < 2) { local_offset = 2; }
    write_line("");
    write_line(gen_underline(-1, local_offset) name);
    return;
  }

  if (par_mode == "quotation") {
    name = "-- " name;
    local_offset = int((line_length - length(line_prefix) - length(name)) * 2 / 3);
    if (local_offset < length(line_prefix) + 2) { local_offset = length(line_prefix) + 2; }
    if (!local_attach_to_par) { write_line(""); }
    write_line(line_prefix gen_underline(-1, local_offset) name);
    return;
  }

  report_error("command @author used in an inappropriate mode (" par_mode ") at line " NR);
}
# Add the specified line to the current paragraph being built, do not print anything yet
# The line is split at blanks and every non-empty word is appended to
# par_word[]; "nb", "words" and "i" are global variables
function add_text_to_paragraph(line) {
  nb = split(line, words, /[ \t]+/);
  i = 1;
  while (i <= nb) {
    if (words[i] != "") {
      par_word[par_nb_words] = words[i];
      par_nb_words++;
    }
    i++;
  }
}
562 # Print the paragraph from all the lines read so far
# The words are taken from par_word[0] to par_word[par_nb_words - 1]; nothing
# is done when there is no word. Every line starts with "line_prefix" and is
# filled up to "line_length" columns. What is written before the text (blank
# line, item mark, extra indentation) depends on the current "par_mode"; an
# unknown mode is an error.
# On return par_nb_words is 0 and par_indent is 1.
# Note: ideal_space_length, count_spaces and word_space[] are not in the
# parameter list, so they are global variables
563 function generate_paragraph( local_prefix, local_line, local_length,
564 idx_word_start, idx_word_end, local_i) {
565 if (par_nb_words <= 0) { return; }
567 local_line = line_prefix;
569 if (par_mode == "list") {
570 if (list_item_wants_sepline && !list_is_first_item) {
571 write_line("");
573 list_is_first_item = 0;
574 list_item_wants_sepline = 0;
575 if (!par_indent) {
# The mark replaces the last 5 characters of the prefix on the first line
576 local_prefix = item_list_mark " ";
577 while (length(local_prefix) < 5) { local_prefix = " " local_prefix; }
578 local_line = substr(local_line, 1, length(local_line) - 5) local_prefix;
581 } else if (par_mode == "enum") {
582 if (list_item_wants_sepline && !list_is_first_item) {
583 write_line("");
585 list_is_first_item = 0;
586 list_item_wants_sepline = 0;
587 if (!par_indent) {
588 local_prefix = " " item_list_mark ". ";
589 local_line = substr(local_line, 1, length(local_line) - 5) local_prefix;
591 # Increment the enumeration counter for the next item now
# The first test is true when the mark is a number; otherwise the mark is a
# letter and the next one is taken from a string shifted by one position
592 if (item_list_mark + 0 == item_list_mark) {
593 item_list_mark++;
594 } else {
595 local_i = index("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", item_list_mark);
596 if (local_i == 0) {
597 report_error("value \"" item_list_mark "\" is not supported for enumerated list - invalid @enumerate argument or list too long?");
599 item_list_mark = substr("BCDEFGHIJKLMNOPQRSTUVWXYZ!bcdefghijklmnopqrstuvwxyz!", local_i, 1);
603 } else if (par_mode == "table") {
604 if (list_item_wants_sepline && !list_is_first_item) {
605 write_line("");
607 list_is_first_item = 0;
608 list_item_wants_sepline = 0;
610 } else if (par_mode == "titlepage") {
611 write_line("");
613 } else if (par_mode == "par") {
614 write_line("");
615 if (par_indent) {
616 local_line = local_line " ";
619 } else if (par_mode == "quotation") {
620 write_line("");
621 # There is no extra indentation of paragraphs in this mode
623 } else {
624 report_error("paragraph mode \"" par_mode "\" is not supported in generate_paragraph (line " NR ")");
627 # Split the paragraph in lines
628 idx_word_start = 0;
629 while (idx_word_start < par_nb_words) {
630 # First word is always printed, this makes sure that words too long for a line will
631 # always be printed, very likely on a line by themselfs
632 idx_word_end = idx_word_start;
633 local_length = length(local_line) + length(par_word[idx_word_start]);
634 idx_word_start++;
636 # See how many word we can fit on the line
637 while (idx_word_end < par_nb_words - 1) {
638 if (local_length + 1 + length(par_word[idx_word_end + 1]) > line_length) { break; }
639 idx_word_end++;
640 local_length = local_length + 1 + length(par_word[idx_word_end]);
643 # Put all those words on the current line with the appropriate justification
644 if (par_justify == "right") {
645 local_line = local_line gen_underline(-1, line_length - local_length) par_word[idx_word_start - 1];
646 while (idx_word_start <= idx_word_end) {
647 local_line = local_line " " par_word[idx_word_start++];
649 } else {
# Single spaces are used for left justification, for the last line of the
# paragraph, for a line that is already full and for a line of one word
650 if ((par_justify == "left") || (idx_word_end == par_nb_words - 1) ||
651 (local_length >= line_length) || (idx_word_end < idx_word_start)) {
652 local_line = local_line par_word[idx_word_start - 1];
653 while (idx_word_start <= idx_word_end) {
654 local_line = local_line " " par_word[idx_word_start++];
656 } else {
657 # We calculate the ideal size of a space (as a real number) which would
658 # make all the words perfectly fill the line, the formula being
659 # ideal size = 1 + needed_extra_spaces / number_of_spaces_in_line
660 ideal_space_length = 1 + (line_length - local_length) / (idx_word_end - idx_word_start + 1);
661 count_spaces = 0;
662 for (local_i = idx_word_start; local_i <= idx_word_end; local_i++) {
# The rounding error of each space is carried over to the next one
663 count_spaces = count_spaces + ideal_space_length;
664 word_space[local_i] = gen_underline(-1, int(count_spaces + 0.5));
665 count_spaces = count_spaces - length(word_space[local_i]);
668 local_line = local_line par_word[idx_word_start - 1];
669 while (idx_word_start <= idx_word_end) {
670 local_line = local_line word_space[idx_word_start] par_word[idx_word_start++];
675 write_line(local_line);
677 # Reset for next line
678 local_line = line_prefix;
680 par_nb_words = 0;
681 par_indent = 1;
684 # Replace commands by text in the line, return the result
#   line: text that may contain "@command" or "@command{arguments}"
# The text already processed is accumulated in "replaced_line". Commands
# that produce a character, and @uref, append their result to it; the
# commands that change the style put their argument back at the start of
# "line", so the commands inside the argument are handled by the next turns
# of the loop. An unknown command is an error.
# Note: idx, i, brace_count and cmdargs are not in the parameter list, so
# they are global variables
685 function execute_commands(line, replaced_line, command) {
686 replaced_line = "";
687 while (1) {
688 idx = match(line, /@([a-zA-Z]+|.)/);
689 if (idx == 0) { break; }
691 # Separate the command and its arguments from the rest of the line
692 replaced_line = replaced_line substr(line, 1, idx - 1);
693 command = substr(line, idx + 1, RLENGTH - 1);
694 line = substr(line, idx + RLENGTH);
696 if (line ~ /^\{/) {
697 # Command has argument(s), extract them
# Braces are counted to find the closing brace that matches the first one
698 brace_count = 0;
699 for (i = 1; i <= length(line); i++) {
700 if (substr(line, i, 1) == "{") {
701 brace_count++;
703 if (substr(line, i, 1) == "}") {
704 brace_count--;
705 if (brace_count == 0) { break; }
708 if (brace_count != 0) {
709 report_error("closing brace not found for command \"@" command "\", at line " NR);
712 cmdargs = substr(line, 2, i-2);
713 line = substr(line, i + 1);
715 } else {
716 # Command does not have arguments, discard the spaces used to separate it
717 # from the next text
718 cmdargs = "";
719 sub(/^[ \t]+/, "", line);
722 # Commands generating "special" characters #################################
723 if (command == "@") {
724 replaced_line = replaced_line "@";
726 } else if (command == "bullet") {
727 replaced_line = replaced_line "*";
729 } else if (command == "copyright") {
730 replaced_line = replaced_line "(c)";
732 } else if (command == "minus") {
733 replaced_line = replaced_line "-";
735 } else if (command == "registeredsymbol") {
736 replaced_line = replaced_line "(r)";
738 } else if (command == "today") {
739 # Make sure the date will be in english (we use "C" because it not certain
740 # that the English locale is enabled on the machine of the user)
# NOTE(review): the quoting below leaves the awk program text, so the date
# looks like it is computed by the shell before awk starts - to be confirmed
741 replaced_line = replaced_line "'"`LANG=C date '+%d %B %Y' | sed -e 's,^0,,' `"'";
743 # Commands to display text in a special style ##############################
744 } else if (command == "asis") {
745 line = cmdargs line;
747 } else if (command == "b") { # bold
748 line = "*" cmdargs "*" line;
750 } else if ((command == "cite") ||
751 (command == "emph")) {
752 line = cmdargs line;
754 } else if ((command == "code") ||
755 (command == "command") ||
756 (command == "env") ||
757 (command == "option") ||
758 (command == "var")) {
759 # Should be in fixed-spacing font; printed with single-quotes
760 line = "'\''" cmdargs "'\''" line;
762 } else if (command == "i") { # italic
763 line = "_" cmdargs "_" line;
765 } else if (command == "email") {
766 line = "<" cmdargs ">" line;
768 } else if (command == "file") {
769 line = "\"" cmdargs "\"" line;
771 } else if (command == "key") {
772 line = "<" cmdargs ">" line;
774 } else if (command == "r") { # roman font
775 line = cmdargs line;
777 } else if (command == "sc") {
778 # Small-Caps, keep as-is in plain text
779 line = cmdargs line;
781 } else if (command == "t") { # typewriter-like
782 line = cmdargs line;
784 } else if (command == "uref") {
785 replaced_line = replaced_line generate_url_reference(cmdargs);
787 # Variable and Conditional commands ########################################
788 } else if (command == "value") {
# A variable defined with an empty value is reported as unknown too
789 if (variable[cmdargs] == "") {
790 report_error("variable '" cmdargs "' is unknow, for @value at line " NR);
792 line = variable[cmdargs] line;
794 # Miscellaneous commands ###################################################
795 } else if (command == "c") {
796 # Comments: ignore everything to the end of line
797 line = "";
799 } else {
800 report_error("unknow command @" command " at line " NR);
804 return (replaced_line line);
807 # Handle appropriately the "@end xxx"
#   line: the name that follows "@end"
# If the name is the one of the innermost open conditional, that conditional
# is closed and nothing else is done. Otherwise the pending paragraph is
# written and the matching paragraph mode is popped; an unknown name is an
# error.
808 function process_end(line) {
809 if (line == cond_names[cond_level]) {
810 end_conditional(line);
811 return;
813 if (line == "copying") {
814 generate_paragraph();
# Lines are printed again instead of being stored in copyright_lines[]
815 redirect_out = "no";
817 } else if (line == "enumerate") {
818 generate_paragraph();
819 par_mode_pop("enum");
820 par_indent = 1;
822 } else if (line == "example") {
823 generate_paragraph();
824 par_mode_pop("example");
825 par_indent = 1;
827 } else if (line == "flushleft") {
828 generate_paragraph();
# The mode pushed by @flushleft is the mode that was current at that time,
# so the current mode is given and the mode check of the pop always passes
829 par_mode_pop(par_mode);
830 par_indent = 1;
832 } else if (line == "flushright") {
833 generate_paragraph();
834 par_mode_pop(par_mode);
835 par_indent = 1;
837 } else if (line == "itemize") {
838 generate_paragraph();
839 par_mode_pop("list");
840 par_indent = 1;
842 } else if (line == "quotation") {
843 generate_paragraph();
844 par_mode_pop("quotation");
845 par_indent = 1;
847 } else if ((line == "table") || (line == "ftable") || (line == "vtable")) {
848 generate_paragraph();
849 par_mode_pop("table");
850 par_indent = 1;
852 } else if (line == "titlepage") {
853 generate_page_break();
854 par_mode_pop("titlepage");
855 par_indent = 0;
857 } else {
858 report_error("unknow command @end " line " at line " NR);
# Initialisation of the global state before the first input line is read
862 BEGIN {
863 # Count the lines generated for the Table of Content
864 line_number = 0;
866 # To perform some basic checks on the file
867 top_was_found = 0;
868 bye_marker_found = 0;
870 # Paragraph generation parameters
871 par_mode_count = 0;
872 par_mode = "par";
873 par_nb_words = 0;
874 par_indent = 1;
875 par_justify = "justify";
876 redirect_out = "no";
877 line_length = 76;
878 line_prefix = "";
880 # To handle conditional code
881 cond_level = 0;
882 cond_state = 1;
884 # Number of entries in the Table of Content
885 toc_count = 0;
# The quoting leaves the awk program text to insert the shell variable
886 toc_file = "'"$toc_file"'";
888 # Define a custom variable so it is possible to differentiate between
889 # texi2any and this script
890 variable["cctexi2txt"] = "1.0";
# The assignments built by the shell from the -D and -e options are inserted
# after the comment below
892 # Variables inherited from the command line'"$var_defs"'
# The first line of the input is special, it is never processed
NR == 1 { next; }
898 /^[ \t]*@/ {
899 # Treat the special commands that are supposed to be on a line by themselves
900 idx = match($0, /^@([a-zA-Z]+)/);
901 if (idx != 0) {
902 # Remove the command from current line
903 command = substr($0, idx + 1, RLENGTH - 1);
904 line = substr($0, idx + 1 + RLENGTH);
905 sub(/^[ \t]+/, "", line);
907 # Commands for structuring the document ####################################
908 if (command == "chapter") {
909 new_section(1, execute_commands(line), 1);
910 next;
912 } else if (command == "section") {
913 new_section(2, execute_commands(line), 1);
914 next;
916 } else if (command == "subsection") {
917 new_section(3, execute_commands(line), 1);
918 next;
920 } else if (command == "subsubsection") {
921 new_section(4, execute_commands(line), 1);
922 next;
924 } else if (command == "node") {
925 # We ignore nodes completely, this is for the "info" format only
926 next;
928 } else if (command == "top") {
929 # This is mandatory for "info" format, but useless for plain text
930 if (top_was_found > 0) {
931 report_error("command @top at line " NR " but was already found at line " top_was_found);
933 top_was_found = NR;
934 next;
936 } else if (command == "unnumbered") {
937 new_section(1, execute_commands(line), 0);
938 next;
940 # Commands for content in the Title Page ###################################
941 } else if (command == "author") {
942 generate_author_line(execute_commands(line));
943 next;
945 } else if (command == "subtitle") {
946 generate_title_page_subtitle(execute_commands(line));
947 next;
949 } else if (command == "title") {
950 generate_title_page_title(execute_commands(line));
951 next;
953 # Commands changing the way paragraph are displayed ########################
954 } else if (command == "copying") {
955 generate_paragraph();
956 redirect_out = "copyright";
957 copyright_count = 0;
958 next;
960 } else if (command == "end") {
961 process_end(line);
962 next;
964 } else if (command == "enumerate") {
965 if (cond_state) {
966 generate_paragraph();
967 start_item_list(line, "enum", "1");
969 next;
971 } else if (command == "example") {
972 if (cond_state) {
973 generate_paragraph();
974 write_line("");
975 par_mode_push("example");
976 line_prefix = line_prefix " ";
978 next;
980 } else if (command == "flushleft") {
981 if (cond_state) {
982 generate_paragraph();
983 par_mode_push(par_mode);
984 par_justify = "left";
985 par_indent = 0;
987 next;
989 } else if (command == "flushright") {
990 if (cond_state) {
991 generate_paragraph();
992 par_mode_push(par_mode);
993 par_justify = "right";
994 par_indent = 0;
996 next;
998 } else if (command == "itemize") {
999 if (cond_state) {
1000 generate_paragraph();
1001 start_item_list(line, "list", "*");
1003 next;
1005 } else if (command == "menu") {
1006 generate_paragraph();
1007 discard_block(command);
1008 next;
1010 } else if (command == "quotation") {
1011 if (cond_state) {
1012 generate_paragraph();
1013 par_mode_push("quotation");
1014 line_prefix = line_prefix " ";
1015 line_length = line_length - 4;
1016 if (line != "") {
1017 add_text_to_paragraph(execute_commands(line));
1018 # We add the ":" to the last word because we count on the function
1019 # "add_text_to_paragraph" to remove the trailing spaces on the line
1020 # first, which would not have happened if we just had appended the ":"
1021 # to the argument in the function call
1022 par_word[par_nb_words - 1] = par_word[par_nb_words - 1] ":";
1023 line = "";
1026 next;
1028 } else if ((command == "table") ||
1029 (command == "ftable") ||
1030 (command == "vtable")) {
1031 # "ftable" and "vtable" are the same as "table" except they are adding automatically
1032 # the item to the appropriate Index (respectively Function and Variable indexes).
1033 # As we do not generate index in the text file, we just treat them identically
1034 if (cond_state) {
1035 generate_paragraph();
1036 par_mode_push("table");
1037 list_is_first_item = 1;
1038 list_item_wants_sepline = 0;
1039 par_indent = 1;
1040 line_prefix = line_prefix " ";
1041 gsub(/[ \t]/, "", line);
1042 if (line !~ /^@[a-z][a-z]*$/) {
1043 report_error("invalid usage of @table, expecting a single style-changing command");
1045 item_list_mark = line;
1046 write_line("");
1048 next;
1050 } else if (command == "titlepage") {
1051 generate_title_page();
1052 next;
1054 # Commands generating text automatically ###################################
1055 } else if (command == "contents") {
1056 if (cond_state) {
1057 generate_paragraph();
1058 write_line("");
1059 write_line("");
1060 print "@table_of_content@";
1062 next;
1064 } else if (command == "insertcopying") {
1065 if (cond_state) {
1066 generate_paragraph();
1067 # The copying block was already formatted, we just have to print it as-is
1068 for (i = 0; i < copyright_count; i++) {
1069 write_line(copyright_lines[i]);
1072 next;
1074 } else if (command == "page") {
1075 generate_page_break();
1076 next;
1078 } else if (command == "sp") {
1079 if (cond_state) {
1080 generate_paragraph();
1081 while (line > 0) {
1082 write_line("");
1083 line--;
1086 next;
1088 } else if (command == "vskip") {
1089 # Silently ignore, this is just for TeX
1090 if (cond_state) {
1091 generate_paragraph();
1093 next;
1095 # Variable and Conditional commands ########################################
1096 } else if (command == "ifdocbook") { start_conditional(command, 0); line = ""; next;
1097 } else if (command == "ifhtml") { start_conditional(command, 0); line = ""; next;
1098 } else if (command == "ifinfo") { start_conditional(command, 1); line = ""; next; # "for historical compatibility"
1099 } else if (command == "ifplaintext") { start_conditional(command, 1); line = ""; next;
1100 } else if (command == "iftex") { start_conditional(command, 0); line = ""; next;
1101 } else if (command == "ifxml") { start_conditional(command, 0); line = ""; next;
1103 } else if (command == "ifnotdocbook") { start_conditional(command, 1); line = ""; next;
1104 } else if (command == "ifnothtml") { start_conditional(command, 1); line = ""; next;
1105 } else if (command == "ifnotinfo") { start_conditional(command, 0); line = ""; next; # "for historical compatibility"
1106 } else if (command == "ifnotplaintext") { start_conditional(command, 0); line = ""; next;
1107 } else if (command == "ifnottex") { start_conditional(command, 1); line = ""; next;
1108 } else if (command == "ifnotxml") { start_conditional(command, 1); line = ""; next;
1110 } else if (command == "ifclear") { start_conditional(command, (variable[line] == "")); next;
1111 } else if (command == "ifset") { start_conditional(command, (variable[line] != "")); next;
1113 } else if (command == "clear") {
1114 if (cond_state) {
1115 variable[ execute_commands(line) ] = "";
1117 next;
1119 } else if (command == "set") {
1120 if (cond_state) {
1121 set_variable(line);
1123 next;
1125 # Miscellaneous commands ###################################################
1126 } else if (command == "bye") {
1127 # Mark the end of file, we are supposed to ignore everything after
1128 if (cond_state) {
1129 generate_paragraph();
1130 while (getline != 0) { }
1131 bye_marker_found = 1;
1133 next;
1135 } else if (command == "c") {
1136 # Comments: ignore everything to the end of line
1137 next;
1139 } else if (command == "errormsg") {
1140 print "Error: " execute_commands(cmdargs) > "/dev/stderr";
1141 print " (from \"'"$input_file"'\", line " NR ")" > "/dev/stderr";
1142 bye_marker_found = 1;
1143 exit 4;
1145 } else if (command == "finalout") {
1146 # Nothing to do, we are not generating anything in output file about long lines
1147 next;
1149 } else if (command == "ignore") {
1150 # These are multi-lines comments
1151 discard_block(command);
1152 next;
1154 } else if (command == "indent") {
1155 par_indent = 1;
1156 if (line == "") { next; }
1157 $0 = line;
1159 } else if (command == "noindent") {
1160 par_indent = 0;
1161 if (line == "") { next; }
1162 $0 = line;
1164 } else if (command == "setfilename") {
1165 # Should set the output file name automatically
1166 # at current time, we just ignore it
1167 next;
1169 } else if (command == "settitle") {
1170 # This is used for page headers
1171 # in a plain text file, it is useless
1172 next;
1175 # Commands that were not recognised here may be commands that can be used
1176 # anywhere in a line but happened to be at the beginning of the line this
1177 # time, we do nothing so they will be processed by "execute_commands"
# Rule for every input line reaching this point that contains "@item". The
# regex is not anchored, so the command is found anywhere on the line (and
# longer names that merely start with "@item" match as well).
1181 /@item/ {
1182 # We treat @item specially because it may generate more than 1 paragraph
# Nothing is done for the line while "cond_state" is false
1183 if (!cond_state) { next; }
# In "table" mode the whole line is handed over to "generate_item_in_table";
# in any mode other than "list" or "enum" the @item is reported as an error
1185 if (par_mode == "table") {
1186 generate_item_in_table($0);
1187 next;
1188 } else if ((par_mode != "list") && (par_mode != "enum")) {
1189 report_error("found @item at line " NR " but not inside an @itemize");
# Consume every "@item" occurrence of the line, from left to right
1192 while (1) {
1193 idx = match($0, /@item/);
1194 if (idx == 0) { break; }
1196 # We generate a paragraph with all the text seen so far, which is part of
1197 # the previous item
# NOTE(review): the text located before "@item" is added as-is, it does not go
# through "execute_commands" like the regular text lines do - confirm intended
1198 add_text_to_paragraph(substr($0, 1, idx - 1));
1199 generate_paragraph();
# Keep only what follows the 5 characters of "@item"
1200 $0 = substr($0, idx + 5);
1202 # When an item is found, we clear "par_indent" to actually place the item
1203 # mark on the next paragraph
1204 par_indent = 0;
1207 # If the item is on a line by itself, stop processing the line to avoid
1208 # skipping lines more than necessary
# Otherwise there is no "next": the remaining text in $0 continues to the
# rules that follow this one
1209 if (/^[ \t]*$/) { next; }
# Catch-all handling of the lines that were not consumed by a previous rule:
# what is done with the text depends on the current paragraph mode "par_mode"
1212 # Non-empty lines are added to the current paragraph
# Lines are dropped while "cond_state" is false
1214 if (!cond_state) { next; }
# Modes in which the text is accumulated into a paragraph
1216 if ((par_mode == "list") ||
1217 (par_mode == "enum") ||
1218 (par_mode == "par") ||
1219 (par_mode == "table") ||
1220 (par_mode == "titlepage") ||
1221 (par_mode == "quotation")) {
1222 if (/^[ \t]*$/) {
1223 # Empty lines separate paragraphs
1224 generate_paragraph();
1225 # in list of items, they also tell us that user prefers an aerated list
# (the flag is set whatever the mode is, not only for "list" and "enum")
1226 list_item_wants_sepline = 1;
1227 } else {
# The commands found in the line are executed before the text is accumulated
1228 add_text_to_paragraph(execute_commands($0));
1231 } else if (par_mode == "example") {
1232 # Line is printed unmodified, not split and not merged, but with an indentation
# The indentation is the current "line_prefix"; trailing blanks are removed
# before the line is written
1233 $0 = line_prefix execute_commands($0);
1234 sub(/[ \t]*$/, "");
1235 write_line($0);
1237 } else {
# Any other mode is not expected to receive text lines
1238 report_error("paragraph mode \"" par_mode "\" is not supported for line processing (line " NR ")");
# End of input: check that the mandatory commands were seen, then write the
# Table of Content into the file named by "toc_file"
1242 END {
# "bye_marker_found" is set by the handling of @bye (and of @errormsg)
1243 if (!bye_marker_found) {
1244 report_error("command \"@bye\" missing at end of file");
# NOTE(review): "top_was_found" is not set in this part of the script,
# presumably it is set by the handling of @top - confirm
1246 if (!top_was_found) {
1247 report_error("command \"@top\" was not found in the file");
1250 # Count the number of lines that the ToC will occupy
1251 # we assume the ToC is at the beginning, so all sections will be shifted
1252 # by this number of lines down
# One line per entry, plus one separator line before each level 1 entry
# except the very first entry; this mirrors the generation loop below
1253 toc_nb_lines = 0;
1254 for (i = 1; i <= toc_count; i++) {
1255 if ((i > 1) && (toc_entry_level[i] == 1)) {
1256 toc_nb_lines++;
1258 toc_nb_lines++;
1261 # Generate the ToC
# NOTE(review): "toc_file" is only written inside this loop, so nothing is
# written to it when "toc_count" is 0
1262 for (i = 1; i <= toc_count; i++) {
1263 if ((i > 1) && (toc_entry_level[i] == 1)) {
# Empty line before each level 1 entry, except before the first entry
1264 print "" > toc_file;
# $0 is used as a scratch buffer to build the line of the entry
1267 $0 = " " toc_entry_name[i] " ";
# Make the length even before appending the 2-character leader pattern,
# presumably to keep the dots aligned between the entries
1268 if (length($0) % 2) { $0 = $0 " "; }
1269 while (length($0) < 76 - 4) {
1270 $0 = $0 " .";
# Line number of the section once shifted down by the size of the ToC
1273 target_line = toc_entry_line[i] + toc_nb_lines;
# Truncate the entry so that the line number ends at a fixed column
1275 $0 = substr($0, 1, (76 - 5) - length(target_line)) " " target_line;
1276 print > toc_file;
1280 ' "$input_file" > "$temp_file" || exit $?
# Run awk for the 2nd pass: copy the result of the 1st pass to the output file
# and replace the "@table_of_content@" placeholder line with the content of
# the generated ToC file.
# If it fails, also stop now without deleting the temp files.
#
# The name of the ToC file is given to awk with "-v" instead of being pasted
# into the program text, so a quote in the path cannot corrupt the program.
awk -v toc_file="$toc_file" '
/@table_of_content@/ {
  # "getline < file" returns -1 when the file cannot be read: the result must
  # be compared with "> 0", otherwise the error is seen as "true" and the loop
  # never ends
  while ((toc_status = (getline toc_line < toc_file)) > 0) {
    print toc_line;
  }
  if (toc_status < 0) {
    print "Error: cannot read the table of content file \"" toc_file "\"" > "/dev/stderr";
    exit 1;
  }
  # Close the file so that it is read again from its start if the
  # placeholder is found a second time
  close(toc_file);
  next;
}

{ print }
' "$temp_file" > "$output_file" || exit $?
# If all worked, remove the temp files
# The "--" ends the option parsing, so a path starting with "-" is not
# mistaken for an option of "rm"
rm -f -- "$temp_file" "$toc_file"