script/generate-txt-from-texi.sh

   1 #!/bin/sh
   2 ###########################################################################
   3 #
   4 #  Window Maker window manager
   5 #
   6 #  Copyright (c) 2014-2015 Christophe CURIS
   7 #  Copyright (c) 2015 Window Maker Team
   8 #
   9 #  This program is free software; you can redistribute it and/or modify
  10 #  it under the terms of the GNU General Public License as published by
  11 #  the Free Software Foundation; either version 2 of the License, or
  12 #  (at your option) any later version.
  13 #
  14 #  This program is distributed in the hope that it will be useful,
  15 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 #  GNU General Public License for more details.
  18 #
  19 #  You should have received a copy of the GNU General Public License along
  20 #  with this program; if not, write to the Free Software Foundation, Inc.,
  21 #  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  22 #
  23 ###########################################################################
  24 #
  25 # generate-txt-from-texi.sh:
  26 #   generate a plain text file from a texinfo documentation
  27 #
  28 # The goal is to achieve a result similar to:
  29 #   texi2any --plaintext --no-split <filename>
  30 #
# The reason for this script is that we do not want to add a strict
# dependency on the 'makeinfo' tool suite (which has its own dependencies)
#
# There is also the problem that we use it to generate some documentation
# that should be available before the 'configure' script has been run
# (in the 'autogen.sh' script) because some of this documentation provides
# information for running the 'configure' script; we distribute these
# generated docs so normal users won't have the problem, but people that
# want to build from the Git repository will.
  40 #
  41 # This script is not a reference for Texinfo syntax, so if you modified the
  42 # texi source, you should really consider running texi2any at least once to
  43 # make sure everything is still ok.
  44 #
  45 ###########################################################################
  46 #
  47 # Despite trying to be close to the texi2any output, this script has a few
  48 # known differences:
  49 #
#  - texi2any does not generate a proper title header, it settles for
# a rudimentary single line; this script generates a better looking
# header with all the information provided
  53 #
#  - the table of content contains the line number for the section to ease
# navigation, contrary to texi2any which settles for a simplistic toc
  56 #
#  - the paragraphs are properly justified, contrary to texi2any which only
# flushes them left in text outputs
  59 #
  60 #  - There are 2 blank lines instead of 1 before chapter/section to make
  61 # them stand out more
  62 #
  63 #  - the line length is set to 76 instead of 72
  64 #
  65 #  - there are some difference in what characters are added when a style is
  66 # used for a string (@emph, @file, @sc, ...) because it assumes the text
  67 # will be read in a plain text tool with no special "smart" highlighting
  68 #
  69 #  - it does not check that the syntax is valid; there are some simple
  70 # checks but it may misbehave, it does not replace a quality check with
  71 # texi2any
  72 #
  73 #  - not all commands are implemented, some because they were not needed
  74 # so far, probably a few because they would be too complex to implement
  75 #
  76 ###########################################################################
  77 #
# Please note that this script is written in sh+awk on purpose: this script
# is going to be run on the machine of the person who is trying to compile
# WindowMaker, and as such we cannot be sure to find any scripting language
# in a known version and that works (python/ruby/tcl/perl/php/you-name-it).
  82 #
  83 # So for portability, we stick to the same sh+awk constraint as Autotools
  84 # to limit the problem, see for example:
  85 #   http://www.gnu.org/savannah-checkouts/gnu/autoconf/manual/autoconf-2.69/html_node/Portable-Shell.html
  86 #
  87 ###########################################################################
  88
  89 # Report an error on stderr and exit with status 1 to tell make could not work
  90 arg_error() {
  91     echo "$0: $@" >&2
  92     exit 1
  93 }
  94
  95 # print help and exit with success status
  96 print_help() {
  97     echo "$0: convert a Texinfo file into a plain text file"
  98     echo "Usage: $0 [options...] file.texi"
  99     echo "valid options are:"
 100     echo "  -Dvar=val  : set variable 'var' to value 'val'"
 101     echo "  -e email   : set email address in variable 'emailsupport'"
 102     echo "  -v version : version of the project"
 103     echo "  -o file    : name of text file to create"
 104     exit 0
 105 }
 106
 107 # Extract command line arguments
 108 while [ $# -gt 0 ]; do
 109     case $1 in
 110
 111         -D*)
 112             echo "$1" | grep '^-D[a-zA-Z][a-zA-Z]*=' > /dev/null || arg_error "syntax error for '$1', expected -Dname=value"
 113             var_defs="$var_defs
 114 `echo "$1" | sed -e 's/^-D/  variable["/ ; s/=/"] = "/ ; s/$/";/' `"
 115           ;;
 116
 117         -e)
 118             shift
 119             var_defs="$var_defs
 120   variable[\"emailsupport\"] = \"@email{`echo "$1" | sed -e 's/@/@@/g' `}\";"
 121           ;;
 122
 123         -h|-help|--help) print_help ;;
 124
 125         -o)
 126             shift
 127             output_file="$1"
 128           ;;
 129
 130         -v)
 131             shift
 132             project_version="$1"
 133           ;;
 134
 135         -*) arg_error "unknow option '$1'" ;;
 136
 137         *)
 138             [ "x$input_file" = "x" ] || arg_error "only 1 input file can be specified, not \"$input_file\" and \"$1\""
 139             input_file="$1"
 140           ;;
 141     esac
 142     shift
 143 done
 144
 145 # Check consistency of command-line
 146 [ "x$input_file" != "x" ] || arg_error "no input texi file given"
 147 [ "x$output_file" != "x" ] || arg_error "no output file given"
 148
 149 ###########################################################################
 150 # The script works in 2 passes, in the first pass we generate an almost
 151 # complete text file in the temporary file $temp_file, it also generates
 152 # the table of content in $toc_file as a sed script.
 153 # The second pass generate the $output_file from that $temp_file and the
 154 # $toc_file
 155 ###########################################################################
 156
 157 # Create the temp file in the current directory
 158 temp_file="`echo "$input_file" | sed -e 's,^.*/\([^/]*\)$,\1, ; s,\.[^.]*$,,' `.tmp"
 159 toc_file="`echo "$temp_file" | sed -e 's,\.[^.]*$,,' `.toc"
 160
 161 # Run awk for 1st pass, but if it fails stop now without deleting temp files
 162 awk '
 163 # Stop processing everything, print the message for user and return error code
 164 # to tell "make" to not go further
 165 function report_error(message) {
 166   print "Error: " message > "/dev/stderr";
 167
 168   # When we call "exit", the block "END" is still called, we falsely set
 169   # this variable to skip a spurious second error message
 170   bye_marker_found = 1;
 171
 172   # Code 1 is used when the script is invoked with incorrect arguments
 173   # Code 2 is used by awk to report problems
 174   exit 3;
 175 }
 176
 177 # Conditionals for @ifXXX and @ifnotXXX commands
 178 # stored in a stack to allow embedding conditionals inside other conditionals
 179 # the global variable "cond_state" contains the current condition (0 or 1)
 180 function start_conditional(name, value,         local_i) {
 181   cond_level++;
 182   cond_names[cond_level] = name;
 183   cond_value[cond_level] = value;
 184   cond_state = value;
 185   for (local_i = 1; local_i < cond_level; local_i++) {
 186     cond_state = cond_state && cond_value[local_i];
 187   }
 188 }
 189
 190 function end_conditional(name,          local_i) {
 191   cond_level--;
 192   cond_state = 1;
 193   for (local_i = 1; local_i < cond_level; local_i++) {
 194     cond_state = cond_state && cond_value[local_i];
 195   }
 196 }
 197
 198 # Texinfo Variables
 199 # the texinfo standard allows to have variables set with @set and used
 200 # with @value; they can also be defined from command-line (-D)
 201 # they are stored in the global array "variable[name]"
 202 function set_variable(line,          local_idx, local_name, local_value) {
 203   gsub(/^[ \t]*/, "", line);
 204   local_idx = match(line, /[ \t]/);
 205   if (local_idx > 0) {
 206     local_name  = substr(line, 1, local_idx - 1);
 207     local_value = substr(line, local_idx + 1);
 208     gsub(/^[ \t]*/, "", local_value);
 209   } else {
 210     local_name  = line;
 211     local_value = "";
 212   }
 213   variable[ local_name ] = local_value;
 214 }
 215
 216 # Write a single line to the output
 217 function write_line(line) {
 218   if (!cond_state) { return; }
 219
 220   if (redirect_out == "no") {
 221     print line;
 222     line_number++;
 223
 224   } else if (redirect_out == "copyright") {
 225     copyright_lines[copyright_count++] = line;
 226
 227   } else {
 228     report_error("redirect output mode \"" redirect_out "\" is not supported (line " NR ")");
 229   }
 230 }
 231
 232 # Paragraph modes
 233 # the current mode for paragraph handling is a Stack to allow embedding
 234 # modes inside other modes
 235 # the global variable "par_mode" contains the active mode
 236 function par_mode_push(mode,          local_i) {
 237   par_mode_count++;
 238   par_mode_save_previous[par_mode_count] = par_mode;
 239   par_mode_save_length[par_mode_count] = line_length;
 240   par_mode_save_prefix[par_mode_count] = line_prefix;
 241   par_mode_save_justify[par_mode_count] = par_justify;
 242   par_mode_save_itemmark[par_mode_count] = item_list_mark;
 243   par_mode = mode;
 244
 245   # Check for quality of output
 246   if (length(line_prefix) + 25 > line_length) {
 247     print "Warning: too many paragraph modes imbricated at line " NR " for " mode > "/dev/stderr";
 248     line_length = length(line_prefix) + 25;
 249   }
 250 }
 251
 252 function par_mode_pop(mode,          local_i) {
 253   if ((par_mode != mode) || (par_mode_count <= 0)) {
 254     report_error("found @end " mode " at line " NR " but not in @" mode " (current state is @" par_mode ")");
 255   }
 256   par_mode = par_mode_save_previous[par_mode_count];
 257   line_length = par_mode_save_length[par_mode_count];
 258   line_prefix = par_mode_save_prefix[par_mode_count];
 259   par_justify = par_mode_save_justify[par_mode_count];
 260   item_list_mark = par_mode_save_itemmark[par_mode_count];
 261   par_mode_count--;
 262 }
 263
 264 # Discard all the lines in the file until the specified "@end" is found on a line by itself
 265 function discard_block(name,          local_start_line) {
 266   local_start_line = NR;
 267   while (1) {
 268     if (getline == 0) { report_error("end of file reached while searching \"@end " name "\", started at line " local_start_line); }
 269     if ($0 == "@end " name) { break; }
 270   }
 271 }
 272
 273 # Title Page generation
 274 function generate_title_page() {
 275   if (!cond_state) { return; }
 276
 277   if (par_nb_words > 0) {
 278     generate_paragraph();
 279     write_line(gen_underline(0, 76));
 280   }
 281
 282   # Title page start with 5 blank lines so the "title" coming after will
 283   # stand out a little bit
 284   write_line("");
 285   write_line("");
 286   write_line("");
 287   par_mode_push("titlepage");
 288   line_prefix = "  ";
 289   line_length = 76 - 4;
 290 }
 291
 292 function generate_title_page_title(title,          local_array, local_count, local_i) {
 293   if (!cond_state) { return; }
 294
 295   if (par_mode != "titlepage") {
 296     report_error("command @title used outside @titlepage, at line " NR);
 297   }
 298   generate_paragraph();
 299
 300   # Title deserves more space
 301   write_line("");
 302   write_line("");
 303
 304   # Split long title
 305   if (length(title) < 60) {
 306     local_count = 1;
 307     local_array[1] = title;
 308   } else {
 309     local_count = int((length(title) + 59 ) / 60);
 310     sub_length = int((length(title) + local_count - 1) / local_count);
 311
 312     local_count = 0;
 313     while (length(title) > 0) {
 314       if (length(title) > sub_length) {
 315         # Cut at first space before the length
 316         local_i = sub_length + 1;
 317         while (local_i > 0) {
 318           if (substr(title, local_i, 1) == " ") { break; }
 319           local_i--;
 320         }
 321         if (local_i == 0) {
 322           # Can not break first word, break at first possible place
 323           local_i = index(title, " ");
 324           if (local_i == 0) { local_i = length(title) + 1; }
 325         }
 326       } else {
 327         local_i = length(title) + 1;
 328       }
 329
 330       local_count++;
 331       local_array[local_count] = substr(title, 1, local_i - 1);
 332
 333       title = substr(title, local_i + 1);
 334     }
 335   }
 336
 337   # Center the title
 338   for (local_i = 1; local_i <= local_count; local_i++) {
 339     write_line(gen_underline(-1, int((76 - length(local_array[local_i])) / 2)) local_array[local_i]);
 340   }
 341
 342   write_line("");
 343   write_line("");
 344 }
 345
 346 function generate_title_page_subtitle(title,          local_array, local_count, local_i) {
 347   if (!cond_state) { return; }
 348
 349   if (par_mode != "titlepage") {
 350     report_error("command @subtitle used outside @titlepage, at line " NR);
 351   }
 352   generate_paragraph();
 353
 354   # Split long lines
 355   if (length(title) < 65) {
 356     local_count = 1;
 357     local_array[1] = title;
 358   } else {
 359     local_count = int((length(title) + 64) / 65);
 360     sub_length = int((length(title) + local_count - 1) / local_count);
 361
 362     local_count = 0;
 363     while (length(title) > 0) {
 364       if (length(title) > sub_length) {
 365         # Cut at first space before the length
 366         local_i = sub_length + 1;
 367         while (local_i > 0) {
 368           if (substr(title, local_i, 1) == " ") { break; }
 369           local_i--;
 370         }
 371         if (local_i == 0) {
 372           # Can not break first word, break at first possible place
 373           local_i = index(title, " ");
 374           if (local_i == 0) { local_i = length(title) + 1; }
 375         }
 376       } else {
 377         local_i = length(title) + 1;
 378       }
 379
 380       local_count++;
 381       local_array[local_count] = substr(title, 1, local_i - 1);
 382
 383       title = substr(title, local_i + 1);
 384     }
 385   }
 386
 387   # Center the title
 388   for (local_i = 1; local_i <= local_count; local_i++) {
 389     write_line(gen_underline(-1, int((76 - length(local_array[local_i]) - 4) / 2)) "~ " local_array[local_i] " ~");
 390   }
 391 }
 392
 393 # Generate separation line to simulate page breaks in plain text file
 394 function generate_page_break() {
 395   if (!cond_state) { return; }
 396
 397   generate_paragraph();
 398   if (par_mode = "titlepage") {
 399     write_line("");
 400   }
 401   write_line("");
 402   write_line(gen_underline(0, 76));
 403   par_indent = 1;
 404 }
 405
 406 # Handle chapter and section declaration
 407 # take care of the automatic numbering and to put the line in the table of
 408 # content file, then generate the underlined line in output
 409 function new_section(level, title, is_numbered,         local_i, local_line) {
 410   if (!cond_state) { return; }
 411
 412   # Dump the current paragraph now
 413   generate_paragraph();
 414
 415   # Update the counters
 416   if (is_numbered) {
 417     section[level]++;
 418     for (local_i = level + 1; local_i <= 4; local_i++) {
 419       section[local_i] = 0;
 420     }
 421   }
 422
 423   # Generate the line to be displayed
 424   if (is_numbered) {
 425     local_line = section[1];
 426     for (local_i = 2; local_i <= level; local_i++) {
 427       local_line = local_line "." section[local_i];
 428     }
 429     local_line = local_line " " title;
 430   } else {
 431     local_line = title;
 432   }
 433
 434   # Add the entry to the ToC
 435   toc_count++;
 436   toc_entry_level[toc_count] = level;
 437   toc_entry_name[toc_count] = local_line;
 438   for (local_i = 1; local_i < level; local_i++) {
 439     toc_entry_name[toc_count] = "  " toc_entry_name[toc_count];
 440   }
 441   toc_entry_line[toc_count] = line_number + 3;
 442
 443   # Print the section description
 444   write_line("");
 445   write_line("");
 446   write_line(local_line);
 447   write_line(gen_underline(level, length(local_line)));
 448   par_indent = 0;
 449 }
 450
 451 # List of Items
 452 function start_item_list(mark, type, default_mark) {
 453   par_mode_push(type);
 454   list_is_first_item = 1;
 455   list_item_wants_sepline = 0;
 456   par_indent = 1;
 457   if (line_prefix == "") {
 458     # First level of enumeration get one mode indentation space
 459     line_prefix = "     ";
 460   } else {
 461     line_prefix = line_prefix "    ";
 462   }
 463   if (mark == "") {
 464     item_list_mark = default_mark;
 465   } else {
 466     item_list_mark = execute_commands(mark);
 467   }
 468   write_line("");
 469 }
 470
 471 # One item in a Table
 472 function generate_item_in_table(line) {
 473   if (line !~ /^[ \t]*@itemx?[ \t]/) {
 474     report_error("bas usage for @item inside a @table, should be at start of line and followed by its value");
 475   }
 476
 477   generate_paragraph();
 478   if (list_item_wants_sepline && !list_is_first_item) {
 479     write_line("");
 480   }
 481
 482   # Apply the global table style to this item
 483   gsub(/^[ \t]*@itemx?[ \t]*/, "", line);
 484   line = execute_commands(item_list_mark "{" line "}");
 485
 486   # Cancel the indentation added for the 2nd column for that line
 487   line = substr(line_prefix, 1, length(line_prefix)-5)  line;
 488   write_line(line);
 489
 490   list_item_wants_sepline = 0;
 491 }
 492
 493 # Generate Underline string with the specified length
 494 function gen_underline(id, len,          local) {
 495   if (id == -1) { local = "          "; } else
 496   if (id == 1)  { local = "**********"; } else
 497   if (id == 2)  { local = "=========="; } else
 498   if (id == 3)  { local = "----------"; } else
 499   if (id == 4)  { local = ".........."; }
 500   else { local = "~~~~~~~~~~"; }
 501   while (length(local) < len) {
 502     local = local local;
 503   }
 504   return substr(local, 1, len);
 505 }
 506
 507 # Generate text for an URL link
 508 function generate_url_reference(args,          local_nb, local_arr) {
 509   local_nb = split(args, local_arr, ",");
 510   if (local_nb == 1) {
 511     return local_arr[1];
 512
 513   } else if (local_nb == 2) {
 514     return execute_commands(local_arr[2]) " (" local_arr[1] ")";
 515
 516   } else if (local_nb == 3) {
 517     return execute_commands(local_arr[3]);
 518
 519   } else {
 520     report_error("bad number of argument " local_nb " for @uref at line " NR);
 521   }
 522 }
 523
 524 # Generate a line with the name of an author
 525 # note, we assume the name(s) always fit on a line
 526 function generate_author_line(name,          local_offset, local_attach_to_par) {
 527   if (!cond_state) { return; }
 528
 529   local_attach_to_par = (par_nb_words > 0);
 530
 531   generate_paragraph();
 532
 533   if (par_mode == "titlepage") {
 534     name = "--  " name "  --";
 535     local_offset = int((76 - length(name)) / 2);
 536     if (local_offset < 2) { local_offset = 2; }
 537     write_line("");
 538     write_line(gen_underline(-1, local_offset) name);
 539
 540   } else if (par_mode == "quotation") {
 541     name = "-- " name;
 542     local_offset = int((line_length - length(line_prefix) - length(name)) * 2/3);
 543     if (local_offset < length(line_prefix) + 2) { local_offset = length(line_prefix) + 2; }
 544     if (!local_attach_to_par) { write_line(""); }
 545     write_line(line_prefix gen_underline(-1, local_offset) name);
 546
 547   } else {
 548     report_error("command @author used in an inappropriate mode (" par_mode ") at line " NR);
 549   }
 550 }
 551
 552 # Add the specified line to the curren paragraph being built, do not print anything yet
 553 function add_text_to_paragraph(line) {
 554   nb = split(line, words, /[ \t]+/);
 555   for (i = 1; i <= nb; i++) {
 556     if (words[i] != "") {
 557       par_word[par_nb_words++] = words[i];
 558     }
 559   }
 560 }
 561
 562 # Print the paragraph from all the lines read so far
 563 function generate_paragraph(          local_prefix, local_line, local_length,
 564                                       idx_word_start, idx_word_end, local_i) {
 565   if (par_nb_words <= 0) { return; }
 566
 567   local_line = line_prefix;
 568
 569   if (par_mode == "list") {
 570     if (list_item_wants_sepline && !list_is_first_item) {
 571       write_line("");
 572     }
 573     list_is_first_item = 0;
 574     list_item_wants_sepline = 0;
 575     if (!par_indent) {
 576       local_prefix = item_list_mark " ";
 577       while (length(local_prefix) < 5) { local_prefix = " " local_prefix; }
 578       local_line = substr(local_line, 1, length(local_line) - 5) local_prefix;
 579     }
 580
 581   } else if (par_mode == "enum") {
 582     if (list_item_wants_sepline && !list_is_first_item) {
 583       write_line("");
 584     }
 585     list_is_first_item = 0;
 586     list_item_wants_sepline = 0;
 587     if (!par_indent) {
 588       local_prefix = "  " item_list_mark ". ";
 589       local_line = substr(local_line, 1, length(local_line) - 5) local_prefix;
 590
 591       # Increment the enumeration counter for the next item now
 592       if (item_list_mark + 0 == item_list_mark) {
 593         item_list_mark++;
 594       } else {
 595         local_i = index("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", item_list_mark);
 596         if (local_i == 0) {
 597           report_error("value \"" item_list_mark "\" is not supported for enumerated list - invalid @enumerate argument or list too long?");
 598         }
 599         item_list_mark = substr("BCDEFGHIJKLMNOPQRSTUVWXYZ!bcdefghijklmnopqrstuvwxyz!", local_i, 1);
 600       }
 601     }
 602
 603   } else if (par_mode == "table") {
 604     if (list_item_wants_sepline && !list_is_first_item) {
 605       write_line("");
 606     }
 607     list_is_first_item = 0;
 608     list_item_wants_sepline = 0;
 609
 610   } else if (par_mode == "titlepage") {
 611     write_line("");
 612
 613   } else if (par_mode == "par") {
 614     write_line("");
 615     if (par_indent) {
 616       local_line = local_line "   ";
 617     }
 618
 619   } else if (par_mode == "quotation") {
 620     write_line("");
 621     # There is no extra indentation of paragraphs in this mode
 622
 623   } else {
 624     report_error("paragraph mode \"" par_mode "\" is not supported in generate_paragraph (line " NR ")");
 625   }
 626
 627   # Split the paragraph in lines
 628   idx_word_start = 0;
 629   while (idx_word_start < par_nb_words) {
 630     # First word is always printed, this makes sure that words too long for a line will
 631     # always be printed, very likely on a line by themselfs
 632     idx_word_end = idx_word_start;
 633     local_length = length(local_line) + length(par_word[idx_word_start]);
 634     idx_word_start++;
 635
 636     # See how many word we can fit on the line
 637     while (idx_word_end < par_nb_words - 1) {
 638       if (local_length + 1 + length(par_word[idx_word_end + 1]) > line_length) { break; }
 639       idx_word_end++;
 640       local_length = local_length + 1 + length(par_word[idx_word_end]);
 641     }
 642
 643     # Put all those words on the current line with the appropriate justification
 644     if (par_justify == "right") {
 645       local_line = local_line gen_underline(-1, line_length - local_length) par_word[idx_word_start - 1];
 646       while (idx_word_start <= idx_word_end) {
 647         local_line = local_line " " par_word[idx_word_start++];
 648       }
 649     } else {
 650       if ((par_justify == "left") || (idx_word_end == par_nb_words - 1) ||
 651           (local_length >= line_length) || (idx_word_end < idx_word_start)) {
 652         local_line = local_line par_word[idx_word_start - 1];
 653         while (idx_word_start <= idx_word_end) {
 654           local_line = local_line " " par_word[idx_word_start++];
 655         }
 656       } else {
 657         # We calculate the ideal size of a space (as a real number) which would
 658         # make all the words perfectly fill the line, the formula being
 659         #       ideal size = 1 +     needed_extra_spaces      /      number_of_spaces_in_line
 660         ideal_space_length = 1 + (line_length - local_length) / (idx_word_end - idx_word_start + 1);
 661         count_spaces = 0;
 662         for (local_i = idx_word_start; local_i <= idx_word_end; local_i++) {
 663           count_spaces = count_spaces + ideal_space_length;
 664           word_space[local_i] = gen_underline(-1, int(count_spaces + 0.5));
 665           count_spaces = count_spaces - length(word_space[local_i]);
 666         }
 667
 668         local_line = local_line par_word[idx_word_start - 1];
 669         while (idx_word_start <= idx_word_end) {
 670           local_line = local_line word_space[idx_word_start] par_word[idx_word_start++];
 671         }
 672       }
 673     }
 674
 675     write_line(local_line);
 676
 677     # Reset for next line
 678     local_line = line_prefix;
 679   }
 680   par_nb_words = 0;
 681   par_indent = 1;
 682 }
 683
 684 # Replace commands by text in the line, return the result
 685 function execute_commands(line,               replaced_line, command) {
 686   replaced_line = "";
 687   while (1) {
 688     idx = match(line, /@([a-zA-Z]+|.)/);
 689     if (idx == 0) { break; }
 690
 691     # Separate the command and its arguments from the rest of the line
 692     replaced_line = replaced_line substr(line, 1, idx - 1);
 693     command = substr(line, idx + 1, RLENGTH - 1);
 694     line = substr(line, idx + RLENGTH);
 695
 696     if (line ~ /^\{/) {
 697       # Command has argument(s), extract them
 698       brace_count = 0;
 699       for (i = 1; i <= length(line); i++) {
 700         if (substr(line, i, 1) == "{") {
 701           brace_count++;
 702         }
 703         if (substr(line, i, 1) == "}") {
 704           brace_count--;
 705           if (brace_count == 0) { break; }
 706         }
 707       }
 708       if (brace_count != 0) {
 709         report_error("closing brace not found for command \"@" command "\", at line " NR);
 710       }
 711
 712       cmdargs = substr(line, 2, i-2);
 713       line = substr(line, i + 1);
 714
 715     } else {
 716       # Command does not have arguments, discard the spaces used to separate it
 717       # from the next text
 718       cmdargs = "";
 719       sub(/^[ \t]+/, "", line);
 720     }
 721
 722     # Commands generating "special" characters #################################
 723     if (command == "@") {
 724       replaced_line = replaced_line "@";
 725
 726     } else if (command == "bullet") {
 727       replaced_line = replaced_line "*";
 728
 729     } else if (command == "copyright") {
 730       replaced_line = replaced_line "(c)";
 731
 732     } else if (command == "minus") {
 733       replaced_line = replaced_line "-";
 734
 735     } else if (command == "registeredsymbol") {
 736       replaced_line = replaced_line "(r)";
 737
 738     } else if (command == "today") {
 739       # Make sure the date will be in english (we use "C" because it not certain
 740       # that the English locale is enabled on the machine of the user)
 741       replaced_line = replaced_line "'"`LANG=C date '+%d %B %Y' | sed -e 's,^0,,' `"'";
 742
 743     # Commands to display text in a special style ##############################
 744     } else if (command == "asis") {
 745       line = cmdargs line;
 746
 747     } else if (command == "b") { # bold
 748       line = "*" cmdargs "*" line;
 749
 750     } else if ((command == "cite") ||
 751                (command == "emph")) {
 752       line = cmdargs line;
 753
 754     } else if ((command == "code") ||
 755                (command == "command") ||
 756                (command == "env") ||
 757                (command == "option") ||
 758                (command == "var")) {
 759       # Should be in fixed-spacing font; printed with single-quotes
 760       line = "'\''" cmdargs "'\''" line;
 761
 762     } else if (command == "i") { # italic
 763       line = "_" cmdargs "_" line;
 764
 765     } else if (command == "email") {
 766       line = "<" cmdargs ">" line;
 767
 768     } else if (command == "file") {
 769       line = "\"" cmdargs "\"" line;
 770
 771     } else if (command == "key") {
 772       line = "<" cmdargs ">" line;
 773
 774     } else if (command == "r") { # roman font
 775       line = cmdargs line;
 776
 777     } else if (command == "sc") {
 778       # Small-Caps, keep as-is in plain text
 779       line = cmdargs line;
 780
 781     } else if (command == "t") { # typewriter-like
 782       line = cmdargs line;
 783
 784     } else if (command == "uref") {
 785       replaced_line = replaced_line generate_url_reference(cmdargs);
 786
 787     # Variable and Conditional commands ########################################
 788     } else if (command == "value") {
 789       if (variable[cmdargs] == "") {
 790         report_error("variable '" cmdargs "' is unknow, for @value at line " NR);
 791       }
 792       line = variable[cmdargs] line;
 793
 794     # Miscelleanous commands ###################################################
 795     } else if (command == "c") {
 796       # Comments: ignore everything to the end of line
 797       line = "";
 798
 799     } else {
 800       report_error("unknow command @" command " at line " NR);
 801     }
 802
 803   }
 804   return (replaced_line line);
 805 }
 806
 807 # Handle appropriately the "@end xxx"
 808 function process_end(line) {
 809   if (line == cond_names[cond_level]) {
 810     end_conditional(line);
 811     return;
 812   }
 813   if (line == "copying") {
 814     generate_paragraph();
 815     redirect_out = "no";
 816
 817   } else if (line == "enumerate") {
 818     generate_paragraph();
 819     par_mode_pop("enum");
 820     par_indent = 1;
 821
 822   } else if (line == "example") {
 823     generate_paragraph();
 824     par_mode_pop("example");
 825     par_indent = 1;
 826
 827   } else if (line == "flushleft") {
 828     generate_paragraph();
 829     par_mode_pop(par_mode);
 830     par_indent = 1;
 831
 832   } else if (line == "flushright") {
 833     generate_paragraph();
 834     par_mode_pop(par_mode);
 835     par_indent = 1;
 836
 837   } else if (line == "itemize") {
 838     generate_paragraph();
 839     par_mode_pop("list");
 840     par_indent = 1;
 841
 842   } else if (line == "quotation") {
 843     generate_paragraph();
 844     par_mode_pop("quotation");
 845     par_indent = 1;
 846
 847   } else if ((line == "table") || (line == "ftable") || (line == "vtable")) {
 848     generate_paragraph();
 849     par_mode_pop("table");
 850     par_indent = 1;
 851
 852   } else if (line == "titlepage") {
 853     generate_page_break();
 854     par_mode_pop("titlepage");
 855     par_indent = 0;
 856
 857   } else {
 858     report_error("unknow command @end " line " at line " NR);
 859   }
 860 }
 861
 862 BEGIN {
 863   # Count the lines generated for the Table of Content
 864   line_number = 0;
 865
 866   # To perform some basic checks on the file
 867   top_was_found = 0;
 868   bye_marker_found = 0;
 869
 870   # Paragraph generation parameters
 871   par_mode_count = 0;
 872   par_mode = "par";
 873   par_nb_words = 0;
 874   par_indent = 1;
 875   par_justify = "justify";
 876   redirect_out = "no";
 877   line_length = 76;
 878   line_prefix = "";
 879
 880   # To handle conditional code
 881   cond_level = 0;
 882   cond_state = 1;
 883
 884   # Number of entries in the Table of Content
 885   toc_count = 0;
 886   toc_file = "'"$toc_file"'";
 887
 888   # Define a custom variable so it is possible to differentiate between
 889   # texi2any and this script
 890   variable["cctexi2txt"] = "1.0";
 891
 892   # Variables inherited from the command line'"$var_defs"'
 893 }
 894
 895 # First line is special, we always ignore it
 896 (NR == 1) { next; }
 897
 898 /^[ \t]*@/ {
 899   # Treat the special commands that are supposed to be on a line by themselves
 900   idx = match($0, /^@([a-zA-Z]+)/);
 901   if (idx != 0) {
 902     # Remove the command from current line
 903     command = substr($0, idx + 1, RLENGTH - 1);
 904     line = substr($0, idx + 1 + RLENGTH);
 905     sub(/^[ \t]+/, "", line);
 906
 907     # Commands for structuring the document ####################################
 908     if (command == "chapter") {
 909       new_section(1, execute_commands(line), 1);
 910       next;
 911
 912     } else if (command == "section") {
 913       new_section(2, execute_commands(line), 1);
 914       next;
 915
 916     } else if (command == "subsection") {
 917       new_section(3, execute_commands(line), 1);
 918       next;
 919
 920     } else if (command == "subsubsection") {
 921       new_section(4, execute_commands(line), 1);
 922       next;
 923
 924     } else if (command == "node") {
 925       # We ignore nodes completely, this is for the "info" format only
 926       next;
 927
 928     } else if (command == "top") {
 929       # This is mandatory for "info" format, but useless for plain text
 930       if (top_was_found > 0) {
 931         report_error("command @top at line " NR " but was already found at line " top_was_found);
 932       }
 933       top_was_found = NR;
 934       next;
 935
 936     } else if (command == "unnumbered") {
 937       new_section(1, execute_commands(line), 0);
 938       next;
 939
 940     # Commands for content in the Title Page ###################################
 941     } else if (command == "author") {
 942       generate_author_line(execute_commands(line));
 943       next;
 944
 945     } else if (command == "subtitle") {
 946       generate_title_page_subtitle(execute_commands(line));
 947       next;
 948
 949     } else if (command == "title") {
 950       generate_title_page_title(execute_commands(line));
 951       next;
 952
 953     # Commands changing the way paragraph are displayed ########################
 954     } else if (command == "copying") {
 955       generate_paragraph();
 956       redirect_out = "copyright";
 957       copyright_count = 0;
 958       next;
 959
 960     } else if (command == "end") {
 961       process_end(line);
 962       next;
 963
 964     } else if (command == "enumerate") {
 965       if (cond_state) {
 966         generate_paragraph();
 967         start_item_list(line, "enum", "1");
 968       }
 969       next;
 970
 971     } else if (command == "example") {
 972       if (cond_state) {
 973         generate_paragraph();
 974         write_line("");
 975         par_mode_push("example");
 976         line_prefix = line_prefix "     ";
 977       }
 978       next;
 979
 980     } else if (command == "flushleft") {
 981       if (cond_state) {
 982         generate_paragraph();
 983         par_mode_push(par_mode);
 984         par_justify = "left";
 985         par_indent = 0;
 986       }
 987       next;
 988
 989     } else if (command == "flushright") {
 990       if (cond_state) {
 991         generate_paragraph();
 992         par_mode_push(par_mode);
 993         par_justify = "right";
 994         par_indent = 0;
 995       }
 996       next;
 997
 998     } else if (command == "itemize") {
 999       if (cond_state) {
1000         generate_paragraph();
1001         start_item_list(line, "list", "*");
1002       }
1003       next;
1004
1005     } else if (command == "menu") {
1006       generate_paragraph();
1007       discard_block(command);
1008       next;
1009
1010     } else if (command == "quotation") {
1011       if (cond_state) {
1012         generate_paragraph();
1013         par_mode_push("quotation");
1014         line_prefix = line_prefix "    ";
1015         line_length = line_length - 4;
1016         if (line != "") {
1017           add_text_to_paragraph(execute_commands(line));
1018           # We add the ":" to the last word because we count on the function
1019           # "add_text_to_paragraph" to remove the trailing spaces on the line
1020           # first, which would not have happened if we just had appended the ":"
1021           # to the argument in the function call
1022           par_word[par_nb_words - 1] = par_word[par_nb_words - 1] ":";
1023           line = "";
1024         }
1025       }
1026       next;
1027
1028     } else if ((command == "table") ||
1029                (command == "ftable") ||
1030                (command == "vtable")) {
1031       # "ftable" and "vtable" are the same as "table" except they are adding automatically
1032       # the item to the appropriate Index (respectively Function and Variable indexes).
1033       # As we do not generate index in the text file, we just treat them identically
1034       if (cond_state) {
1035         generate_paragraph();
1036         par_mode_push("table");
1037         list_is_first_item = 1;
1038         list_item_wants_sepline = 0;
1039         par_indent = 1;
1040         line_prefix = line_prefix "     ";
1041         gsub(/[ \t]/, "", line);
1042         if (line !~ /^@[a-z][a-z]*$/) {
1043           report_error("invalid usage of @table, expecting a single style-changing command");
1044         }
1045         item_list_mark = line;
1046         write_line("");
1047       }
1048       next;
1049
1050     } else if (command == "titlepage") {
1051       generate_title_page();
1052       next;
1053
1054     # Commands generating text automacitally ###################################
1055     } else if (command == "contents") {
1056       if (cond_state) {
1057         generate_paragraph();
1058         write_line("");
1059         write_line("");
1060         print "@table_of_content@";
1061       }
1062       next;
1063
1064     } else if (command == "insertcopying") {
1065       if (cond_state) {
1066         generate_paragraph();
1067         # The copying block was already formatted, we just have to print it as-is
1068         for (i = 0; i < copyright_count; i++) {
1069           write_line(copyright_lines[i]);
1070         }
1071       }
1072       next;
1073
1074     } else if (command == "page") {
1075       generate_page_break();
1076       next;
1077
1078     } else if (command == "sp") {
1079       if (cond_state) {
1080         generate_paragraph();
1081         while (line > 0) {
1082           write_line("");
1083           line--;
1084         }
1085       }
1086       next;
1087
1088     } else if (command == "vskip") {
1089       # Silently ignore, this is just for TeX
1090       if (cond_state) {
1091         generate_paragraph();
1092       }
1093       next;
1094
1095     # Variable and Conditional commands ########################################
1096     } else if (command == "ifdocbook") {   start_conditional(command, 0); line = ""; next;
1097     } else if (command == "ifhtml") {      start_conditional(command, 0); line = ""; next;
1098     } else if (command == "ifinfo") {      start_conditional(command, 1); line = ""; next; # "for historical compatibility"
1099     } else if (command == "ifplaintext") { start_conditional(command, 1); line = ""; next;
1100     } else if (command == "iftex") {       start_conditional(command, 0); line = ""; next;
1101     } else if (command == "ifxml") {       start_conditional(command, 0); line = ""; next;
1102
1103     } else if (command == "ifnotdocbook") {   start_conditional(command, 1); line = ""; next;
1104     } else if (command == "ifnothtml") {      start_conditional(command, 1); line = ""; next;
1105     } else if (command == "ifnotinfo") {      start_conditional(command, 0); line = ""; next; # "for historical compatibility"
1106     } else if (command == "ifnotplaintext") { start_conditional(command, 0); line = ""; next;
1107     } else if (command == "ifnottex") {       start_conditional(command, 1); line = ""; next;
1108     } else if (command == "ifnotxml") {       start_conditional(command, 1); line = ""; next;
1109
1110     } else if (command == "ifclear") { start_conditional(command, (variable[line] == "")); next;
1111     } else if (command == "ifset") {   start_conditional(command, (variable[line] != "")); next;
1112
1113     } else if (command == "clear") {
1114       if (cond_state) {
1115         variable[ execute_commands(line) ] = "";
1116       }
1117       next;
1118
1119     } else if (command == "set") {
1120       if (cond_state) {
1121         set_variable(line);
1122       }
1123       next;
1124
1125     # Miscelleanous commands ###################################################
1126     } else if (command == "bye") {
1127       # Mark the end of file, we are supposed to ignore everything after
1128       if (cond_state) {
1129         generate_paragraph();
1130         while (getline != 0) { }
1131         bye_marker_found = 1;
1132       }
1133       next;
1134
1135     } else if (command == "c") {
1136       # Comments: ignore everything to the end of line
1137       next;
1138
1139     } else if (command == "errormsg") {
1140       print "Error: " execute_commands(cmdargs) > "/dev/stderr";
1141       print "   (from \"'"$input_file"'\", line " NR ")" > "/dev/stderr";
1142       bye_marker_found = 1;
1143       exit 4;
1144
1145     } else if (command == "finalout") {
1146       # Nothing to do, we are not generating anything in output file about long lines
1147       next;
1148
1149     } else if (command == "ignore") {
1150       # These are multi-lines comments
1151       discard_block(command);
1152       next;
1153
1154     } else if (command == "indent") {
1155       par_indent = 1;
1156       if (line == "") { next; }
1157       $0 = line;
1158
1159     } else if (command == "noindent") {
1160       par_indent = 0;
1161       if (line == "") { next; }
1162       $0 = line;
1163
1164     } else if (command == "setfilename") {
1165       # Should set the output file name automatically
1166       # at current time, we just ignore it
1167       next;
1168
1169     } else if (command == "settitle") {
1170       # This is used for page headers
1171       # in a plain text file, it is useless
1172       next;
1173
1174     }
1175     # Commands that were not recognised here may be commands that can be used
1176     # anywhere in a line but happenned to be at the beginning of the line this
1177     # time, we do nothing so they will be processed by "execute_commands"
1178   }
1179 }
1180
# Lines containing one or more "@item"
#
# In a table the whole line is given to "generate_item_in_table". In a list
# each "@item" closes the paragraph of the previous item; the text that follows
# the last "@item" is left in $0 for the rule that handles regular text.
/@item/ {
  # We treat @item specially because it may generate more than 1 paragraph
  if (!cond_state) { next; }

  if (par_mode == "table") {
    generate_item_in_table($0);
    next;
  } else if ((par_mode != "list") && (par_mode != "enum")) {
    report_error("found @item at line " NR " but not inside an @itemize");
  }

  while (1) {
    idx = match($0, /@item/);
    if (idx == 0) { break; }

    # Split the line around the "@item" (5 characters) before calling any
    # function, so the result does not depend on what they do with "idx"
    item_prev_text = substr($0, 1, idx - 1);
    item_next_text = substr($0, idx + 5);

    # We generate paragraph with all the text seen so far, which is part of
    # the previous item; the commands it contains are executed like it is done
    # for any other text added to a paragraph
    if (item_prev_text != "") {
      item_prev_text = execute_commands(item_prev_text);
    }
    add_text_to_paragraph(item_prev_text);
    generate_paragraph();
    $0 = item_next_text;

    # When an item is found, we clear "par_indent" to actually place the item
    # mark on the next paragraph
    par_indent = 0;
  }

  # If the item is on a line by itself, stop processing the line to avoid
  # skipping lines more than necessary
  if (/^[ \t]*$/) { next; }
}
1211
# Regular text: what remains of the line after the previous rules is either
# appended to the paragraph being built or printed as a line of example
{
  # Nothing is generated while in a conditional block that is skipped
  if (!cond_state) { next; }

  if (par_mode == "example") {
    # Line is printed unmodified, not split and not merged, but with an indentation
    $0 = line_prefix execute_commands($0);
    sub(/[ \t]*$/, "");
    write_line($0);

  } else if (par_mode ~ /^(list|enum|par|table|titlepage|quotation)$/) {
    if ($0 !~ /^[ \t]*$/) {
      # Non-empty lines are added to the current paragraph
      add_text_to_paragraph(execute_commands($0));
    } else {
      # Empty lines separate paragraphs
      generate_paragraph();
      # in list of items, they also tell us that user prefers an aerated list
      list_item_wants_sepline = 1;
    }

  } else {
    report_error("paragraph mode \"" par_mode "\" is not supported for line processing (line " NR ")");
  }
}
1241
# End of the input: check that the mandatory commands were seen, then write
# the Table of Content in its own file
END {
  if (!bye_marker_found) {
    report_error("command \"@bye\" missing at end of file");
  }
  if (!top_was_found) {
    report_error("command \"@top\" was not found in the file");
  }

  # Count the number of lines that the ToC will occupy: one per entry, plus a
  # blank line before each entry of level 1 except the very first entry.
  # We assume the ToC is at the beginning, so all sections will be shifted
  # by this number of lines down
  toc_nb_lines = 0;
  for (i = 1; i <= toc_count; i++) {
    toc_nb_lines += ((i > 1) && (toc_entry_level[i] == 1)) ? 2 : 1;
  }

  # Generate the ToC
  for (i = 1; i <= toc_count; i++) {
    if ((i > 1) && (toc_entry_level[i] == 1)) {
      print "" > toc_file;
    }

    # The name is followed by a dotted line; a space is added when needed so
    # the dots are on the same columns for all the entries
    toc_text = "    " toc_entry_name[i] " ";
    if ((length(toc_text) % 2) != 0) {
      toc_text = toc_text " ";
    }
    while (length(toc_text) < (76 - 4)) {
      toc_text = toc_text " .";
    }

    # The line number is placed at the end, over the last dots
    target_line = toc_entry_line[i] + toc_nb_lines;
    toc_text = substr(toc_text, 1, (76 - 5) - length(target_line)) " " target_line;
    print toc_text > toc_file;
  }

}
1280 ' "$input_file" > "$temp_file" || exit $?
1281
# Run awk for 2nd pass: the output of the 1st pass is copied as-is, except for
# the line with the "@table_of_content@" marker which is replaced by the
# content of the ToC file.
# If it fails also stop now without deleting temp files
awk -v toc_file="$toc_file" '
/@table_of_content@/ {
  # The result of getline is compared with "> 0" because it returns -1 when
  # the file cannot be read (for example if it does not exist), and a simple
  # truth test would then loop forever
  while ((getline toc_line < toc_file) > 0) {
    print toc_line;
  }
  next;
}
{ print }
' "$temp_file" > "$output_file" || exit $?

# If all worked, remove the temp files
rm -f -- "$temp_file" "$toc_file"