fangle

   1 #! /usr/bin/awk -f
   2 # # fangle - fully featured notangle replacement in awk
   3   #
   4   # Copyright (C) 2009-2010 Sam Liddicott <sam@liddicott.com>
   5   #
   6   # This program is free software: you can redistribute it and/or modify
   7   # it under the terms of the GNU General Public License as published by
   8   # the Free Software Foundation, either version 3 of the License, or
   9   # (at your option) any later version.
  10   #
  11   # This program is distributed in the hope that it will be useful,
  12   # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13   # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14   # GNU General Public License for more details.
  15   #
  16   # You should have received a copy of the GNU General Public License
  17   # along with this program.  If not, see <http://www.gnu.org/licenses/>.
   18 # NOTE: Arnold Robbins' public domain getopt for awk is also used:
  19 # getopt.awk --- do C library getopt(3) function in awk
  20 #
  21 # Arnold Robbins, arnold@skeeve.com, Public Domain
  22 #
  23 # Initial version: March, 1991
  24 # Revised: May, 1993
  25
   # getopt(argc, argv, options) -- awk port of C getopt(3) (Arnold Robbins, public domain).
   #
   # Examines argv[Optind] and returns the next option letter found there.
   #   argc     accepted for symmetry with C; not referenced in this body
   #   argv     argument array (the caller in this file passes ARGV)
   #   options  accepted option letters; a letter followed by ":" takes an argument
   # Returns -1 when `options' is empty, when argv[Optind] is "--" (which is
   # consumed), or when argv[Optind] does not start with "-" plus an option
   # character; returns "?" for a letter that is not in `options'; otherwise
   # returns the option letter itself.
   # Globals: reads Opterr; reads and writes Optind and _opti (the character
   # position inside a bundled argument such as "-ab"); writes Optopt, Optarg.
   # thisopt and i are locals.
   26 function getopt(argc, argv, options,    thisopt, i)
   27 {
   28     if (length(options) == 0)    # no options given
   29         return -1
   30     if (argv[Optind] == "--") {  # all done
   31         Optind++
   32         _opti = 0
   33         return -1
   34     } else if (argv[Optind] !~ /^-[^: \t\n\f\r\v\b]/) {
   35         _opti = 0
   36         return -1
   37     }
          # _opti == 0 means a fresh argument: start at the character after "-".
   38     if (_opti == 0)
   39         _opti = 2
   40     thisopt = substr(argv[Optind], _opti, 1)
   41     Optopt = thisopt
   42     i = index(options, thisopt)
   43     if (i == 0) {
              # Unknown letter: optionally report it, then step past it.
   44         if (Opterr)
   45             printf("%c -- invalid option\n",
   46                                   thisopt) > "/dev/stderr"
   47         if (_opti >= length(argv[Optind])) {
   48             Optind++
   49             _opti = 0
   50         } else
   51             _opti++
   52         return "?"
   53     }
   54     if (substr(options, i + 1, 1) == ":") {
   55         # get option argument
              # Either the rest of this argument ("-Rfoo") or the next one ("-R foo").
   56         if (length(substr(argv[Optind], _opti + 1)) > 0)
   57             Optarg = substr(argv[Optind], _opti + 1)
   58         else
   59             Optarg = argv[++Optind]
   60         _opti = 0
   61     } else
   62         Optarg = ""
          # Advance to the next argument, or to the next letter of a bundle.
   63     if (_opti == 0 || _opti >= length(argv[Optind])) {
   64         Optind++
   65         _opti = 0
   66     } else
   67         _opti++
   68     return thisopt
   69 }
  70
  71 function error(message)
  72 {
  73   print "ERROR: " FILENAME ":" FNR " " message > "/dev/stderr";
  74   exit 1;
  75 }
  76 function warning(message)
  77 {
  78   print "WARNING: " FILENAME ":" FNR " " message > "/dev/stderr";
  79   warnings++;
  80 }
  81 function debug_log(message)
  82 {
  83   print "DEBUG: " FILENAME ":" FNR " " message > "/dev/stderr";
  84 }
  85 function new_mode_tracker(context, language, mode) {
  86   context[""] = 0;
  87   context[0, "language"] = language;
  88   context[0, "mode"] = mode;
  89 }
  90 function push_mode_tracker(context, language, mode,
  91   # local vars
  92   top)
  93 {
  94   if (! ("" in context)) {
  95     split("", context);
  96     new_mode_tracker(context, language, mode);
  97   } else {
  98     top = context[""];
  99     if (context[top, "language"] == language && mode=="") mode = context[top, "mode"];
 100     top++;
 101     context[top, "language"] = language;
 102     context[top, "mode"] = mode;
 103     context[""] = top;
 104   }
 105 }
 106 function dump_mode_tracker(context,
 107   c, d)
 108 {
 109   for(c=0; c <= context[""]; c++) {
 110     printf(" %2d   %s:%s\n", c, context[c, "language"], context[c, "mode"]) > "/dev/stderr";
 111     for(d=1; ( (c, "values", d) in context); d++) {
 112       printf("   %2d %s\n", d, context[c, "values", d]) > "/dev/stderr";
 113     }
 114   }
 115 }
 116 function finalize_mode_tracker(context)
 117 {
 118   if ( ("" in context) && context[""] != 0) return 0;
 119   return 1;
 120 }
  # mode_tracker(context, text, values) -- advance a mode stack over `text'.
  #
  # `context' is a tracker as set up by new_mode_tracker(): context[""] is the
  # index of the top level, and context[n, "mode"] / context[n, "language"]
  # describe level n.  For the current (language, mode) the function builds one
  # regex from modes[language, mode, "submodes" | "delimiters" | "terminators"]
  # and repeatedly matches it against `text' (gawk three-argument match()):
  #   - a terminator closes the current level and pops the stack;
  #   - a delimiter ends the current item at level 0, and is copied into the
  #     item at deeper levels;
  #   - a submode opener with a defined terminator pushes a new level;
  #   - anything else is reported through error().
  # Completed items are stored as context[level, "values", n], with the count
  # in context[level, "values"].
  # Returns whatever is left of `text' (returned unchanged when the current
  # mode defines no patterns at all).
  # The `values' parameter and the locals c, name, result, new_values and
  # new_mode are not referenced in the body.
  121 function mode_tracker(context, text, values,
  122   # optional parameters
  123   # local vars
  124   mode, submodes, language,
  125   cindex, c, a, part, item, name, result, new_values, new_mode,
  126   delimiters, terminators)
  127 {
  128   cindex = context[""] + 0;
  129   mode = context[cindex, "mode"];
  130   language = context[cindex, "language" ];
        # Build the combined regex for the current mode: submodes|delimiters|terminators.
  131   submodes=modes[language, mode, "submodes"];
  132
  133   if ((language, mode, "delimiters") in modes) {
  134     delimiters = modes[language, mode, "delimiters"];
  135     if (length(submodes)>0) submodes = submodes "|";
  136     submodes=submodes delimiters;
  137   } else delimiters="";
  138   if ((language, mode, "terminators") in modes) {
  139     terminators = modes[language, mode, "terminators"];
  140     if (length(submodes)>0) submodes = submodes "|";
  141     submodes=submodes terminators;
  142   } else terminators="";
  143  if (! length(submodes)) return text;
  144   while((cindex >= 0) && length(text)) {
  145     if (match(text, "(" submodes ")", a)) {
  146       if (RLENGTH<1) {
  147         error(sprintf("Internal error, matched zero length submode, should be impossible - likely regex computation error\n" \
  148                 "Language=%s\nmode=%s\nmatch=%s\n", language, mode, submodes));
  149       }
            # Text before the match belongs to the item being accumulated.
  150       part = substr(text, 1, RSTART -1);
  151       item = item part;
  152       if (match(a[1], "^" terminators "$")) {
              # Terminator: store the item, pop one level, reload the parent's patterns.
              # NOTE(review): `delete context[cindex]' removes only the element whose
              # subscript is exactly cindex; the (cindex, ...) entries are left in place.
  153 #printf("%2d EXIT  MODE [%s] by [%s] [%s]\n", cindex, mode, a[1], text) > "/dev/stderr"
  154         context[cindex, "values", ++context[cindex, "values"]] = item;
  155         delete context[cindex];
  156         context[""] = --cindex;
  157         if (cindex>=0) {
  158           mode = context[cindex, "mode"];
  159           language = context[cindex, "language"];
  160             submodes=modes[language, mode, "submodes"];
  161
  162             if ((language, mode, "delimiters") in modes) {
  163               delimiters = modes[language, mode, "delimiters"];
  164               if (length(submodes)>0) submodes = submodes "|";
  165               submodes=submodes delimiters;
  166             } else delimiters="";
  167             if ((language, mode, "terminators") in modes) {
  168               terminators = modes[language, mode, "terminators"];
  169               if (length(submodes)>0) submodes = submodes "|";
  170               submodes=submodes terminators;
  171             } else terminators="";
  172            if (! length(submodes)) return text;
  173         }
              # The terminator text is kept in the item that continues in the parent level.
  174         item = item a[1];
  175         text = substr(text, 1 + length(part) + length(a[1]));
  176       }
  177       else if (match(a[1], "^" delimiters "$")) {
              # Delimiter: at level 0 it separates values; deeper it is ordinary text.
  178         if (cindex==0) {
  179           context[cindex, "values", ++context[cindex, "values"]] = item;
  180           item = "";
  181         } else {
  182           item = item a[1];
  183         }
  184         text = substr(text, 1 + length(part) + length(a[1]));
  185       }
  186  else if ((language, a[1], "terminators") in modes) {
  187         #check if new_mode is defined
              # Opener of a known submode: push a level and load that mode's patterns.
  188         item = item a[1];
  189 #printf("%2d ENTER MODE [%s] in [%s]\n", cindex, a[1], text) > "/dev/stderr"
  190         text = substr(text, 1 + length(part) + length(a[1]));
  191         context[""] = ++cindex;
  192         context[cindex, "mode"] = a[1];
  193         context[cindex, "language"] = language;
  194         mode = a[1];
  195           submodes=modes[language, mode, "submodes"];
  196
  197           if ((language, mode, "delimiters") in modes) {
  198             delimiters = modes[language, mode, "delimiters"];
  199             if (length(submodes)>0) submodes = submodes "|";
  200             submodes=submodes delimiters;
  201           } else delimiters="";
  202           if ((language, mode, "terminators") in modes) {
  203             terminators = modes[language, mode, "terminators"];
  204             if (length(submodes)>0) submodes = submodes "|";
  205             submodes=submodes terminators;
  206           } else terminators="";
  207          if (! length(submodes)) return text;
  208       } else {
  209         error(sprintf("Submode '%s' set unknown mode in text: %s\nLanguage %s Mode %s\n", a[1], text, language, mode));
  210         text = substr(text, 1 + length(part) + length(a[1]));
  211       }
  212     }
  213 else {
            # No pattern left in the text: everything remaining completes the item.
  214       context[cindex, "values", ++context[cindex, "values"]] = item text;
  215       text = "";
  216       item = "";
  217     }
  218   }
  219
        # Expose the unfinished item, and also record it as a value if non-empty.
  220   context["item"] = item;
  221
  222   if (length(item)) context[cindex, "values", ++context[cindex, "values"]] = item;
  223   return text;
  224 }
 225
 226 function untab(text) {
 227   gsub("[[:space:]]*\xE2\x86\xA4","", text);
 228   return text;
 229 }
 230 function transform_escape(s, r, text,
 231     # optional
 232     max,
 233         # local vars
 234         c)
 235 {
 236   for(c=1; c <= max && (c in s); c++) {
 237     gsub(s[c], r[c], text);
 238   }
 239   return text;
 240 }
 241 function mode_escaper(context, s, r, src,
 242   c, cp, cpl)
 243 {
 244   for(c = context[""]; c >= 0; c--) {
 245     if ( (context[c, "language"], context[c, "mode"]) in escapes) {
 246       cpl = escapes[context[c, "language"], context[c, "mode"]];
 247       for (cp = 1; cp <= cpl; cp ++) {
 248         ++src;
 249         s[src] = escapes[context[c, "language"], context[c, "mode"], cp, "s"];
 250                                 r[src] = escapes[context[c, "language"], context[c, "mode"], cp, "r"];
 251       }
 252     }
 253   }
 254   return src;
 255 }
 256 function dump_escaper(c, s, r, cc) {
 257   for(cc=1; cc<=c; cc++) {
 258     printf("%2d s[%s] r[%s]\n", cc, s[cc], r[cc]) > "/dev/stderr"
 259   }
 260 }
 261 function parse_chunk_args(language, text, values, mode,
 262   # local vars
 263   c, context, rest)
 264 {
 265   split("", context);
 266   new_mode_tracker(context, language, mode);
 267   rest = mode_tracker(context, text, values);
 268   # extract values
 269   for(c=1; c <= context[0, "values"]; c++) {
 270     values[c] = context[0, "values", c];
 271   }
 272   return rest;
 273 }
 274 function new_chunk(chunk_name, opts, args,
 275   # local vars
 276   p, append )
 277 {
 278   # HACK WHILE WE CHANGE TO ( ) for PARAM CHUNKS
 279   gsub("\\(\\)$", "", chunk_name);
 280   if (! (chunk_name in chunk_names)) {
 281     if (debug) print "New chunk " chunk_name;
 282     chunk_names[chunk_name];
 283     for (p in opts) {
 284       chunks[chunk_name, p] = opts[p];
 285       if (debug) print "chunks[" chunk_name "," p "] = " opts[p];
 286     }
 287     for (p in args) {
 288       chunks[chunk_name, "params", p] = args[p];
 289     }
 290     if ("append" in opts) {
 291       append=opts["append"];
 292       if (! (append in chunk_names)) {
 293         warning("Chunk " chunk_name " is appended to chunk " append " which is not defined yet");
 294         new_chunk(append);
 295       }
 296       chunk_include(append, chunk_name);
 297       chunk_line(append, ORS);
 298     }
 299   }
 300   active_chunk = chunk_name;
 301   prime_chunk(chunk_name);
 302 }
 303
 304 function prime_chunk(chunk_name)
 305 {
 306   chunks[chunk_name, "part", ++chunks[chunk_name, "part"] ] = \
 307          chunk_name SUBSEP "chunklet" SUBSEP "" ++chunks[chunk_name, "chunklet"];
 308   chunks[chunk_name, "part", chunks[chunk_name, "part"], "FILENAME"] = FILENAME;
 309   chunks[chunk_name, "part", chunks[chunk_name, "part"], "LINENO"] = FNR + 1;
 310 }
 311
 312 function chunk_line(chunk_name, line){
 313   chunks[chunk_name, "chunklet", chunks[chunk_name, "chunklet"],
 314          ++chunks[chunk_name, "chunklet", chunks[chunk_name, "chunklet"], "line"]  ] = line;
 315 }
 316
 317 function chunk_include(chunk_name, chunk_ref, indent, tail)
 318 {
 319   chunks[chunk_name, "part", ++chunks[chunk_name, "part"] ] = chunk_ref;
 320   chunks[chunk_name, "part", chunks[chunk_name, "part"], "type" ] = part_type_chunk;
 321   chunks[chunk_name, "part", chunks[chunk_name, "part"], "indent" ] = indent_string(indent);
 322   chunks[chunk_name, "part", chunks[chunk_name, "part"], "tail" ] = tail;
 323   prime_chunk(chunk_name);
 324 }
 325
 326 function indent_string(indent) {
 327   return sprintf("%" indent "s", "");
 328 }
 329 function output_chunk_names(   c, prefix, suffix)
 330 {
 331   if (notangle_mode) {
 332     prefix="<<";
 333     suffix=">>";
 334   }
 335   for (c in chunk_names) {
 336     print prefix c suffix "\n";
 337   }
 338 }
 339 function output_chunks(  a)
 340 {
 341   for (a in chunk_names) {
 342     output_chunk(a);
 343   }
 344 }
 345
 346 function output_chunk(chunk) {
 347   newline = 1;
 348   lineno_needed = linenos;
 349
 350   write_chunk(chunk);
 351 }
 352
 353 function write_chunk(chunk_name) {
 354   split("", context);
 355   return write_chunk_r(chunk_name, context);
 356 }
 357
 358 function write_chunk_r(chunk_name, context, indent, tail,
 359   # optional vars
 360   chunk_path, chunk_args,
 361   s, r, src, new_src,
 362   # local vars
 363   chunk_params, part, max_part, part_line, frag, max_frag, text,
 364   chunklet, only_part, call_chunk_args, new_context)
 365 {
 366   if (debug) debug_log("write_chunk_r(" chunk_name ")");
 367   if (match(chunk_name, "^(.*)\\[([0-9]*)\\]$", chunk_name_parts)) {
 368     chunk_name = chunk_name_parts[1];
 369     only_part = chunk_name_parts[2];
 370   }
 371  split("", context);
 372  new_mode_tracker(context, chunks[chunk_name, "language"], "");
 373  split(chunks[chunk_name, "params"], chunk_params, " *; *");
 374   if (! (chunk_name in chunk_names)) {
 375     error(sprintf(_"The root module <<%s>> was not defined.\nUsed by: %s",\
 376                   chunk_name, chunk_path));
 377   }
 378
 379   max_part = chunks[chunk_name, "part"];
 380   for(part = 1; part <= max_part; part++) {
 381     if (! only_part || part == only_part) {
 382       if (linenos && (chunk_name SUBSEP "part" SUBSEP part SUBSEP "FILENAME" in chunks)) {
 383         a_filename = chunks[chunk_name, "part", part, "FILENAME"];
 384         a_lineno = chunks[chunk_name, "part", part, "LINENO"];
 385         if (a_filename != filename || a_lineno != lineno) {
 386           lineno_needed++;
 387         }
 388       }
 389
 390       chunklet = chunks[chunk_name, "part", part];
 391       if (chunks[chunk_name, "part", part, "type"] == part_type_chunk) {
 392         if (match(chunklet, "^([^\\[\\(]*)\\((.*)\\)$", chunklet_parts)) {
 393           chunklet = chunklet_parts[1];
 394         # hack
 395         gsub(sprintf("%c",11), "", chunklet);
 396         gsub(sprintf("%c",11), "", chunklet_parts[2]);
 397           parse_chunk_args("c-like", chunklet_parts[2], call_chunk_args, "(");
 398           for (c in call_chunk_args) {
 399             call_chunk_args[c] = expand_chunk_args(call_chunk_args[c], chunk_params, chunk_args);
 400           }
 401         } else {
 402           split("", call_chunk_args);
 403         }
 404         # update the transforms arrays
 405         new_src = mode_escaper(context, s, r, src);
 406         split("", new_context);
 407         write_chunk_r(chunklet, new_context,
 408                     chunks[chunk_name, "part", part, "indent"] indent,
 409                     chunks[chunk_name, "part", part, "tail"],
 410                     chunk_path "\n         " chunk_name,
 411                     call_chunk_args,
 412                     s, r, new_src);
 413       } else if (chunklet SUBSEP "line" in chunks) {
 414         max_frag = chunks[chunklet, "line"];
 415         for(frag = 1; frag <= max_frag; frag++) {
 416           if (newline && lineno_needed && ! lineno_suppressed) {
 417             filename = a_filename;
 418             lineno = a_lineno;
 419             print "#line " lineno " \"" filename "\"\n"
 420             lineno_needed = 0;
 421           }
 422
 423           text = chunks[chunklet, frag];
 424
 425           /* check params */
 426           text = expand_chunk_args(text, chunk_params, chunk_args);
 427
 428          if (text == "\n") {
 429             lineno++;
 430             if (part == max_part && frag == max_frag && length(chunk_path)) {
 431               text = "";
 432               break;
 433             } else {
 434               newline = 1;
 435             }
 436           } else if (length(text) || length(tail)) {
 437             if (newline) text = indent text;
 438             newline = 0;
 439           }
 440
 441           text = text tail;
 442           mode_tracker(context, text);
 443           print untab(transform_escape(s, r, text, src));
 444           if (linenos) {
 445             lineno_suppressed = substr(lastline, length(lastline)) == "\\";
 446           }
 447         }
 448       } else {
 449         # empty last chunklet
 450       }
 451     }
 452   }
 453   if (! finalize_mode_tracker(context)) {
 454     dump_mode_tracker(context);
 455     error(sprintf(_"Module %s did not close context properly.\nUsed by: %s\n", chunk_name, chunk_path));
 456   }
 457 }
 458 function expand_chunk_args(text, params, args,
 459   p, text_array, next_text, v, t, l)
 460 {
 461   if (split(text, text_array, "\\${")) {
 462     for(p in params) {
 463       v[params[p]]=args[p];
 464     }
 465     text=text_array[1];
 466     for(t=2; t in text_array; t++) {
 467       if (match(text_array[t], "^([a-zA-Z_][a-zA-Z0-9_]*)}", l) &&
 468           l[1] in v)
 469       {
 470         text = text v[l[1]] substr(text_array[t], length(l[1])+2);
 471       } else {
 472         text = text "${" text_array[t];
 473       }
 474     }
 475   }
 476
 477   return text;
 478 }
 479
  # Program initialisation: constants, the per-language mode and escape tables,
  # default settings, and command-line option parsing.
  #
  # modes[language, mode, "submodes" | "delimiters" | "terminators"] hold regex
  # fragments that mode_tracker() combines; the mode "" is the top level of a
  # language.  escapes[language, mode] counts the escape rules of a mode and
  # escapes[language, mode, n, "s" | "r"] are the search/replace pairs that
  # mode_escaper() collects.
  480 BEGIN {
          # Character 11 separates chunk arguments; multi-subscript keys are joined with ",".
  481   ARG_SEPARATOR=sprintf("%c", 11);
  482   part_type_chunk=1;
  483   SUBSEP=",";
          # --- "c-like": used by write_chunk_r() to parse chunk argument lists ---
  484   modes["c-like", "",  "submodes"]="\\\\|\"|'|{|\\(|\\[";
  485   modes["c-like", "",  "delimiters"]=" *, *";
  486   modes["c-like", "\\", "terminators"]=".";
  487   modes["c-like", "\"", "submodes"]="\\\\";
  488   modes["c-like", "\"", "terminators"]="\"";
  489   escapes["c-like", "\"", ++escapes["c-like", "\""], "s"]="\\\\";
  490   escapes["c-like", "\"",   escapes["c-like", "\""], "r"]="\\\\";
  491   escapes["c-like", "\"", ++escapes["c-like", "\""], "s"]="\"";
  492   escapes["c-like", "\"",   escapes["c-like", "\""], "r"]="\\" "\"";
  493   escapes["c-like", "\"", ++escapes["c-like", "\""], "s"]="\n";
  494   escapes["c-like", "\"",   escapes["c-like", "\""], "r"]="\\n";
  495   modes["c-like", "'", "submodes"]="\\\\";
  496   modes["c-like", "'", "terminators"]="'";
  497   escapes["c-like", "'", ++escapes["c-like", "'"], "s"]="\\\\";
  498   escapes["c-like", "'",   escapes["c-like", "'"], "r"]="\\\\";
  499   escapes["c-like", "'", ++escapes["c-like", "'"], "s"]="'";
  500   escapes["c-like", "'",   escapes["c-like", "'"], "r"]="\\" "'";
  501   escapes["c-like", "'", ++escapes["c-like", "'"], "s"]="\n";
  502   escapes["c-like", "'",   escapes["c-like", "'"], "r"]="\\n";
  503   modes["c-like", "{",  "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  504   modes["c-like", "{",  "delimiters"]=" *, *";
  505   modes["c-like", "{",  "terminators"]="}";
  506   modes["c-like", "[",  "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  507   modes["c-like", "[",  "delimiters"]=" *, *";
  508   modes["c-like", "[",  "terminators"]="\\]";
  509   modes["c-like", "(",  "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  510   modes["c-like", "(",  "delimiters"]=" *, *";
  511   modes["c-like", "(",  "terminators"]="\\)";
  512
          # --- "c": the common table plus /* */ comments, // comments and # lines ---
  513   modes["c", "",  "submodes"]="\\\\|\"|'|{|\\(|\\[";
  514   modes["c", "",  "delimiters"]=" *, *";
  515   modes["c", "\\", "terminators"]=".";
  516   modes["c", "\"", "submodes"]="\\\\";
  517   modes["c", "\"", "terminators"]="\"";
  518   escapes["c", "\"", ++escapes["c", "\""], "s"]="\\\\";
  519   escapes["c", "\"",   escapes["c", "\""], "r"]="\\\\";
  520   escapes["c", "\"", ++escapes["c", "\""], "s"]="\"";
  521   escapes["c", "\"",   escapes["c", "\""], "r"]="\\" "\"";
  522   escapes["c", "\"", ++escapes["c", "\""], "s"]="\n";
  523   escapes["c", "\"",   escapes["c", "\""], "r"]="\\n";
  524   modes["c", "'", "submodes"]="\\\\";
  525   modes["c", "'", "terminators"]="'";
  526   escapes["c", "'", ++escapes["c", "'"], "s"]="\\\\";
  527   escapes["c", "'",   escapes["c", "'"], "r"]="\\\\";
  528   escapes["c", "'", ++escapes["c", "'"], "s"]="'";
  529   escapes["c", "'",   escapes["c", "'"], "r"]="\\" "'";
  530   escapes["c", "'", ++escapes["c", "'"], "s"]="\n";
  531   escapes["c", "'",   escapes["c", "'"], "r"]="\\n";
  532   modes["c", "{",  "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  533   modes["c", "{",  "delimiters"]=" *, *";
  534   modes["c", "{",  "terminators"]="}";
  535   modes["c", "[",  "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  536   modes["c", "[",  "delimiters"]=" *, *";
  537   modes["c", "[",  "terminators"]="\\]";
  538   modes["c", "(",  "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  539   modes["c", "(",  "delimiters"]=" *, *";
  540   modes["c", "(",  "terminators"]="\\)";
  541   modes["c", "", "submodes"] = modes["c", "", "submodes"] "|" "/\\*";
  542   modes["c", "/*", "terminators"]="\\*/";
  543   modes["c", "", "submodes"] = modes["c", "", "submodes"] "|" "//";
  544   modes["c", "//", "terminators"]="\n";
  545   escapes["c", "//", ++escapes["c", "//"], "s"]="\n";
  546   escapes["c", "//",   escapes["c", "//"], "r"]="\n//";
  547   modes["c", "", "submodes"] = modes["c", "", "submodes"] "|" "#";
  548   modes["c", "#", "submodes" ]="\\\\";
  549   modes["c", "#", "terminators"]="\n";
  550   escapes["c", "#", ++escapes["c", "#"], "s"]="\n";
  551   escapes["c", "#",   escapes["c", "#"], "r"]="\\\\\n";
  552
          # --- "awk": the common table plus # comments and a "/^" ... "/" mode ---
  553   modes["awk", "",  "submodes"]="\\\\|\"|'|{|\\(|\\[";
  554   modes["awk", "",  "delimiters"]=" *, *";
  555   modes["awk", "\\", "terminators"]=".";
  556   modes["awk", "\"", "submodes"]="\\\\";
  557   modes["awk", "\"", "terminators"]="\"";
  558   escapes["awk", "\"", ++escapes["awk", "\""], "s"]="\\\\";
  559   escapes["awk", "\"",   escapes["awk", "\""], "r"]="\\\\";
  560   escapes["awk", "\"", ++escapes["awk", "\""], "s"]="\"";
  561   escapes["awk", "\"",   escapes["awk", "\""], "r"]="\\" "\"";
  562   escapes["awk", "\"", ++escapes["awk", "\""], "s"]="\n";
  563   escapes["awk", "\"",   escapes["awk", "\""], "r"]="\\n";
  564   modes["awk", "'", "submodes"]="\\\\";
  565   modes["awk", "'", "terminators"]="'";
  566   escapes["awk", "'", ++escapes["awk", "'"], "s"]="\\\\";
  567   escapes["awk", "'",   escapes["awk", "'"], "r"]="\\\\";
  568   escapes["awk", "'", ++escapes["awk", "'"], "s"]="'";
  569   escapes["awk", "'",   escapes["awk", "'"], "r"]="\\" "'";
  570   escapes["awk", "'", ++escapes["awk", "'"], "s"]="\n";
  571   escapes["awk", "'",   escapes["awk", "'"], "r"]="\\n";
  572   modes["awk", "{",  "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  573   modes["awk", "{",  "delimiters"]=" *, *";
  574   modes["awk", "{",  "terminators"]="}";
  575   modes["awk", "[",  "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  576   modes["awk", "[",  "delimiters"]=" *, *";
  577   modes["awk", "[",  "terminators"]="\\]";
  578   modes["awk", "(",  "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  579   modes["awk", "(",  "delimiters"]=" *, *";
  580   modes["awk", "(",  "terminators"]="\\)";
  581   modes["awk", "", "submodes"] = modes["awk", "", "submodes"] "|" "#";
  582   modes["awk", "#", "terminators"]="\n";
  583   escapes["awk", "#", ++escapes["awk", "#"], "s"]="\n";
  584   escapes["awk", "#",   escapes["awk", "#"], "r"]="\n#";
  585   modes["awk", "", "submodes"] = modes["awk", "", "submodes"] "|" "/\\^";
  586   modes["awk", "/^", "terminators"]="/";
          # --- "perl": the common table plus /* */ and # comments ---
  587   modes["perl", "",  "submodes"]="\\\\|\"|'|{|\\(|\\[";
  588   modes["perl", "",  "delimiters"]=" *, *";
  589   modes["perl", "\\", "terminators"]=".";
  590   modes["perl", "\"", "submodes"]="\\\\";
  591   modes["perl", "\"", "terminators"]="\"";
  592   escapes["perl", "\"", ++escapes["perl", "\""], "s"]="\\\\";
  593   escapes["perl", "\"",   escapes["perl", "\""], "r"]="\\\\";
  594   escapes["perl", "\"", ++escapes["perl", "\""], "s"]="\"";
  595   escapes["perl", "\"",   escapes["perl", "\""], "r"]="\\" "\"";
  596   escapes["perl", "\"", ++escapes["perl", "\""], "s"]="\n";
  597   escapes["perl", "\"",   escapes["perl", "\""], "r"]="\\n";
  598   modes["perl", "'", "submodes"]="\\\\";
  599   modes["perl", "'", "terminators"]="'";
  600   escapes["perl", "'", ++escapes["perl", "'"], "s"]="\\\\";
  601   escapes["perl", "'",   escapes["perl", "'"], "r"]="\\\\";
  602   escapes["perl", "'", ++escapes["perl", "'"], "s"]="'";
  603   escapes["perl", "'",   escapes["perl", "'"], "r"]="\\" "'";
  604   escapes["perl", "'", ++escapes["perl", "'"], "s"]="\n";
  605   escapes["perl", "'",   escapes["perl", "'"], "r"]="\\n";
  606   modes["perl", "{",  "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  607   modes["perl", "{",  "delimiters"]=" *, *";
  608   modes["perl", "{",  "terminators"]="}";
  609   modes["perl", "[",  "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  610   modes["perl", "[",  "delimiters"]=" *, *";
  611   modes["perl", "[",  "terminators"]="\\]";
  612   modes["perl", "(",  "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  613   modes["perl", "(",  "delimiters"]=" *, *";
  614   modes["perl", "(",  "terminators"]="\\)";
  615   modes["perl", "", "submodes"] = modes["perl", "", "submodes"] "|" "/\\*";
  616   modes["perl", "/*", "terminators"]="\\*/";
  617   modes["perl", "", "submodes"] = modes["perl", "", "submodes"] "|" "#";
  618   modes["perl", "#", "terminators"]="\n";
  619   escapes["perl", "#", ++escapes["perl", "#"], "s"]="\n";
  620   escapes["perl", "#",   escapes["perl", "#"], "r"]="\n#";
          # --- "sh": the common table plus # comments and a "$" rule inside double quotes ---
  621   modes["sh", "",  "submodes"]="\\\\|\"|'|{|\\(|\\[";
  622   modes["sh", "",  "delimiters"]=" *, *";
  623   modes["sh", "\\", "terminators"]=".";
  624   modes["sh", "\"", "submodes"]="\\\\";
  625   modes["sh", "\"", "terminators"]="\"";
  626   escapes["sh", "\"", ++escapes["sh", "\""], "s"]="\\\\";
  627   escapes["sh", "\"",   escapes["sh", "\""], "r"]="\\\\";
  628   escapes["sh", "\"", ++escapes["sh", "\""], "s"]="\"";
  629   escapes["sh", "\"",   escapes["sh", "\""], "r"]="\\" "\"";
  630   escapes["sh", "\"", ++escapes["sh", "\""], "s"]="\n";
  631   escapes["sh", "\"",   escapes["sh", "\""], "r"]="\\n";
  632   modes["sh", "'", "submodes"]="\\\\";
  633   modes["sh", "'", "terminators"]="'";
  634   escapes["sh", "'", ++escapes["sh", "'"], "s"]="\\\\";
  635   escapes["sh", "'",   escapes["sh", "'"], "r"]="\\\\";
  636   escapes["sh", "'", ++escapes["sh", "'"], "s"]="'";
  637   escapes["sh", "'",   escapes["sh", "'"], "r"]="\\" "'";
  638   escapes["sh", "'", ++escapes["sh", "'"], "s"]="\n";
  639   escapes["sh", "'",   escapes["sh", "'"], "r"]="\\n";
  640   modes["sh", "{",  "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  641   modes["sh", "{",  "delimiters"]=" *, *";
  642   modes["sh", "{",  "terminators"]="}";
  643   modes["sh", "[",  "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  644   modes["sh", "[",  "delimiters"]=" *, *";
  645   modes["sh", "[",  "terminators"]="\\]";
  646   modes["sh", "(",  "submodes" ]="\\\\|\"|{|\\(|\\[|'|/\\*";
  647   modes["sh", "(",  "delimiters"]=" *, *";
  648   modes["sh", "(",  "terminators"]="\\)";
  649   #<\chunkref{mode:common-string}("sh", "\textbackslash{}"")>
  650   #<\chunkref{mode:common-string}("sh", "'")>
  651   modes["sh", "", "submodes"] = modes["sh", "", "submodes"] "|" "#";
  652   modes["sh", "#", "terminators"]="\n";
  653   escapes["sh", "#", ++escapes["sh", "#"], "s"]="\n";
  654   escapes["sh", "#",   escapes["sh", "#"], "r"]="\n#";
  655   escapes["sh", "\"", ++escapes["sh", "\""], "s"]="\\$";
  656   escapes["sh", "\"",   escapes["sh", "\""], "r"]="\\$";
          # --- defaults, overridden by the options parsed below ---
  657   debug=0;
  658   linenos=0;
  659   notangle_mode=0;
  660   root="*";
  661   tabs = "";
  662
          # Options: -R <root> selects the chunk to write, -r clears root (the END
          # rule then lists chunk names), -L enables #line output, -d enables
          # debugging, -T <n> sets `tabs' to n spaces, -h and unknown options
          # call help().
          # NOTE(review): help() is not defined anywhere in the visible source -- confirm.
  663   Optind = 1    # skip ARGV[0]
  664   while(getopt(ARGC, ARGV, "R:LdT:hr")!=-1) {
  665     if (Optopt == "R") root = Optarg;
  666     else if (Optopt == "r") root="";
  667     else if (Optopt == "L") linenos = 1;
  668     else if (Optopt == "d") debug = 1;
  669     else if (Optopt == "T") tabs = indent_string(Optarg+0);
  670     else if (Optopt == "h") help();
  671     else if (Optopt == "?") help();
  672   }
          # Blank the consumed arguments; awk skips empty ARGV elements when reading input.
  673   for (i=1; i<Optind; i++) { ARGV[i]=""; }
  674 }
 675
 676 #/\n/ {
 677 #  gsub("\n*$","");
 678 #  gsub("\n", " ");
 679 #}
 680 #===
  # Input normalisation.  Records containing the UTF-8 bytes E2 86 A6 (U+21A6)
  # have every occurrence replaced by a tab; records containing E2 80 98
  # (U+2018) have every occurrence replaced by a backquote.  Both rules edit
  # $0 in place and fall through to the rules that follow.
  681 /\xE2\x86\xA6/ {
  682   gsub("\\xE2\\x86\\xA6", "\x09");
  683 }
  684
  685 /\xE2\x80\x98/ {
  686   gsub("\\xE2\\x80\\x98", "`");
  687 }
 688
 689 /\xE2\x89\xA1/ {
 690   if (match($0, "^ *([^[ ]* |)<([^[ ]*)\\[[0-9]*\\][(](.*)[)].*, lang=([^ ]*)", line)) {
 691     next_chunk_name=line[2];
 692     get_texmacs_chunk_args(line[3], next_chunk_params);
 693     gsub(ARG_SEPARATOR ",? ?", ";", line[3]);
 694     params = "params=" line[3];
 695     if ((line[4])) {
 696       params = params ",language=" line[4]
 697     }
 698     get_tex_chunk_args(params, next_chunk_opts);
 699     new_chunk(next_chunk_name, next_chunk_opts, next_chunk_params);
 700     texmacs_chunking = 1;
 701   } else {
 702     warning(sprintf("Unexpected chunk match: %s\n", $_))
 703   }
 704   next;
 705 }
  # LaTeX "\Chunk{name,options}" line: remember the name (text up to the first
  # space, comma or closing brace) in next_chunk_name and parse the remaining
  # options into next_chunk_opts.  Nothing is opened here; the following
  # \begin{...} rule uses these values.  The record is always consumed.
  706 /^\\Chunk{/ {
  707   if (match($0, "^\\\\Chunk{ *([^ ,}]*),?(.*)}", line)) {
  708     next_chunk_name = line[1];
  709     get_tex_chunk_args(line[2], next_chunk_opts);
  710   }
  711   next;
  712 }
  # Start of a LaTeX listing ("\begin{lstlisting}" or "\begin{Chunk}").
  # If the line carries a name= option, that name opens the chunk; otherwise
  # the name and options remembered from a preceding \Chunk line are used.
  # Turns chunking on and consumes the record.
  713 /^\\begin{lstlisting}|^\\begin{Chunk}/ {
  714   if (match($0, "}.*[[,] *name= *{? *([^], }]*)", line)) {
  715     new_chunk(line[1]);
  716   } else {
  717     new_chunk(next_chunk_name, next_chunk_opts);
  718   }
  719   chunking=1;
  720   next;
  721 }
  # End of a TeXmacs chunk.  While in TeXmacs chunk mode, a line starting
  # (after optional spaces) with "|" and a run of underscores, or with "|/\",
  # clears active_chunk and switches both chunking flags off.  Neither rule
  # uses `next', so the record continues through the rules below.
  722 /^ *\|____________*/ && texmacs_chunking {
  723   active_chunk="";
  724   texmacs_chunking=0;
  725   chunking=0;
  726 }
  727 /^ *\|\/\\/ && texmacs_chunking {
  728   texmacs_chunking=0;
  729   chunking=0;
  730   active_chunk="";
  731 }
  # Per-record TeXmacs handling; the order of these rules matters.
  # The first line is an assignment used as a pattern: it resets texmacs_chunk
  # for every record and, evaluating to 0, never triggers the default action.
  732 texmacs_chunk=0;
      # Numbered listing line ("  12 | code"): inside a TeXmacs chunk, mark it as
      # chunk text and strip the number and bar from $0.
  733 /^ *[1-9][0-9]* *\| / {
  734   if (texmacs_chunking) {
  735     chunking=1;
  736     texmacs_chunk=1;
  737     gsub("^ *[1-9][0-9]* *\\| ", "")
  738   }
  739 }
      # Lines starting with "./\" and lines made only of underscores are
      # skipped while in a TeXmacs chunk.
  740 /^ *\.\/\\/ && texmacs_chunking {
  741   next;
  742 }
  743 /^ *__*$/ && texmacs_chunking {
  744   next;
  745 }
  746 texmacs_chunking {
  747   if (! texmacs_chunk) {
  748     # must be a texmacs continued line
  749     chunking=1;
  750     texmacs_chunk=1;
  751   }
  752 }
      # NOTE(review): this clears `chunking' for every record that was not marked
      # as TeXmacs chunk text, including lines inside LaTeX or noweb chunks --
      # confirm that this is intended.
  753 ! texmacs_chunk {
  754 #  texmacs_chunking=0;
  755   chunking=0;
  756 }
  # noweb chunk header ("<<name>>=" with nothing but spaces after it): turn
  # chunking and notangle_mode on, open the chunk and consume the record.
  # A line that matches the looser pattern but not the exact form falls
  # through to the rules below.
  757 /^[<]<.*[>]>=/ {
  758   if (match($0, "^[<]<(.*)[>]>= *$", line)) {
  759     chunking=1;
  760     notangle_mode=1;
  761     new_chunk(line[1]);
  762     next;
  763   }
  764 }
  # End of a chunk: "\end{lstlisting}" / "\end{Chunk}" (record consumed) or a
  # line holding only "@" and optional spaces (record falls through).  Both
  # switch chunking off and clear active_chunk.
  765 /^\\[e]nd{lstlisting}|^\\[e]nd{Chunk}/ {
  766   chunking=0;
  767   active_chunk="";
  768   next;
  769 }
  770 /^@ *$/ {
  771   chunking=0;
  772   active_chunk="";
  773 }
  # Chunk body capture.  Records outside a chunk are dropped.  For a record
  # inside the active chunk, tabs are expanded when -T was given, and the line
  # is then cut at every chunk reference.  Three reference forms are
  # recognised: the guillemet form (UTF-8 C2 AB ... C2 BB), the
  # "=<\chunkref{name}(args)>" form and the noweb "<<name>>" form.  The text
  # before each reference is stored with chunk_line() and the reference with
  # chunk_include(); the rest of the line and a "\n" fragment complete the record.
  774 ! chunking { next; }
  775 length(active_chunk) {
  776   if (length(tabs)) {
  777     gsub("\t", tabs);
  778   }
  779   chunk = $0;
  780   indent = 0;
  781   while(match(chunk,"(\xC2\xAB)([^\xC2]*) [^\xC2]*\xC2\xBB", line) ||
  782         match(chunk,
  783               "([=]<\\\\chunkref{([^}>]*)}(\\(.*\\)|)>|<<([a-zA-Z_][-a-zA-Z0-9_]*)>>)",
  784               line)\
  785   ) {
          # `indent' accumulates the length of the text that preceded each
          # reference and is passed on as the indent of the included chunk.
  786     chunklet = substr(chunk, 1, RSTART - 1);
  787     indent += length(chunklet);
  788     chunk_line(active_chunk, chunklet);
  789     chunk = substr(chunk, RSTART + RLENGTH);
  790     if (substr(line[1], 1, 1) == "=") {
  791       # chunk name up to }
  792           # FILTHY HACK
              # Undo the LaTeX escaping of "#", "\" and "^" in the argument list.
  793           gsub("\\\\#", "#", line[3]);
  794           gsub("\\\\textbackslash{}", "\\", line[3]);
  795           gsub("\\\\\\^", "^", line[3]);
  796       chunk_include(active_chunk, line[2] line[3], indent);
  797     } else if (substr(line[1], 1, 1) == "<") {
  798       chunk_include(active_chunk, line[4], indent);
  799     } else if (line[1] == "\xC2\xAB") {
  800       chunk_include(active_chunk, line[2], indent);
  801     } else {
  802       error("Unknown chunk fragment: " line[1]);
  803     }
  804   }
  805   chunk_line(active_chunk, chunk);
  806   chunk_line(active_chunk, "\n");
  807 }
 808 END {
 809   if (debug) {
 810     print "------ chunk names "
 811     output_chunk_names();
 812     print "====== chunks"
 813     output_chunks();
 814     print "++++++ debug"
 815     for (a in chunks) {
 816       print a "=" chunks[a];
 817     }
 818   }
 819   ORS="";
 820   if (length(root)) output_chunk(root);
 821   else output_chunk_names();
 822 }
 823 function get_texmacs_chunk_args(text, args,   a, done) {
 824   split(text, args, ARG_SEPARATOR);
 825
 826   done=0
 827   for (a=1; (a in args); a++) if (a>1) {
 828     if (args[a] == "" || substr(args[a], 1, 1) == ")") done=1;
 829     if (done) {
 830       delete args[a];
 831       break;
 832     }
 833
 834     if (substr(args[a], 1, 2) == ", ") args[a]=substr(args[a], 3);
 835     else if (substr(args[a], 1, 1) == ",") args[a]=substr(args[a], 2);
 836   }
 837 }
  # get_tex_chunk_args(text, values, path) -- parse "name=value, name, ..." options.
  #
  #   text    option text to parse
  #   values  output array, cleared on entry; receives values[path name] = value
  #           ("" for an option given without "=")
  #   path    key prefix for options nested inside "{...}"; callers omit it
  # A value that starts with "{" is parsed by a recursive call with
  # path extended by name SUBSEP; a "}" at the start of the remaining text ends
  # the current group.
  # Returns the text left unparsed: what follows a closing "}", the text that
  # did not match the option pattern, or "" when everything was consumed.
  # NOTE(review): the recursive call runs split("", values) as well, so entries
  # stored before a nested group are cleared -- confirm whether that is intended.
  838 function get_tex_chunk_args(text, values,
  839   # optional parameters
  840   path, # hierarchical precursors
  841   # local vars
  842   a, name)
  843 {
  844   split("", values);
  845   while(length(text)) {
  846     if (match(text, "^ *}(.*)", a)) {
  847       return a[1];
  848     }
          # Groups: a[1] name, a[2] separator plus the rest, a[3] "," or "=",
          # a[4] the text after the separator, a[5] the value up to "," or "}",
          # a[6] the text after the value.
  849     if (! match(text, " *([^,=]*[^,= ]) *(([,=]) *(([^,}]*) *,* *(.*))|)$", a)) {
  850       return text;
  851     }
  852     name=a[1];
  853     if (a[3] == "=") {
  854       if (substr(a[4],1,1) == "{") {
  855         text = get_tex_chunk_args(substr(a[4],2), values, path name SUBSEP);
  856       } else {
  857         values[path name]=a[5];
  858         text = a[6];
  859       }
  860     } else {
  861       values[path name]="";
  862       text = a[2];
  863     }
  864   }
  865   return text;
  866 }