texnicard.w

   1 % TeXnicard
   2 % version 0.1
   3
   4 % Licensed by GNU GPL v3 or later version.
   5
   6 \def\contentspagenumber{1}\pageno=3
   7 \def\title{\TeX nicard}
   8 \def\covernote{{\fiverm Batteries not included. Do not use this book as a
   9 flotation device. This is free software; see source file for details.}}
  10
  11 % Prevent \outer from getting in the way, stupid!
  12 \def\+{\tabalign}
  13
  14 @mp@-
  15 ``u{YJ"@<Predeclaration of procedures@>=
  16 qJA";
  17 J"@
  18 "@<Procedure codes@>=
  19 B" {
  20 @)
  21
  22 \long\def\IndexCharacter#1':{`\.{\char`#1}'}
  23 @mcase@-
  24 ``u "case
  25 qAqA/@!@^\IndexCharacter\
  26 Bqu'B"@>
  27 YJ"@<Nothing~@>
  28 @)
  29
  30 \iffalse
  31 @s _decl_head_ =09
  32 @s FILE int
  33 @s dvi_measure int
  34 \fi
  35
  36 \newcount\bibliocount \bibliocount=0
  37 \def\biblio#1{%
  38   \advance\bibliocount by 1 %
  39   $^{[\the\bibliocount]}$%
  40   \expandafter\def\csname biblio \the\bibliocount\endcsname{#1}%
  41 }%
  42
  43 \emergencystretch=\hsize
  44
  45 \def\strike#1{%
  46   \setbox0=\hbox{#1}%
  47   \rlap{\vrule height 3.2pt depth -2.5pt width \wd0}{\box0}%
  48 }%
  49
  50 \def\sectionnumber\PB#1{\sectionnumbernext#1}
  51 \def\sectionnumbernext$\X#1:#2\X${#1}
  52
  53 @*Introduction. This is \TeX nicard, a program designed for similar
  54 purposes of Magic Set Editor, but in a different (and better) way. It
  55 should be able to produce higher quality cards than Wizards of the Coast,
  56 and then they ought to use this program, too!
  57
  58 @^Magic Set Editor@>
  59 @^Wizards of the Coast@>
  60 @^commercial viability@>
  61
  62 @c
  63 @<Memory usage logging@>@;
  64 @<Interpreted C codes@>@;
  65 @<Include files@>@;
  66 @h
  67 @<Typedefs@>@;
  68 @<Late Typedefs@>@;
  69 @<The include file for memory managed types@>@;
  70 @<Global variables@>@;
  71 @<Predeclaration of procedures@>@;
  72 @<Procedure codes@>@;
  73
  74 @ This line below should be changed with the current version number,
  75 whenever a new version is released. (If you fork this program, you should
  76 also include some indication of forking in the \\{version\_string}.)
  77 % (it doesn't work if I use vertical bars here)
  78
  79 @^forking@>
  80
  81 @d version_string "0.1"
  82 @d version_number 1 // one major is worth ten minors
  83
  84 @ @<Typedefs@>=
  85 typedef unsigned char boolean;
  86
  87 @ You might be wondering what this section is for (especially since it
  88 appears to be unused). The reason is that some metamacros use it in order
  89 to force the compiler to know the correct line numbers (in case some lines
  90 have been added by metamacros).
  91
  92 @^nothing@>
  93 @^metamacro@>
  94
  95 @<Nothing~@>= /* ... */
  96
  97 @ There is also memory usage logging. If it is not being compiled for
  98 memory usage logging, it should just ignore these kind of commands.
  99
 100 @<Memory usage logging@>=
 101 #ifndef @!memusage_log
 102 #define @[memusage_log(_text,_arg1)@]
 103 #endif
 104
 105 @*Memory Management. This program uses a lot of similar memory management,
 106 so they will be defined in this chapter.
 107
 108 @^memory management@>
 109
 110 @d none -1 // indication that a |data_index| means nothing
 111
 112 @<Typedefs@>=
 113 typedef struct {
 114   char*data; // pointer to array of blocks (|char*| for use with |sizeof|)
 115   int used; // number of blocks used
 116   int allocated; // number of blocks allocated
 117 } managed_memory;
 118 @#typedef int data_index;
 119
 120 @ We will use an interpreted C code here, which will send output to a
 121 header file |"memory_management.h"|.
 122
 123 @<The include file for memory managed types@>=
 124 #include "memory_management.h"
 125
 126 @ We will need some variables now just to keep track of which kinds of
 127 memory managed areas are needed.
 128
 129 @<Interpreted C codes@>= @{
 130   char**memory_managed_types;
 131   int num_memory_managed_types;
 132   memory_managed_types=malloc(128*sizeof(char*));
 133   num_memory_managed_types=0;
 134 @}
 135
 136 @ From this code, the structure will be created in the header file for
 137 each type that we need a |memory_of|. This section, however, is just a
 138 ``wrapper'' code for the template.
 139
 140 @f @!memory_of _decl_head_ // category 9
 141
 142 @<Interpreted C codes@>= @{
 143   void memory_of$() {
 144     should_output=0;
 145     set_goal("bp","",@+{
 146       sendc(0200|'{'); // begin interpret mode
 147       send("send_memory_of(\"");
 148       set_goal("e","",@+{
 149         send("\");");
 150         sendc(0200|'}'); // end interpret mode
 151         should_output=0;
 152       }@+);
 153     }@+);
 154   }
 155 @}
 156
 157 @ Here is what it does in order to keep a list of the memory managed
 158 types. Note the type name was enclosed in quotation marks, so now it will
 159 be received as a string.
 160
 161 @<Interpreted C codes@>= @{
 162   void send_memory_of(char*s) {
 163     int i;
 164     s++;
 165     @<Send the proper name of the memory managed type@>;
 166     for(i=0;i<num_memory_managed_types;i++) {
 167       if(!strcmp(s,memory_managed_types[i])) return;
 168     }
 169     memory_managed_types[num_memory_managed_types++]=s;
 170   }
 171 @}
 172
 173 @ @<Send the proper name of the memory managed type@>= {
 174   send(" x__");
 175   send(s);
 176   send(" ");
 177 }
 178
 179 @ Now the code you get to in order to define the structures in the header
 180 file. We are mostly just copying the form of our |managed_memory|
 181 structure, but it will be customized to work with the specific type of the
 182 |data| components.
 183
 184 @<Interpreted C codes@>= @{
 185   void send_memory_managed_types() {
 186     int i;
 187     for(i=0;i<num_memory_managed_types;i++) {
 188       send("typedef struct {");
 189       send(memory_managed_types[i]);
 190       send("*data; int used; int allocated; } x__");
 191       send(memory_managed_types[i]);
 192       send(";");
 193     }
 194   }
 195 @}
 196
 197 @ @(memory_management.h@>= @{
 198   send_memory_managed_types();
 199 @}
 200
 201 @ These next two subroutines are used to allocate additional memory.
 202
 203 @d init_memory(_a,_size) init_memory_(&(_a),sizeof(*((_a).data)),(_size))
 204 @d new_record(_area) new_record_(&(_area),sizeof(*((_area).data)))
 205
 206 @-p void*init_memory_(void*mem,int record_size,int num_records) {
 207   managed_memory*m=mem;
 208   m->data=malloc(record_size*num_records);
 209   m->used=0;
 210   m->allocated=num_records;
 211   if(!m->data) @<Fatal error due to lack of memory@>;
 212   return m->data;
 213 }
 214
 215 @ @-p data_index new_record_(void*mem,int record_size) {
 216   managed_memory*m=mem;
 217   m->used++;
 218   if(m->used>m->allocated) {
 219     m->allocated*=2;
 220     m->data=realloc(m->data,m->allocated*record_size);
 221   }
 222   if(!m->data) @<Fatal error due to lack of memory@>;
 223   @<Zero the new record@>;
 224   return m->used-1;
 225 }
 226
 227 @ @<Fatal error due to lack of memory@>= {
 228   fprintf(stderr,"Out of memory\n");
 229 @.Out of memory@>
 230   exit(1);
 231 }
 232
 233 @ @<Zero the new record@>= {
 234   memset(m->data+(record_size*(m->used-1)),0,record_size);
 235 }
 236
 237 @ Now just one more thing. It is useful to have a |foreach| macro to
 238 iterate the areas.
 239
 240 @d foreach(_var,_area) for(_var=0;_var<_area.used;_var++)@;
 241 @f foreach while
 242
 243 @*Symbolic Names. There will be some names defined for the use of naming
 244 subroutines, symbolic constants, patterns, card areas, etc. These names
 245 are stored in a |managed_memory| called |names|.
 246
 247 It also stores references to other things (defined in later chapters). The
 248 numeric value of a name in |names.data[x]| is |x+256|.
 249
 250 @<Late Typedefs@>=
 251 typedef struct {
 252   char*name;
 253   @<More elements of |name_data|@>@;
 254 } name_data;
 255
 256 @ @<Global variables@>=
 257 memory_of(name_data) names;
 258
 259 @ @<Initialize memory@>= init_memory(names,16);
 260
 261 @ This subroutine finds a name, adding it if necessary. The number
 262 corresponding to it (as described above) will be the return value.
 263
 264 @-p int find_name(char*name) {
 265   @<Search for the |name| in |names|@>;
 266   @<Add the new name (it was not found)@>;
 267 }
 268
 269 @ @<Search for the |name| in |names|@>= {
 270   int i;
 271   foreach(i,names) {
 272     if(!strcmp(names.data[i].name,name)) return i+256;
 273   }
 274 }
 275
 276 @ @<Add the new name (it was not found)@>= {
 277   int n=new_record(names);
 278   names.data[n].name=strdup(name);
 279   return n+256;
 280 }
 281
 282 @ A macro will be useful to access the data from a number.
 283
 284 @d name_info(_num) names.data[(_num)-0x0100]
 285
 286 @ This code lists the names. It is used for a diagnostic purpose.
 287
 288 @<Display the list of names@>= {
 289   int n;
 290   foreach(n,names) {
 291     printf("%d \"%s\" ",n+256,names.data[n].name);
 292     @<Display other fields of |names.data[n]|@>;
 293     printf("\n");
 294   }
 295 }
 296
 297 @*Storage of Tokens. Tokens are stored as 16-bit numbers. Values |0x0020|
 298 to |0x00FF| represent those ASCII characters, and |0x0000| to |0x001F| are
 299 ASCII control codes. Higher numbers represent an index into the |names|
 300 array (where |0x0101| represents |names.data[0x0001]|).
 301
 302 @<Typedefs@>=
 303 @q[data type of tokens]@>
 304 typedef unsigned short token;
 305
 306 @ This section lists the ASCII control codes which can be used. Some of
 307 them have slightly different meaning from the ASCII standard.
 308
 309 @d null_char 0x00 // end of a |raw_data| string or similar things
 310 @d pre_null_char 0x01 // becomes |null_char|
 311 @d end_transmission 0x04 // marks the end of the last card in this area
 312 @d tabulation 0x09 // represents a tab in a {\TeX} alignment
 313 @d raw_data 0x10 // enter raw {\TeX} mode
 314 @d whatsit 0x1A // a token for converting into a name token
 315 @d escape_code 0x1B // represents a {\TeX} control sequence introducer
 316 @d record_separator 0x1E // marks the end of a card
 317 @d field_separator 0x1F // marks the end of a field of a card
 318 @d start_name_code 0x0100
 319
 320 @ These tokens are used in card areas, which are defined (and described)
 321 in the next chapter.
 322
 323 @*Cards. The data of the cards is stored in card areas. Each card area
 324 is a list of tokens, terminated by |record_separator|. The final card in
 325 the area is terminated by |end_transmission|.
 326
 327 @<Typedefs@>=
 328 typedef struct {
 329   token*tokens;
 330   int allocated;
 331   int used;
 332 } card_area_data;
 333
 334 @ @<More elements of |name_data|@>=
 335   boolean has_card_area;
 336   data_index card_area;
 337
 338 @ @<Global variables@>=
 339 memory_of(card_area_data) card_areas;
 340
 341 @ @<Initialize memory@>= init_memory(card_areas,1);
 342
 343 @ A new card area is created with this.
 344
 345 @-p data_index set_card_area(int num) {
 346   name_data*m=&name_info(num);
 347   @<Use the card area which is already set, if able@>;
 348   @<Otherwise, create a new card area and use the new one@>;
 349 }
 350
 351 @ @<Use the card area which is already set, if able@>= {
 352   if(m->has_card_area) return m->card_area;
 353 }
 354
 355 @ @<Otherwise, create a new card area and use the new one@>= {
 356   data_index n=new_record(card_areas);
 357   m->has_card_area=1;
 358   card_areas.data[n].allocated=0x100;
 359   card_areas.data[n].tokens=malloc(0x100*sizeof(token));
 360   card_areas.data[n].used=0;
 361   return n;
 362 }
 363
 364 @ This subroutine sends a token to a card area.
 365
 366 @-p void send_token(data_index a,token x) {
 367   if(card_areas.data[a].allocated<card_areas.data[a].used+4)
 368     @<Double the allocation of card area tokens@>;
 369   card_areas.data[a].tokens[card_areas.data[a].used++]=x;
 370 }
 371
 372 @ @<Double the allocation of card area tokens@>= {
 373   int n=(card_areas.data[a].allocated*=2)*sizeof(token);
 374   card_areas.data[a].tokens=realloc(card_areas.data[a].tokens,n);
 375 }
 376
 377 @ @<Display other fields of |names.data[n]|@>= {
 378   if(names.data[n].has_card_area)
 379     printf("C(%d) ",names.data[n].card_area);
 380 }
 381
 382 @ The code in this section is used to ensure that each card area is
 383 properly terminated with |end_transmission| marker, so that when it is
 384 time to write the output files, it will know when to stop.
 385
 386 @<Send |end_transmission| to each card area@>= {
 387   data_index a;
 388   foreach(a,card_areas) send_token(a,end_transmission);
 389 }
 390
 391 @*Patterns. For pattern matching, we store the patterns in one memory
 392 managed area. The index of the beginning of each pattern area is stored
 393 in the |names| list.
 394
 395 These constants are special codes which can occur in the |text| string
 396 of a pattern.
 397
 398 @d begin_capture 1
 399 @d end_capture 2
 400 @d match_keyword 3 // match a keyword followed by a character in a table
 401 @d match_table 4 // match a character using a table
 402 @d optional_table 5 // match a character optional using a table
 403 @d failed_match 6
 404 @d jump_table 7 // use a table to jump to a marker
 405 @d successful_match 8
 406 @d back_one_space 9
 407 @d forward_one_space 10
 408 @d match_left_side 11 // match at beginning of line
 409 @d match_right_side 12 // match at end of line
 410 @d match_eight_bit 13 // match 8-bit encodings and control characters
 411
 412 @<Typedefs@>=
 413 typedef struct {
 414   char*text;
 415   unsigned int category; // category for keywords
 416   data_index subroutine;
 417   data_index next;
 418 } pattern_data;
 419
 420 @ @<More elements of |name_data|@>=
 421   boolean has_pattern_area;
 422   data_index pattern_area;
 423
 424 @ @<Global variables@>=
 425 memory_of(pattern_data) pattern_areas;
 426
 427 @ @<Initialize memory@>= init_memory(pattern_areas,4);
 428
 429 @ @<Display other fields of |names.data[n]|@>= {
 430   if(names.data[n].has_pattern_area)
 431     printf("P(%d) ",names.data[n].pattern_area);
 432 }
 433
 434 @ A new pattern area is created with this. The patterns in an area are
 435 stored like a linked list. The last one with |next| pointing to nothing,
 436 is the terminator entry.
 437
 438 @-p data_index set_pattern_area(int num) {
 439   name_data*m=&name_info(num);
 440   @<Use the pattern area which is already set, if able@>;
 441   @<Otherwise, create a new pattern area and use the new one@>;
 442 }
 443
 444 @ @<Use the pattern area which is already set, if able@>= {
 445   if(m->has_pattern_area) return m->pattern_area;
 446 }
 447
 448 @ @<Otherwise, create a new pattern area and use the new one@>= {
 449   data_index n=new_record(pattern_areas);
 450   m->has_pattern_area=1;
 451   pattern_areas.data[n].subroutine=none;
 452   pattern_areas.data[n].next=none;
 453   return n;
 454 }
 455
 456 @ @<Display the list of patterns@>= {
 457   int i;
 458   foreach(i,pattern_areas) {
 459     if(pattern_areas.data[i].text) {
 460       printf("%d:%08X:%d:%d\n",i,pattern_areas.data[i].category
 461         ,pattern_areas.data[i].subroutine,pattern_areas.data[i].next
 462       );
 463       display_string(pattern_areas.data[i].text);
 464       printf("\n");
 465     }
 466   }
 467 }
 468
 469 @*Keywords. Keywords means words which can be placed on the card and which
 470 can have special meanings, and possibly reminder text.
 471
 472 Keywords are stored in a large list in only one keyword area. A category
 473 can be given a name, which will automatically be assigned for the next bit
 474 of the keyword category when it is entered the first time.
 475
 476 @<Typedefs@>=
 477 typedef struct {
 478   char*match; // match text (can contain pattern codes)
 479   unsigned int category; // bitfield of categories
 480   int extra1;
 481   int extra2;
 482   char*replacement; // replacement text or reminder text
 483 } keyword_data;
 484
 485 @ @<Global variables@>=
 486 unsigned int next_keyword_category=1;
 487 memory_of(keyword_data) keywords;
 488
 489 @ @<Initialize memory@>= init_memory(keywords,4);
 490
 491 @ A keyword category is found (and created, if it is not found) using the
 492 following code.
 493
 494 @-p unsigned int find_category(char*name) {
 495   int i=find_name(name);
 496   if(name_info(i).value.number) {
 497     return name_info(i).value.number;
 498   } @+else if(!name_info(i).value.is_string) {
 499     name_info(i).value.number=next_keyword_category;
 500     next_keyword_category<<=1;
 501     if(!next_keyword_category)
 502       fprintf(stderr,"Too many keyword categories: %s\n",name);
 503 @.Too many keyword categories@>
 504     return name_info(i).value.number;
 505   }
 506 }
 507
 508 @ Some stack code commands are used when dealing with reading/writing
 509 keyword info.
 510
 511 In order that you might be able to iterate them, it will exit out of the
 512 current block when trying to read nonexisting keyword info instead of
 513 displaying an error message.
 514
 515 @<Cases for system commands@>=
 516   @-case 'k': {
 517     // Read keyword info
 518     if(registers['K'].number<0 || registers['K'].number>=keywords.used)
 519       return 0;
 520     push_num(keywords.data[registers['K'].number].extra1);
 521     push_num(keywords.data[registers['K'].number].extra2);
 522     push_string(keywords.data[registers['K'].number].replacement);
 523     break;
 524   }@#
 525   @-case 'K': {
 526     // Write keyword info
 527     if(registers['K'].number<0 || registers['K'].number>=keywords.used)
 528       program_error("Out of range");
 529     free(keywords.data[registers['K'].number].replacement);
 530     keywords.data[registers['K'].number].replacement=pop_string();
 531     keywords.data[registers['K'].number].extra2=pop_num();
 532     keywords.data[registers['K'].number].extra1=pop_num();
 533     break;
 534   }@#
 535
 536 @ @<Display the list of keywords@>= {
 537   int i;
 538   foreach(i,keywords) {
 539     display_string(keywords.data[i].match);
 540     printf("  [%d:%08X:%d:%d:%d]\n",i,keywords.data[i].category
 541       ,keywords.data[i].extra1,keywords.data[i].extra2
 542       ,strlen(keywords.data[i].replacement)
 543     );
 544   }
 545 }
 546
 547 @*Card List. A sorted summary list of the cards is kept in one list,
 548 having thirty-two general-purpose numeric fields, and a pointer to the
 549 beginning of the record (usually the name in which it will be indexed by).
 550
 551 @<Typedefs@>=
 552 typedef struct {
 553   int token_ptr;
 554   int field[32];
 555   int amount_in_pack; // used in pack generation
 556 } list_entry;
 557
 558 @ @<Global variables@>=
 559 memory_of(list_entry) card_list;
 560
 561 @ @<Initialize memory@>= init_memory(card_list,16);
 562
 563 @*Deck Lists. Deck lists involve lists of cards or rules for cards that
 564 belong to a deck or pack.
 565
 566 @^booster pack@>
 567
 568 There is one macro |lflag| here just to convert letters to bit flags. For
 569 example |lflag('a')| is the least significant bit.
 570
 571 @d lflag(_ch) (1<<((_ch)-'a'))
 572
 573 @<Typedefs@>=
 574 typedef struct {
 575   int amount;
 576   unsigned int flags;
 577   char*name;
 578   data_index next;
 579 } deck_entry;
 580
 581 @ @<Global variables@>=
 582 memory_of(deck_entry) deck_lists;
 583
 584 @ @<More elements of |name_data|@>=
 585   boolean has_deck_list;
 586   data_index deck_list;
 587
 588 @ @<Initialize memory@>= init_memory(deck_lists,4);
 589
 590 @ A new deck list is created with this. The deck entries are stored like a
 591 linked list. The terminator has |next| pointing to |none|.
 592
 593 @-p data_index set_deck_list(int num) {
 594   name_data*m=&name_info(num);
 595   @<Use the deck list which is already set, if able@>;
 596   @<Otherwise, create a new deck list and use the new one@>;
 597 }
 598
 599 @ @<Use the deck list which is already set, if able@>= {
 600   if(m->has_deck_list) return m->deck_list;
 601 }
 602
 603 @ @<Otherwise, create a new deck list and use the new one@>= {
 604   data_index n=new_record(deck_lists);
 605   m->has_deck_list=1;
 606   deck_lists.data[n].next=none;
 607   return n;
 608 }
 609
 610 @ @<Display the deck list@>= {
 611   data_index i;
 612   foreach(i,deck_lists) {
 613     printf("%d ",i);
 614     if(deck_lists.data[i].name) display_string(deck_lists.data[i].name);
 615     else printf("-");
 616     printf(" [%08X:%d:%d]\n",deck_lists.data[i].flags
 617       ,deck_lists.data[i].amount,deck_lists.data[i].next);
 618   }
 619 }
 620
 621 @*Word Forms. These structures are used to store word form rules, such as
 622 plurals\biblio{Conway, Damian. ``An Algorithmic Approach to English
 623 Pluralization''. \hskip 0pt plus 1in\hbox{}
 624 \.{http://www.csse.monash.edu.au/\~damian/papers/HTML/Plurals.html}}. You
 625 can store up to four different kinds, in case of languages other than
 626 English.
 627
 628 @^Conway, Damian@>
 629 @^plurals@>
 630
 631 @<Typedefs@>=
 632 typedef struct {
 633   int level;
 634   data_index next;
 635   unsigned char orig[32];
 636   unsigned char dest[32];
 637   boolean left_boundary;
 638   boolean right_boundary;
 639 } word_form_entry;
 640
 641 @ @<Global variables@>=
 642 memory_of(word_form_entry) word_forms;
 643
 644 @ @<Initialize memory@>= {
 645   int i;
 646   init_memory(word_forms,16);
 647   word_forms.used=8;
 648   for(i=0;i<8;i+=2) {
 649     word_forms.data[i].orig[0]=word_forms.data[i].dest[0]=0;
 650     word_forms.data[i].next=i+1;
 651     word_forms.data[i].level=0x7FFFFFFF;
 652     word_forms.data[i+1].orig[0]=word_forms.data[i+1].dest[0]=0;
 653     word_forms.data[i+1].next=none;
 654     word_forms.data[i+1].level=0;
 655   }
 656 }
 657
 658 @ Word form rules are added and then inserted in the correct place in the
 659 linked list using the |next| field. Entries with a higher numbered level
 660 take higher priority, therefore will be placed before the ones with lower
 661 numbered level. Next, longer |orig| strings come before shorter strings,
 662 since they might be more specific forms of the others and will therefore
 663 override them.
 664
 665 @-p data_index add_word_form(int kind,int level,char*orig,char*dest) {
 666   data_index n=new_record(word_forms);
 667   @<Set the fields of the new word form rule@>;
 668   @<Insert the new word form rule into the linked list@>;
 669   return n;
 670 }
 671
 672 @ The |left_boundary| and |right_boundary| fields specify if they should
 673 match only at the boundary. Characters are checked using the \.W table and
 674 removed from the string to place in the list.
 675
 676 @d last_character(_str) ((_str)[strlen(_str)-1])
 677
 678 @<Set the fields of the new word form rule@>= {
 679   word_forms.data[n].level=level;
 680   strcpy(word_forms.data[n].orig,orig+(tables['W'][*orig]==2));
 681   word_forms.data[n].left_boundary=(tables['W'][*orig]==2);
 682   if((word_forms.data[n].right_boundary=
 683    (tables['W'][last_character(word_forms.data[n].orig)]==3)))
 684     last_character(word_forms.data[n].orig)=0;
 685   strcpy(word_forms.data[n].dest,dest+(tables['W'][*dest]==2));
 686   if(tables['W'][last_character(word_forms.data[n].dest)]==3)
 687     last_character(word_forms.data[n].dest)=0;
 688 }
 689
 690 @ @<Insert the new word form rule into the linked list@>= {
 691   data_index y=(kind&3)<<1; // previous item to |x|
 692   data_index x=word_forms.data[y].next; // current item
 693   int s=strlen(orig);
 694   for(;x!=none;y=x,x=word_forms.data[y].next) {
 695     if(word_forms.data[x].next==none) break;
 696     @#if(word_forms.data[x].level<level) break;
 697     if(word_forms.data[x].level>level) continue;
 698     @#if(strlen(word_forms.data[x].orig)<s) break;
 699   }
 700   word_forms.data[y].next=n;
 701   word_forms.data[n].next=x;
 702 }
 703
 704 @ Now to do computation of changing a word by word forms. This function
 705 expects only one word from input, or multiple words where the last one
 706 should be the word to be converted. Uppercase letters are converted to
 707 lowercase for conversion (but not the other way around), but if the
 708 letters are uppercase in the input, the output will also have uppercase
 709 letters on those positions. The algorithm starts from the right side of
 710 the input string.
 711
 712 The parameter |src| is the input, and |dest| should point to a buffer
 713 which is large enough to store the output string.
 714
 715 @^plurals@>
 716
 717 @-p data_index reform_word(int kind,char*src,char*dest) {
 718   char*l=src+strlen(src);
 719   data_index n=word_forms.data[(kind&3)<<1].next;
 720   strcpy(dest,src); // this is used later
 721   @<Try each word form rule, following the |next| pointers@>;
 722   return none; // in case there is nothing to do
 723 }
 724
 725 @ @<Try each word form rule, following the |next| pointers@>= {
 726   char*p;
 727   int s;
 728   while(n!=none && word_forms.data[n].next!=none) {
 729     s=strlen(word_forms.data[n].orig); @+ p=l-s;
 730     @<Check the characters matching from |p|, going backwards@>;
 731     n=word_forms.data[n].next;
 732   }
 733 }
 734
 735 @ Look ahead for the definition of |wcasecmp| (true means it matches).
 736
 737 @<Check the characters matching from |p|, going backwards@>= {
 738   for(;;) {
 739     if((!word_forms.data[n].left_boundary || p==src
 740      || tables['W'][p[-1]])
 741       && wcasecmp(word_forms.data[n].orig,p))
 742        @<A match to the word form rules has been found@>;
 743     @<Go backwards, stop if we are not allowed to continue backwards@>;
 744   }
 745 }
 746
 747 @ @<A match to the word form rules has been found@>= {
 748   char*o=dest+(p-src);
 749   sprintf(o,"%s%s",word_forms.data[n].dest,p+s);
 750   @<Change the capitalization to match the original@>;
 751   return n;
 752 }
 753
 754 @ Remember, that for example if ``cow'' becomes ``kine'', then ``Cow''
 755 will become ``Kine''. So, it will retain capitalization.
 756
 757 @^cows@>
 758
 759 @<Change the capitalization to match the original@>= {
 760   char*q=word_forms.data[n].orig;
 761   for(;*p && *q;p++,o++,q++)
 762     if(*p==tables['U'][*q] && *p!=tables['L'][*q]) *o=tables['U'][*o];
 763 }
 764
 765 @ @<Go backwards, stop if we are not allowed to continue backwards@>= {
 766   if(word_forms.data[n].right_boundary) break; // matches only on boundary
 767   if(tables['W'][p[s]]) break; // only the last word(s) can be matched
 768   if(p--==src) break; // stop at beginning
 769 }
 770
 771 @ This function is defined to compare strings in the way needed for
 772 matching word forms, including case conversion. The lowercase letters in
 773 the |shorter| string are permitted to match lowercase and uppercase
 774 letters in the |longer| string, and the |shorter| string is permitted to
 775 be shorter and still match.
 776
 777 @-p boolean wcasecmp(char*shorter,char*longer) {
 778   for(;;shorter++,longer++) {
 779     if(!*shorter) return 1;
 780     if(!*longer) return 0;
 781     if(*shorter!=*longer && *shorter!=tables['L'][*longer]) return 0;
 782   }
 783 }
 784
 785 @ Of course it is now needed a command that can access these features from
 786 within a \TeX nicard template. The |level| of the matched rule is also
 787 returned, in case your program might use that information for something.
 788
 789 @<Cases for system commands@>=
 790   @-case 'W': {
 791     // Convert a word form
 792     int k=pop_num();
 793     char*o=pop_string();
 794     char q[1500];
 795     data_index n=reform_word(k,o,q);
 796     push_string(q);
 797     if(n==none) push_num(0);
 798     else push_num(word_forms.data[n].level);
 799     free(o);
 800     break;
 801   }@#
 802
 803 @ @<Display the list of word form rules@>= {
 804   data_index i;
 805   foreach(i,word_forms) {
 806     printf("%d %c\"",i,word_forms.data[i].left_boundary?'[':' ');
 807     display_string(word_forms.data[i].orig);
 808     printf("\"%c -> \"",word_forms.data[i].right_boundary?']':' ');
 809     display_string(word_forms.data[i].dest);
 810     printf("\" %d >%d\n",word_forms.data[i].level
 811       ,word_forms.data[i].next);
 812   }
 813 }
 814
 815 @*Random Number Generation. This program uses the Xorshift algorithm,
 816 invented by George Marsaglia\biblio{Marsaglia (July 2003). ``Xorshift
 817 RNGs''. Journal of Statistical Software Vol.~8 (Issue 14). {\tt
 818 http://www.jstatsoft.org/v08/i14/paper}.}.
 819
 820 @^Marsaglia, George@>
 821 @^random numbers@>
 822
 823 @<Global variables@>=
 824 unsigned int rng_x;
 825 unsigned int rng_y;
 826 unsigned int rng_z;
 827 unsigned int rng_w;
 828
 829 @ @<Initialize the random number generator@>= {
 830 @q[initialize the random seed::]@>
 831   rng_seed((unsigned int)time(0));
 832 @q[::initialize the random seed]@>
 833 }
 834
 835 @ The seed parameters for the random number generator will be seeded using
 836 the linear congruential generator, which is a simpler generator which can
 837 be used to seed it with.
 838
 839 The parameters |lcg_a| and |lcg_c| are parameters to the linear
 840 congruential generator algorithm. The values used here are the same as
 841 those used in GNU C. In this program they will be specified explicitly so
 842 that you can get identical output on different computers.
 843
 844 @d lcg_a 1103515245
 845 @d lcg_c 12345
 846
 847 @-p void rng_seed(unsigned int x) {
 848   rng_x=x=lcg_a*x+lcg_c;
 849   rng_y=x=lcg_a*x+lcg_c;
 850   rng_z=x=lcg_a*x+lcg_c;
 851   rng_w=x=lcg_a*x+lcg_c;
 852 }
 853
 854 @ There is a command to reseed it using a constant (so that you can
 855 generate the same numbers on different computers).
 856
 857 @<Cases for system commands@>=
 858   @-case 'U': {
 859     // Reseed the random number generator
 860     if(stack_ptr->is_string) program_error("Type mismatch");
 861     rng_seed(pop_num());
 862     break;
 863   }@#
 864
 865 @ And now follows the algorithm for generating random numbers. One change
 866 has been made so that once it is modulo, all number will still be of equal
 867 probability.
 868
 869 Numbers are generated in the range from 0 up to but not including |limit|.
 870
 871 @d max_uint ((unsigned int)(-1))
 872
 873 @-p unsigned int gen_random(unsigned int limit) {
 874   unsigned int r=max_uint-(max_uint%limit); // range check
 875   for(;;) {
 876     @<Make the next number |rng_w|...@>;
 877     @<Check the range, try again if out of range, else |return|@>;
 878   }
 879 }
 880
 881 @ @<Make the next number |rng_w| by Xorshift algorithm@>= {
 882   unsigned int t = rng_x ^ (rng_x << 11);
 883   rng_x = rng_y; @+ rng_y = rng_z; @+ rng_z = rng_w;
 884   rng_w ^= (rng_w >> 19) ^ t ^ (t >> 8);
 885 }
 886
 887 @ @<Check the range, try again if out of range, else |return|@>= {
 888   if(rng_w<=r) return rng_w%limit;
 889 }
 890
 891 @ @<Cases for system commands@>=
 892   @-case 'u': {
 893     // Generate a random number
 894     if(stack_ptr->is_string) program_error("Type mismatch");
 895     stack_ptr->number=gen_random(stack_ptr->number);
 896     break;
 897   }@#
 898
 899 @*Stack Programming Language. Now we get to the part where the user can
 900 enter a program, in order to control the features of this program. The
 901 programming language used is like \.{dc}, but different.
 902
 903 @.dc@>
 904
 905 Subroutines are simply stored as strings in the |names| area, since they
 906 are the same as registers.
 907
 908 @ Now we have the storage of registers. Registers 0 to 255 are stored in
 909 this separate list, while other register values are just stored in the
 910 |names| list. There is also a stack, which has storage of the same values
 911 as registers can contain.
 912
 913 @d max_stack 0x1000
 914
 915 @<Typedefs@>=
 916 typedef struct {
 917   boolean is_string;
 918   union @+{
 919     int number;
 920     unsigned char*text;
 921   }@+;
 922 } register_value;
 923
 924 @ @<More elements of |name_data|@>=
 925   register_value value;
 926
 927 @ @<Global variables@>=
 928 register_value registers[256];
 929 register_value stack[max_stack];
 930 register_value*stack_ptr=stack-1; // current top of stack element
 931
 932 @ Here are some codes for pushing and popping the stack.
 933
 934 @d pop_num() ((stack_ptr--)->number)
 935
 936 @-p inline void push_string(char*s) {
 937   ++stack_ptr;
 938   stack_ptr->is_string=1;
 939   stack_ptr->text=strdup(s);
 940 }
 941
 942 @ @-p inline void push_num(int n) {
 943   ++stack_ptr;
 944   stack_ptr->is_string=0;
 945   stack_ptr->number=n;
 946 }
 947
 948 @ @-p inline void stack_dup(void) {
 949   if((stack_ptr[1].is_string=stack_ptr->is_string)) {
 950     stack_ptr[1].text=strdup(stack_ptr->text);
 951   } @+else {
 952     stack_ptr[1].number=stack_ptr->number;
 953   }
 954   stack_ptr++;
 955 }
 956
 957 @ @-p inline void stack_drop(void) {
 958   if(stack_ptr->is_string) free(stack_ptr->text);
 959   --stack_ptr;
 960 }
 961
 962 @ @-p inline char*pop_string(void) {
 963   char*p=stack_ptr->text;
 964   stack_ptr->is_string=0; stack_ptr->text=0;
 965   --stack_ptr;
 966   return p;
 967 }
 968
 969 @ Also, some subroutines are needed here in order to deal with registers.
 970
 971 For |fetch_code|, the string |"0[]+"| is returned if it is not a string,
 972 generating a ``Type mismatch'' error when you try to run it.
 973
 974 @-p inline char*fetch_code(int r) {
 975   if(!(r&~0xFF)) {
 976     if(!registers[r].is_string) return "0[]+";
 977     return registers[r].text;
 978   } @+else {
 979     if(!name_info(r).value.is_string) return "0[]+";
 980     return name_info(r).value.text;
 981   }
 982 }
 983
 984 @ @-p inline void fetch(int r) {
 985   register_value*v;
 986   if(!(r&~0xFF)) v=&(registers[r]);
 987   else v=&(name_info(r).value);
 988   (++stack_ptr)->is_string=v->is_string;
 989   if(v->is_string) {
 990     stack_ptr->text=strdup(v->text);
 991   } @+else {
 992     stack_ptr->number=v->number;
 993   }
 994 }
 995
 996 @ @-p inline void store(int r) {
 997   register_value*v;
 998   if(!(r&~0xFF)) v=&(registers[r]);
 999   else v=&(name_info(r).value);
1000   if(v->is_string) free(v->text);
1001   v->is_string=stack_ptr->is_string;
1002   if(v->is_string) {
1003     v->text=stack_ptr->text;
1004   } @+else {
1005     v->number=stack_ptr->number;
1006   }
1007   --stack_ptr;
1008 }
1009
1010 @ There is also a save stack. This save stack stores the saved values of
1011 the registers |'0'| to |'9'|, so that you can have local variables in a
1012 subroutine.
1013
1014 @<Global variables@>=
1015 register_value save_stack[520];
1016 register_value*save_stack_ptr=save_stack;
1017
1018 @ These codes deal with the save stack. Strings will be copied when
1019 saving. When loading, strings that were previously in the registers will
1020 be freed.
1021
1022 @<Save local registers to the save stack@>= {
1023   int i;
1024   for(i='0';i<='9';i++) {
1025     *save_stack_ptr=registers[i];
1026     if(registers[i].is_string)
1027       save_stack_ptr->text=strdup(save_stack_ptr->text);
1028     save_stack_ptr++;
1029   }
1030 }
1031
1032 @ @<Load local registers from the save stack@>= {
1033   int i;
1034   for(i='9';i>='0';i--) {
1035     if(registers[i].is_string) free(registers[i].text);
1036     registers[i]=*--save_stack_ptr;
1037   }
1038 }
1039
1040 @*Commands for Stack Programming Language. Finally, is the code where it
1041 can be executed. The return value of this function indicates how many
1042 levels should be exit when it is called.
1043
1044 @-p int execute_program(unsigned char*prog) {
1045   unsigned char*ptr=prog;
1046 reset_execute_program:
1047   for(;*ptr;ptr++) {
1048     switch(*ptr) {
1049       @<Cases for literal data commands@>@;
1050       @<Cases for stack manipulation commands@>@;
1051       @<Cases for arithmetic commands@>@;
1052       @<Cases for flow-control commands@>@;
1053       @<Cases for register/table operation commands@>@;
1054       @<Cases for string commands@>@;
1055       @<Cases for condition/compare commands@>@;
1056       @<Cases for local registers commands@>@;
1057       @<Cases for system commands@>@;
1058       @-case '?': @<Do a diagnostics command@>@;@+break;
1059       default:
1060         if(*ptr>='0' && *ptr<='9') {
1061           @<Read a literal number and push to stack@>;
1062         } @+else if(0x80&*ptr) {
1063           @<Execute a subroutine code from the current character@>;
1064         }
1065         break;
1066     }
1067     if(stack_ptr<stack-1) program_error("Stack underflow");
1068     if(stack_ptr>stack+max_stack) program_error("Stack overflow");
1069   }
1070   return 0;
1071 }
1072
1073 @ @<Cases for literal data commands@>=
1074   @-case '`': {
1075     // Literal ASCII character
1076     push_num(*++ptr);
1077     break;
1078   }@#
1079   @-case '[': {
1080     // Literal string
1081     @<Read a literal string and push to stack@>;
1082     break;
1083   }@#
1084   @-case '(': {
1085     // Literal name
1086     @<Read a literal name and push its number to the stack@>;
1087     break;
1088   }@#
1089
1090 @ @<Read a literal number and push to stack@>= {
1091   int n=0;
1092   while(*ptr>='0' && *ptr<='9') n=10*n+(*ptr++)-'0';
1093   --ptr;
1094   push_num(n);
1095 }
1096
1097 @ @<Read a literal string and push to stack@>= {
1098   char*p=++ptr;
1099   int n=1;
1100   while(n && *ptr) {
1101     if(*ptr=='[') ++n;
1102     if(*ptr==']') --n;
1103     if(n) ptr++;
1104   }
1105   if(!*ptr) program_error("Unterminated string literal");
1106   *ptr=0;
1107   push_string(p);
1108   *ptr=']';
1109 }
1110
1111 @ @<Read a literal name and push its number to the stack@>= {
1112   char*p=++ptr;
1113   while(*ptr && *ptr!=')') ptr++;
1114   if(!*ptr) program_error("Unterminated string literal");
1115   *ptr=0;
1116   push_num(find_name(p));
1117   *ptr=')';
1118 }
1119
1120 @ @<Cases for stack manipulation commands@>=
1121   @-case 'D': {
1122     // Drop top item of stack
1123     stack_drop();
1124     break;
1125   }@#
1126   @-case 'c': {
1127     // Clears the stack, rendering it empty
1128     while(stack_ptr>=stack) stack_drop();
1129     break;
1130   }@#
1131   @-case 'd': {
1132     // Duplicates the value on top of the stack.
1133     stack_dup();
1134     break;
1135   }@#
1136   @-case 'r': {
1137     // Swaps the top two values on the stack
1138     stack_ptr[1]=stack_ptr[0];
1139     stack_ptr[0]=stack_ptr[-1];
1140     stack_ptr[-1]=stack_ptr[1];
1141     break;
1142   }@#
1143
1144 @ @<Cases for arithmetic commands@>=
1145   @-case '+': {
1146     // Add two numbers, or concatenate two strings
1147     if(stack_ptr->is_string) {
1148       @<Concatenate strings on the stack@>;
1149     }@+ else {
1150       int n=pop_num();
1151       if(stack_ptr->is_string)
1152         program_error("Type mismatch");
1153       stack_ptr->number+=n;
1154     }
1155     break;
1156   }@#
1157   @-case '-': {
1158     // Subtract two numbers, or compare two strings
1159     if(stack_ptr->is_string) {
1160       @<Compare strings on the stack@>;
1161     }@+ else {
1162       int n=pop_num();
1163       if(stack_ptr->is_string)
1164         program_error("Type mismatch");
1165       stack_ptr->number-=n;
1166     }
1167     break;
1168   }@#
1169   @-case '*': {
1170     // Multiply two numbers
1171     int n=pop_num();
1172     if(stack_ptr[0].is_string || stack_ptr[1].is_string)
1173       program_error("Number expected");
1174     stack_ptr->number*=n;
1175     break;
1176   }@#
1177   @-case '/': {
1178     // Divide two numbers
1179     int n=pop_num();
1180     if(stack_ptr[0].is_string || stack_ptr[1].is_string)
1181       program_error("Number expected");
1182     if(n==0) program_error("Division by zero");
1183     stack_ptr->number/=n;
1184     break;
1185   }@#
1186   @-case '%': {
1187     // Modulo of two numbers
1188     int n=pop_num();
1189     if(stack_ptr[0].is_string || stack_ptr[1].is_string)
1190       program_error("Number expected");
1191     if(n==0) program_error("Division by zero");
1192     stack_ptr->number%=n;
1193     break;
1194   }@#
1195
1196 @ @<Concatenate strings on the stack@>= {
1197   char*s=pop_string();
1198   char*q;
1199   if(!stack_ptr->is_string) program_error("Type mismatch");
1200   q=malloc(strlen(s)+strlen(stack_ptr->text)+1);
1201   strcpy(q,stack_ptr->text);
1202   strcpy(q+strlen(q),s);
1203   stack_drop();
1204   push_string(q);
1205   free(q);
1206   free(s);
1207 }
1208
1209 @ @<Compare strings on the stack@>= {
1210   char*s=pop_string();
1211   char*q=pop_string();
1212   push_num(strcmp(q,s));
1213   free(q);
1214   free(s);
1215 }
1216
1217 @ @<Cases for flow-control commands@>=
1218   @-case 'Q': {
1219     // Exit from multiple levels
1220     int q=pop_num();
1221     if(q>0) return q-1;
1222     break;
1223   }@#
1224   @-case 'Y': {
1225     // Go back to beginning
1226     ptr=prog-1;
1227     break;
1228   }@#
1229   @-case 'q': {
1230     // Exit from two levels
1231     return 1;
1232     break;
1233   }@#
1234   @-case 'x': {
1235     // Execute code from top of stack
1236     @<Execute a string or subroutine code from top of stack@>;
1237     break;
1238   }@#
1239
1240 @ Note here, it is a recursive function call.
1241 @^recursive@>
1242
1243 @<Execute a string or subroutine code from top of stack@>= {
1244   if(stack_ptr->is_string) {
1245     char*p=pop_string();
1246     int q=execute_program(p);
1247     free(p);
1248     if(q) return q-1;
1249   } @+else {
1250     char*p=fetch_code(pop_num());
1251     int q=execute_program(p);
1252     if(q) return q-1;
1253   }
1254 }
1255
1256 @ Since the extended characters (|0x80| to |0xFF|) do not correspond to
1257 any commands, here we can use them to execute a subroutine code, allowing
1258 many things related to self-modifying code (and other stuff) to be done
1259 that would be difficult otherwise.
1260
1261 @<Execute a subroutine code from the current character@>= {
1262   char*p=fetch_code(*ptr);
1263   int q=execute_program(p);
1264   if(q) return q-1;
1265 }
1266
1267 @ @<Cases for register/table operation commands@>=
1268   @-case ':': {
1269     // Store value to table
1270     int n;
1271     if(stack_ptr->is_string) program_error("Number expected");
1272     n=pop_num();
1273     tables[0x7F&*++ptr][n]=pop_num();
1274     break;
1275   }@#
1276   @-case ';': {
1277     // Load value from table
1278     stack_ptr->number=tables[0x7F&*++ptr][stack_ptr->number];
1279     break;
1280   }@#
1281   @-case 'L': {
1282     // Load value from register named by stack
1283     if(stack_ptr->is_string) program_error("Number expected");
1284     fetch(pop_num());
1285     break;
1286   }@#
1287   @-case 'S': {
1288     // Store value in register named by stack
1289     if(stack_ptr->is_string) program_error("Number expected");
1290     store(pop_num());
1291     break;
1292   }@#
1293   @-case 'l': {
1294     // Load value from register
1295     fetch(*++ptr);
1296     break;
1297   }@#
1298   @-case 's': {
1299     // Store value in register
1300     store(*++ptr);
1301     break;
1302   }@#
1303
1304 @ @<Cases for string commands@>=
1305   @-case 'B': {
1306     // Put brackets around a string, or convert number to text
1307     if(stack_ptr->is_string) {
1308       @<Put brackets around string at top of stack@>;
1309     } @+else {
1310       @<Convert top of stack to string representation of a number@>;
1311     }
1312     break;
1313   }@#
1314   @-case 'Z': {
1315     // Calculate number of characters in a string
1316     char*s=pop_string();
1317     push_num(strlen(s));
1318     free(s);
1319     break;
1320   }@#
1321   @-case 'a': {
1322     // ``ASCIIfy'' a number
1323     if(stack_ptr->is_string) {
1324       if(stack_ptr->text[0]) stack_ptr->text[1]=0;
1325     } @+else {
1326       int n=stack_ptr->number;
1327       stack_ptr->is_string=1;
1328       stack_ptr->text=malloc(2);
1329       stack_ptr->text[0]=n;
1330       stack_ptr->text[1]=0;
1331     }
1332     break;
1333   }@#
1334   @-case 'A': {
1335     // Take the first character from the string
1336     char*s=stack_ptr->text;
1337     if(!stack_ptr->is_string || !*s) return 0;
1338     push_num(*s);
1339     stack_ptr[-1].text=strdup(s+1);
1340     free(s);
1341     break;
1342   }@#
1343   @-case 'N': {
1344     // Convert a register number to its name
1345     int n=stack_ptr->number;
1346     if(stack_ptr->is_string) program_error("Type mismatch");
1347     if(n<256 || n>=names.used+256) program_error("Out of range");
1348     stack_drop();
1349     push_string(names.data[n-256].name);
1350     break;
1351   }@#
1352
1353 @ @<Put brackets around string at top of stack@>= {
1354   char*buf=malloc(strlen(stack_ptr->text)+3);
1355   sprintf(buf,"[%s]",stack_ptr->text);
1356   free(stack_ptr->text);
1357   stack_ptr->text=buf;
1358 }
1359
1360 @ @<Convert top of stack to string representation of a number@>= {
1361   char buf[32];
1362   sprintf(buf,"%d",stack_ptr->number);
1363   stack_drop();
1364   push_string(buf);
1365 }
1366
1367 @ Here is how the ``Arithmetic IF'' command works: On the stack you have
1368 any three values at the top, and a number underneath it. Those are all
1369 removed, except one of the three values which is selected based on the
1370 sign of the number (the condition value).
1371
1372 @<Cases for condition/compare commands@>=
1373   @-case 'i': {
1374     // Arithmetic IF
1375     @<Do the ``Arithmetic IF''@>;
1376     break;
1377   }@#
1378   @-case '&': {
1379     // Bitwise AND
1380     int n=pop_num();
1381     if(stack_ptr[0].is_string || stack_ptr[1].is_string)
1382       program_error("Number expected");
1383     stack_ptr->number&=n;
1384     break;
1385   }@#
1386
1387 @ Do you like this algorithm? Is this a real question?
1388
1389 @^strange codes@>
1390
1391 @<Do the ``Arithmetic IF''@>= {
1392   register_value v=stack_ptr[-3];
1393   int n=v.number;
1394   n=-(n<0?2:!n);
1395   stack_ptr[-3]=stack_ptr[n];
1396   stack_ptr[n]=v;
1397   stack_drop();@+stack_drop();@+stack_drop();
1398 }
1399
1400 @ @<Cases for local registers commands@>=
1401   @-case '<': {
1402     // Save locals
1403     @<Save local registers to the save stack@>;
1404     break;
1405   }@#
1406   @-case '>': {
1407     // Restore locals
1408     @<Load local registers from the save stack@>;
1409     break;
1410   }@#
1411
1412 @ When there is a program error (such as stack underflow), the following
1413 subroutine is used to handle it.
1414
1415 @d program_error(_text) program_error_(prog,ptr,_text)
1416
1417 @-p void program_error_(char*prog,char*ptr,char*msg) {
1418   fprintf(stderr,"Error in %s on line %d",current_filename,current_line);
1419   fprintf(stderr,"\n! %s\ns%dS%dp%d near \"",msg,stack_ptr-stack,
1420    save_stack_ptr-save_stack,ptr-prog);
1421   @<Display the codes near the part that caused the error@>;
1422   fprintf(stderr,"\"\n");
1423   exit(1);
1424 }
1425
1426 @ @<Display the codes near the part that caused the error@>= {
1427   char buf[32];
1428   char*p=ptr-5;
1429   int i;
1430   if(p<prog || p>ptr) p=prog;
1431   for(i=0;p+i<=ptr && p[i];i++) buf[i]=p[i];
1432   buf[i]=0;
1433   fprintf(stderr,"%s",buf);
1434 }
1435
1436 @*Tables and Registers. The tables must be stored here. There are 128
1437 tables with 256 entries each, each of which can store one byte of data.
1438 These tables are used for converting uppercase/lowercase, for deciding
1439 which characters need to be escaped in \TeX, and so on.
1440
1441 The purposes of the built-in registers are also described in this chapter.
1442 The tables and registers named by uppercase letters are for system use.
1443 The tables and registers named by lowercase can be used by the user.
1444
1445 @<Global variables@>=
1446 unsigned char tables[128][256];
1447
1448 @ Here are the uses of the built-in tables and registers:
1449 @^built-in registers@>
1450 @^built-in tables@>
1451
1452 Register \.A: The current position in the current cards area.
1453
1454 Register \.C: The current cards area.
1455
1456 Register \.D: Dots per inch, multiplied by 100.
1457
1458 Register \.E: The escape character for \TeX. If this is a string, the
1459 entire string is the prefix; otherwise, it is a ASCII number of the
1460 character to be used.
1461
1462 Register \.K: Index number for last keyword entry added. Also used when
1463 dealing with keyword operation commands, and when a keyword is matched in
1464 a pattern.
1465
1466 Register \.P: The current pattern area.
1467
1468 Register \.Q: The parameters for the ImageMagick command-line, separated
1469 by spaces.
1470
1471 Register \.T: Alignment tab character for \TeX. Same considerations apply
1472 as the \.E register.
1473
1474 Register \.U: A code to execute for a deck specification enrty with \.x
1475 flag set.
1476
1477 Register \.V: The version number of this program.
1478
1479 Register \.W: A code which pushes the whatsit replacements onto the stack.
1480 It is initialized to a blank string before each line in a card area. It
1481 should push the replacements in the reverse order of the whatsits, so you
1482 could use a code like this, for example: \.{[(Abracadabra)]lW+sW}
1483
1484 Register \.X: Horizontal coordinate across the page (in pixels).
1485
1486 Register \.Y: Vertical coordinate across the page (in pixels).
1487
1488 Register \.Z: Should be set to a code to execute after doing everything
1489 else (but before writing output files).
1490
1491 Table \.E: Indicates which characters need escaped for \TeX. Also used for
1492 category codes in internal typesetting mode (a discussion of the category
1493 codes will be deferred to a later part of this book).
1494
1495 Table \.F: Space factor codes for internal typesetting, where 40 is normal
1496 (multiplying these values by 25 results in the corresponding \.{\\sfcode}
1497 values in \TeX). Zero means no change.
1498
1499 Table \.G: Table containing information for sorting and grouping.
1500
1501 Table \.J: Left margin protrusions for internal typesetting. A value of
1502 128 is normal. Each one unit less or greater than 128 represents a
1503 distance of 0.005 em, where number less than 128 for negative kerns and
1504 greater than 128 for positive kerns. (Note that you will use {\sl negative
1505 negative} kerns to protrude into the margin, both for the left protrusions
1506 and for the right protrusions!)
1507
1508 Table \.K: Right margin protrusions for internal typesetting.
1509
1510 Table \.L: Conversion to lowercase.
1511
1512 Table \.S: Information for natural sorting.
1513
1514 Table \.U: Conversion to uppercase.
1515
1516 Table \.W: Table for word form rules. Zero means a letter, one means a
1517 word separator, two means use to mark beginning of a word, three means use
1518 to mark the end of a word. In this program, it is advantageous to use the
1519 fact that zero means word characters (such as letters), and nonzero means
1520 nonword characters.
1521
1522 @d init_register(_reg,_val) do@+{
1523   registers[_reg].is_string=0;
1524   registers[_reg].number=(_val);
1525 }@+while(0)@;
1526 @#
1527 @d init_register_str(_reg,_val) do@+{
1528   registers[_reg].is_string=1;
1529   registers[_reg].text=strdup(_val);
1530 }@+while(0)@;
1531
1532 @<Initialize the tables and registers@>= {
1533   int i;
1534   for(i=0;i<256;i++) init_register(i,0);
1535   init_register('E','\\');
1536   init_register('V',version_number);
1537   @<Initialize table of alphabetical case conversion@>;
1538   @<Initialize tables for internal typesetting@>;
1539 }
1540
1541 @ @<Initialize table of alphabetical case conversion@>= {
1542   for(i=0;i<256;i++) tables['L'][i]=tables['U'][i]=i;
1543   for(i='A';i<='Z';i++) {
1544     tables['L'][i]=i+'a'-'A';
1545     tables['U'][i+'a'-'A']=i;
1546   }
1547 }
1548
1549 @ @<Display the contents of table |*++ptr|@>= {
1550   int t=*++ptr;
1551   int i;
1552   for(i=0;i<256;i++) {
1553     printf("%c%c",tables[t][i]?'+':'.',@|
1554       (tables[t][i]<0x7F && tables[t][i]>=' ')?tables[t][i]:'.'
1555     );
1556     if((i&0x0F)==0x0F) printf("\n");
1557   }
1558   for(i=' ';i<0x7F;i++) if(tables[t][i]) printf("%c",i);
1559 }
1560
1561 @*Diagnostics. Here is diagnostics commands. These are used to display the
1562 internal information on standard output, so that you can check how these
1563 things are working. (You can also use \.{gdb} for debugging purposes.) A
1564 diagnostics command always starts with a question mark, and is then
1565 followed by one more character indicating the type of diagnostics
1566 requestsed. (Some are followed by an additional character after that.)
1567
1568 @<Do a diagnostics command@>= {
1569   switch(*++ptr) {
1570     case 'c': @<Display the sorted card list@>; @+break;
1571     case 'd': @<Display the deck list@>; @+break;
1572     case 'f': @<Display font information@>; @+break;
1573     case 'k': @<Display the list of keywords@>; @+break;
1574     case 'n': @<Display the list of names@>; @+break;
1575     case 'p': @<Display the list of patterns@>; @+break;
1576     case 's': @<Display the contents of the stack@>; @+break;
1577     case 't': @<Display the contents of table |*++ptr|@>; @+break;
1578     case 'w': @<Display the list of word form rules@>; @+break;
1579     case 'x': @<Display the list of typeset nodes@>; @+break;
1580     case 'y': @<Display typesetting diagnostics@>; @+break;
1581     default: program_error("Unknown type of diagnostics");
1582   }
1583 }
1584
1585 @ One subroutine is used here for displaying strings with escaped, so that
1586 it will display on a terminal without messing it up or omitting the
1587 display of some characters.
1588
1589 @-p void display_string(char*s) {
1590   for(;*s;s++) {
1591     if(*s<' ' || *s==0x7F) {
1592       printf("^%c",0x40^*s);
1593     } @+else {
1594       printf("%c",*s);
1595     }
1596   }
1597 }
1598
1599 @ @<Display the contents of the stack@>= {
1600   register_value*p;
1601   for(p=stack;p<=stack_ptr;p++) {
1602     if(p->is_string) {
1603       printf("[");
1604       display_string(p->text);
1605       printf("]\n");
1606     } @+else {
1607       printf("%d\n",p->number);
1608     }
1609   }
1610 }
1611
1612 @ More of the diagnostics functions are included in the chapters for the
1613 data structures which it is displaying.
1614
1615 @*Pattern Matching. Now, finally, after the chapter about patterns, and
1616 going through many other things in between, comes to the chapter in which
1617 patterns are actually being matched.
1618
1619 One structure is used here for the information about how to match it, and
1620 what has been matched from it. The parameter |num_capture| is how many
1621 captured parts there are, and the |start| and |end| arrays store the index
1622 into the |src| string of where the matches are. The entire matched part is
1623 indicated by |start[0]| and |end[0]| (note always |start[0]==0|).
1624
1625 @<Typedefs@>=
1626 typedef struct {
1627   char*src;
1628   char*truesrc; // used for checking true beginning of the line
1629   char*pattern;
1630   unsigned int category;
1631   int start[16];
1632   int end[16];
1633   int num_capture;
1634 } match_info;
1635
1636 @ This first one just matches one pattern against a string to see if it
1637 matches. It returns true if it does match. (It is somewhat inefficient.)
1638
1639 @-p boolean match_pattern(match_info*mat) {
1640   char*src; // current start of source string
1641   char*ptr; // pointer into source string |src|
1642   char*pptr; // pointer into pattern string
1643   src=mat->src; @+ mat->num_capture=0; @+ pptr=mat->pattern; @+ ptr=src;
1644   @<Execute the pattern on the string |src|@>;
1645   mismatch: return 0;
1646 }
1647
1648 @ This loop executes each command in the pattern in attempt to match each
1649 character. In case of mismatch, it will break out of this loop, and
1650 continue with the next iteration of the loop in the previous section.
1651
1652 @d not_a_marker !(pptr[-1]&0x80)
1653
1654 @<Execute the pattern on the string |src|@>= {
1655   while(*pptr) {
1656     switch(*pptr++) {
1657       case begin_capture:
1658         mat->start[++mat->num_capture]=ptr-mat->src; @+break;
1659       case end_capture: mat->end[mat->num_capture]=ptr-mat->src; @+break;
1660       case match_keyword: @<Do |match_keyword|@>; @+break;
1661       case match_table:
1662         if(!tables[*pptr++][*ptr++]) goto mismatch; @+break;
1663       case optional_table: ptr+=!!tables[*pptr++][*ptr]; @+break;
1664       case failed_match: goto mismatch;
1665       case jump_table:
1666         if(!(pptr=strchr(mat->pattern,0x80|tables[*pptr++][*ptr++])))
1667           goto mismatch;
1668         @+break;
1669       case successful_match: @<Do |successful_match|@>;
1670       case back_one_space: if(ptr--==mat->src) goto mismatch; @+break;
1671       case forward_one_space: if(!*ptr++) goto mismatch; @+break;
1672       case match_left_side: if(ptr!=mat->truesrc) goto mismatch; @+break;
1673       case match_right_side: if(*ptr>=' ') goto mismatch; @+break;
1674       default: if(not_a_marker && pptr[-1]!=*ptr++) goto mismatch;
1675     }
1676   }
1677 }
1678
1679 @ @<Do |successful_match|@>= {
1680   mat->start[0]=0;
1681   mat->end[0]=ptr-mat->src;
1682   return 1;
1683 }
1684
1685 @ And now, the next part matches from an area and changes the string in
1686 place, possibly by reallocating it. The |src| pointer passed to this
1687 function should be one that can be freed!
1688
1689 @-p char*do_patterns(char*src,int area) {
1690   pattern_data*pat;
1691   match_info mat;
1692   int index=0; // index into |src| string
1693   @<Cancel if there isn't a pattern area@>;
1694 continue_matching:
1695   if(index>=strlen(src)) return src;
1696   pat=pattern_areas.data+name_info(area).pattern_area;
1697   for(;;) {
1698     @<Fill up the |mat| structure for testing the current pattern@>;
1699     if(mat.pattern && match_pattern(&mat)) {
1700       @<Push the captured strings to the stack@>;
1701       @<Call the subroutine associated with this pattern@>;
1702       if(stack_ptr->is_string) {
1703         @<Replace the matched part from the stack and fix the |index|@>;
1704       } @+else {
1705         index+=mat.end[0];
1706       }
1707       stack_drop();
1708       goto continue_matching;
1709     }
1710     @<Select the next pattern in this area or |break|@>;
1711   }
1712   index++; @+ goto continue_matching;
1713 }
1714
1715 @ @<Cancel if there isn't a pattern area@>= {
1716   if(area<256) return src;
1717   if(!name_info(area).has_pattern_area) return src;
1718 }
1719
1720 @ @<Fill up the |mat| structure for testing the current pattern@>= {
1721   mat.src=src+index;
1722   mat.truesrc=src;
1723   mat.pattern=pat->text;
1724   mat.category=pat->category;
1725 }
1726
1727 @ @<Push the captured strings to the stack@>= {
1728   int i;
1729   for(i=mat.num_capture;i;i--) {
1730     push_string(src+index+mat.start[i]);
1731     stack_ptr->text[mat.end[i]-mat.start[i]]=0;
1732   }
1733 }
1734
1735 @ @<Call the subroutine associated with this pattern@>= {
1736   execute_program(names.data[pat->subroutine].value.text);
1737 }
1738
1739 @ The memory allocated is probably more than is needed, but this way is
1740 simpler. It is always sufficient amount, though. Think about it.
1741
1742 @^thought@>
1743
1744 @<Replace the matched part from the stack and fix the |index|@>= {
1745   char*q=malloc(strlen(src)+strlen(stack_ptr->text)+1);
1746   strcpy(q,src);
1747   sprintf(q+index,"%s%s",stack_ptr->text,src+index+mat.end[0]);
1748   free(src);
1749   src=q;
1750   index+=strlen(stack_ptr->text);
1751 }
1752
1753 @ @<Select the next pattern in this area or |break|@>= {
1754   if(pat->next==none) break;
1755   pat=pattern_areas.data+pat->next;
1756 }
1757
1758 @ Finally, there is a command |'M'| to do a pattern matching and
1759 replacement with a string, inside of a stack subroutine code.
1760
1761 @<Cases for system commands@>=
1762   @-case 'M': {
1763     // do pattern matching and replacement
1764     int n=pop_num();
1765     if(!stack_ptr->is_string) program_error("Type mismatch");
1766     stack_ptr->text=do_patterns(stack_ptr->text,n);
1767     break;
1768   }@#
1769
1770 @*Matching Keywords. Codes for matching keywords have been placed in
1771 another chapter, instead of making the previous chapter longer.
1772
1773 So now we can see how it is matched keywords in a pattern code.
1774
1775 @<Do |match_keyword|@>= {
1776   match_info m;
1777   char mstr[512];
1778   char t=*pptr++; // indicate which table to use
1779   data_index best=none;
1780   int best_length=-1;
1781   @<Try matching each keyword belonging to the category@>;
1782   if(best==none) goto mismatch;
1783   @<Adjust the \.K register for this keyword match@>;
1784   ptr+=m.end[0];
1785 }
1786
1787 @ @<Adjust the \.K register for this keyword match@>= {
1788   if(registers['K'].is_string) free(registers['K'].text);
1789   registers['K'].is_string=0;
1790   registers['K'].number=best;
1791 }
1792
1793 @ When matching keywords, all of them will be tried, in case there are
1794 better candidates for the search (bigger is better (so, for example,
1795 |"Power of One"| will override |"Power"|); failing that, later ones are
1796 better than earlier ones (so that user files can override keywords in
1797 template files)).
1798
1799 @^Courtenay, Bryce@>
1800 @^Houghton, Israel@>
1801 @^Luce, Ron@>
1802
1803 @<Try matching each keyword belonging to the category@>= {
1804   data_index i;
1805   foreach(i,keywords) {
1806     if(keywords.data[i].category&mat->category &&
1807      strlen(keywords.data[i].match)>=best_length) {
1808       @<Set up the |match_info| structure called |m|@>;
1809       @<Attempt applying this keyword match@>;
1810     }
1811   }
1812 }
1813
1814 @ @<Set up the |match_info| structure called |m|@>= {
1815   sprintf(mstr,"%s%c%c%c",
1816     keywords.data[i].match,match_table,t,successful_match);
1817   m.src=m.truesrc=ptr;
1818   m.pattern=mstr;
1819 }
1820
1821 @ @<Attempt applying this keyword match@>= {
1822   if(match_pattern(&m)) {
1823     best=i;
1824     best_length=strlen(keywords.data[i].match);
1825   }
1826 }
1827
1828 @*Sorting and Grouping. The card lists can be sorted/grouped using these
1829 commands, which are generally used by macros that create the records for
1830 the cards in the card areas.
1831
1832 @<Cases for system commands@>=
1833   @-case 'n': {
1834     // Add a new list entry
1835     data_index n=new_record(card_list);
1836     card_list.data[n].token_ptr=
1837       card_areas.data[set_card_area(registers['C'].number)].used
1838     ;
1839     break;
1840   }@#
1841   @-case 'f': {
1842     // Set a field value of the list entry
1843     data_index n=card_list.used-1;
1844     int x=pop_num();
1845     int y=pop_num();
1846     if(n==none) program_error("No card list is available");
1847     card_list.data[n].field[x&31]=y;
1848     break;
1849   }@#
1850
1851 @ Other than the commands to make the list entries above, there must be,
1852 of course, the actual sorting and grouping being done!
1853
1854 Sorting and grouping are controlled by the \.G table. Starting from a
1855 given offset (added), you use thirty-two entries for the thirty-two
1856 fields.
1857
1858 @<Cases for system commands@>=
1859   @-case 'G': {
1860     // Sort the list
1861     sorting_table_offset=pop_num();
1862     qsort(card_list.data,card_list.used,sizeof(list_entry),list_compare);
1863     @<Mark positions in the sorted list@>;
1864     break;
1865   }@#
1866
1867 @ @<Global variables@>=
1868 int sorting_table_offset;
1869
1870 @ This is the compare function for the list sorting. It is also worth to
1871 notice here what values belong in the \.G table. (There are also some
1872 other values, which are described a bit later.)
1873
1874 @d no_sort 0
1875 @d primary_ascending 'A'
1876 @d primary_descending 'Z'
1877 @d primary_name 'N'
1878 @d secondary_ascending 'a'
1879 @d secondary_descending 'z'
1880 @d secondary_name 'n'
1881 @d record_sorted_position 'R'
1882 @d reset_high_bits 'q'
1883 @#
1884 @d G_table(_field) (tables['G'][((sorting_table_offset+(_field))&0xFF)])
1885 @d p1s ((list_entry*)p1)
1886 @d p2s ((list_entry*)p2)
1887
1888 @-p int list_compare(const void*p1,const void*p2) {
1889   @<Compare using fields indicated by \.G table@>;
1890   @<Compare using the card's name and the \.S table@>;
1891   @<Compare using the order in which the cards are typed in@>;
1892   return 0; // This can't, but will, happen.
1893 }
1894
1895 @ @<Compare using fields indicated by \.G table@>= {
1896   int i;
1897   for(i=0;i<32;i++) if(p1s->field[i]!=p2s->field[i]) {
1898     if(G_table(i)==primary_ascending || (G_table(i)&0x80)) {
1899       return (p1s->field[i]>p2s->field[i])?1:-1;
1900     } @+else if(G_table(i)==primary_descending) {
1901       return (p1s->field[i]<p2s->field[i])?1:-1;
1902     } @+else if(G_table(i)==primary_name) {
1903       return name_compare(p1s->field[i],p2s->field[i]);
1904     }
1905   }
1906   for(i=0;i<32;i++) if(p1s->field[i]!=p2s->field[i]) {
1907     if(G_table(i)==secondary_ascending) {
1908       return (p1s->field[i]>p2s->field[i])?1:-1;
1909     } @+else if(G_table(i)==secondary_descending) {
1910       return (p1s->field[i]<p2s->field[i])?1:-1;
1911     } @+else if(G_table(i)==secondary_name) {
1912       return name_compare(p1s->field[i],p2s->field[i]);
1913     }
1914   }
1915 }
1916
1917 @ When all else fails, \strike{play dead} use the order in which the cards
1918 have been typed in. This is how it is made stable, and that you can get
1919 the same results on any computer.
1920
1921 @^Smith, Steve@>
1922
1923 @<Compare using the order in which the cards...@>= {
1924   if(p1s->token_ptr>p2s->token_ptr) return 1;
1925   if(p1s->token_ptr<p2s->token_ptr) return -1;
1926 }
1927
1928 @ The last thing to do after sorting, is mark positions in the list if it
1929 is requested to do so.
1930
1931 In addition, it shall also optionally mark high bits (30 to 27) of some
1932 fields, based on when other fields change. This helps with doing multi-%
1933 dimensional statistics. The fields that it is based on will automatically
1934 be primary sorted since such sorting is required for the marking algorithm
1935 to work properly.
1936
1937 @<Mark positions in the sorted list@>= {
1938   data_index i;
1939   int j;
1940   for(j=0;j<32;j++) {
1941     if(G_table(j)==record_sorted_position) {
1942       foreach(i,card_list) card_list.data[i].field[j]=i;
1943     } @+else if(G_table(j)&0x80) {
1944       @<Mark high bits of fields to prepare for...@>;
1945     } @+else if(G_table(j)==reset_high_bits) {
1946       foreach(i,card_list) card_list.data[i].field[j]&=0x0FFFFFFF;
1947     }
1948   }
1949 }
1950
1951 @ The rule is that whenever the current field's value changes, the bit in
1952 the corresponding grouping field will be flipped. Since the statistics
1953 grouping always treats consecutive equal values in the grouping field as
1954 belonging to the same group, this is a way to insert ``group breaks'' into
1955 the list.
1956
1957 @<Mark high bits of fields to prepare for complex statistics@>= {
1958   int f=G_table(j)&0x1F; // other field number
1959   int v=card_list.data[0].field[j]; // previous value
1960   int k=1<<(27+((G_table(j)&0x60)>>5)); // bit flip value
1961   int b=0; // current bit value
1962   foreach(i,card_list) {
1963     if(v!=card_list.data[i].field[j]) b^=k;
1964     card_list.data[i].field[f]&=~k;
1965     card_list.data[i].field[f]|=b;
1966     v=card_list.data[i].field[j];
1967   }
1968 }
1969
1970 @ @<Display the sorted card list@>= {
1971   data_index i;
1972   int j;
1973   foreach(i,card_list) {
1974     printf("%d=[ ",card_list.data[i].token_ptr);
1975     for(j=0;j<32;j++) printf("%d ",card_list.data[i].field[j]);
1976     printf("]\n");
1977   }
1978 }
1979
1980 @*Natural Sorting. A natural compare algorithm is used here. It is a
1981 generalization of Martin Pool's algorithm\biblio{Pool, Martin. ``Natural
1982 Order String Comparison''. {\tt
1983 http://sourcefrog.net/projects/natsort/}.}.
1984
1985 The \.S table maps from character tokens to the sorting specifications.
1986 Name tokens are converted to |whatsit| when looking up in this table.
1987
1988 Tokens are grouped into digits, letters, and priority letters. There are
1989 also some extras, such as spaces and radix point. A string of consecutive
1990 digits is treated as numeric, so a number with more digits comes after a
1991 number with less digits.
1992
1993 Priority letters are used mainly for sorting roman numerals. Two or more
1994 consecutive priority letters are considered as a group, otherwise they are
1995 treated in the same way as ordinary letters. A group is ranked with the
1996 letters latest in the alphabet, so for example, if |'I'| and |'X'| are
1997 priority, then |"IX"| is placed between |"W"| and |"X"|. This way, all
1998 roman numerals from I to XXXIX will be sorted correctly.
1999
2000 @^natural compare@>
2001 @^Pool, Martin@>
2002
2003 @d nat_end_low 0
2004 @d nat_end_high 1
2005 @d nat_space 2
2006 @d nat_ignore 3
2007 @d nat_radix_point 4
2008 @#
2009 @d nat_digit_zero 64 // digits go up to 127
2010 @d nat_first_letter 128 // letters go up to 191
2011 @d nat_first_priority_letter 192 // priority letters go up to 255
2012 @d nat_high_value 256
2013
2014 @<Compare using the card's name and the \.S table@>= {
2015   token*pa=card_areas.data[set_card_area(registers['C'].number)].tokens
2016    +p1s->token_ptr;
2017   token*pb=card_areas.data[set_card_area(registers['C'].number)].tokens
2018    +p2s->token_ptr;
2019   boolean fractional=0; // Are we reading digits after a radix point?
2020   int a,b,c;
2021   for(;;pa++,pb++) {
2022     begin_natural_compare_loop: @/
2023     a=tables['S'][*pa>=256?whatsit:*pa];
2024     @+ b=tables['S'][*pb>=256?whatsit:*pb];
2025     @<Skip over leading spaces and/or zeros@>;
2026     @<Process a run of digits@>;
2027     @<Check if the end of either string is reached@>;
2028     @<Check for a radix point@>;
2029     @<Process priority letters@>;
2030     @<Check if the current positions of each string sufficiently differ@>;
2031   }
2032 }
2033
2034 @ @<Skip over leading spaces and/or zeros@>= {
2035   while(a==nat_space||a==nat_ignore||(!fractional&&a==nat_digit_zero)) {
2036     int aa=tables['S'][pa[1]>=256?whatsit:pa[1]];
2037     if(a!=nat_ignore) fractional=0;
2038     if(!fractional && a==nat_digit_zero
2039      && aa>=nat_digit_zero && aa<nat_first_letter) break;
2040     pa++; @+ a=tables['S'][*pa>=256?whatsit:*pa];
2041   }
2042   while(b==nat_space||b==nat_ignore||(!fractional&&b==nat_digit_zero)) {
2043     int bb=tables['S'][pa[1]>=256?whatsit:pa[1]];
2044     if(b!=nat_ignore) fractional=0;
2045     if(!fractional && b==nat_digit_zero
2046      && bb>=nat_digit_zero && bb<nat_first_letter) break;
2047     pb++; @+ b=tables['S'][*pb>=256?whatsit:*pb];
2048   }
2049 }
2050
2051 @ @<Process a run of digits@>= {
2052   if(a>=nat_digit_zero&&a<nat_first_letter&&
2053    b>=nat_digit_zero&&b<nat_first_letter) {
2054     if((c=(fractional?compare_left:compare_right)(pa,pb))) return c;
2055     @<Skip the run of digits, since they are the same@>;
2056     fractional=0;
2057   }
2058 }
2059 @^strange codes@>
2060
2061 @ Compare two left-aligned numbers: the first to have a different value
2062 wins. This function and |compare_right| are basically equivalent, there
2063 are only a few differences (this one is the simpler one).
2064
2065 @-p int compare_left(token*pa,token*pb) {
2066   int a,b;
2067   for(;;pa++,pb++) {
2068     a=tables['S'][*pa>=256?whatsit:*pa];
2069     @+ b=tables['S'][*pb>=256?whatsit:*pb];
2070     @<Skip over ignored characters@>;
2071     @<If neither |a| nor |b| is digit, |break|@>;
2072     @<If one is a digit and the other isn't, the longest run wins@>;
2073     @<If both are different digits, the greater one wins@>;
2074   }
2075   return 0;
2076 }
2077
2078 @ The longest run of digits wins. That aside, the greatest value wins, but
2079 we can't know that it will until we've scanned both numbers to know they
2080 have the same magnitude, so we remember it in |bias|.
2081
2082 @-p int compare_right(token*pa,token*pb) {
2083   int a,b;
2084   int bias=0;
2085   for(;;pa++,pb++) {
2086     a=tables['S'][*pa>=256?whatsit:*pa];
2087     @+ b=tables['S'][*pb>=256?whatsit:*pb];
2088     @<Skip over ignored characters@>;
2089     @<If neither |a| nor |b| is digit, |break|@>;
2090     @<If one is a digit and the other isn't, the longest run wins@>;
2091     @<If both are digits, set the |bias|@>;
2092   }
2093   return bias;
2094 }
2095
2096 @ Ignored characters might be commas for grouping digits into thousands.
2097
2098 @<Skip over ignored characters@>= {
2099   while(a==nat_ignore) {
2100     pa++; @+ a=tables['S'][*pa>=256?whatsit:*pa];
2101   }
2102   while(b==nat_ignore) {
2103     pb++; @+ b=tables['S'][*pb>=256?whatsit:*pb];
2104   }
2105 }
2106
2107 @ @<If neither |a| nor |b| is digit, |break|@>= {
2108   if(!(a>=nat_digit_zero&&a<nat_first_letter)&&
2109    !(b>=nat_digit_zero&&b<nat_first_letter)) break;
2110 }
2111
2112 @ @<If one is a digit and the other isn't, the longest run wins@>= {
2113   if(!(a>=nat_digit_zero&&a<nat_first_letter)) return -1;
2114   if(!(b>=nat_digit_zero&&b<nat_first_letter)) return 1;
2115 }
2116
2117 @ @<If both are different digits, the greater one wins@>= {
2118   if(a!=b) return a-b;
2119 }
2120
2121 @ @<If both are digits, set the |bias|@>= {
2122   if(a!=b && !bias) bias=(a<b)?-1:1;
2123 }
2124
2125 @ @<Skip the run of digits, since they are the same@>= {
2126   while(a>=nat_digit_zero&&a<nat_first_letter) {
2127     pa++; @+ pb++; @+ a=tables['S'][*pa>=256?whatsit:*pa];
2128   }
2129   b=tables['S'][*pb>=256?whatsit:*pb];
2130 }
2131
2132 @ @<Check if the end of either string is reached@>= {
2133   if(a==nat_end_low && b>nat_end_high) return -1;
2134   if(b==nat_end_low && a>nat_end_high) return 1;
2135   if(a==nat_end_high && b>nat_end_high) return 1;
2136   if(b==nat_end_high && a>nat_end_high) return -1;
2137   if(a<=nat_end_high && b<=nat_end_high) break; // tied
2138 }
2139
2140 @ A radix point must be followed by a digit, otherwise it is considered to
2141 be punctuation (and ignored). Radix points come before digits in the
2142 sorting order (|".5"| comes before |"5"|).
2143
2144 @<Check for a radix point@>= {
2145   if(a==nat_radix_point && b==nat_radix_point) {
2146     int aa=tables['S'][pa[1]>=256?whatsit:pa[1]];
2147     int bb=tables['S'][pb[1]>=256?whatsit:pb[1]];
2148     if(aa>=nat_digit_zero&&aa<nat_first_letter
2149      &&bb>=nat_digit_zero&&bb<nat_first_letter) fractional=1;
2150   } @+else if(a==nat_radix_point) {
2151     int aa=tables['S'][pa[1]>=256?whatsit:pa[1]];
2152     if(!(aa>=nat_digit_zero&&aa<nat_first_letter)) {
2153       pa++; goto begin_natural_compare_loop;
2154     }
2155   } @+else if(b==nat_radix_point) {
2156     int bb=tables['S'][pb[1]>=256?whatsit:pb[1]];
2157     if(!(bb>=nat_digit_zero&&bb<nat_first_letter)) {
2158       pb++; goto begin_natural_compare_loop;
2159     }
2160   }
2161 }
2162
2163 @ This is used so that |"IX"| can be sorted between |"VIII"| and |"X"|. In
2164 normal alphabetical order, |"IX"| sorts before |"V"|. This algorithm makes
2165 it so that doesn't happen. For example: |a| is |'I'| and |aa| (the
2166 character after |a| in the text) is |'X'| (the check |aa>a| ensures that
2167 it too is priority, in addition to checking that |a| represents a negative
2168 part of a roman number), and |b| is |'V'|. Now, since |'V'| comes between
2169 |'I'| and |'X'| in the alphabetical order, the condition is checked to be
2170 valid and it overrides the later check.
2171
2172 @<Process priority letters@>= {
2173   if(a>=nat_first_priority_letter) {
2174     int aa=tables['S'][pa[1]>=256?whatsit:pa[1]];
2175     if(aa>a && b>=nat_first_letter && (b&63)>(a&63) && (b&63)<(aa&63))
2176       return 1;
2177   }
2178   if(b>=nat_first_priority_letter) {
2179     int bb=tables['S'][pb[1]>=256?whatsit:pb[1]];
2180     if(bb>b && a>=nat_first_letter && (a&63)>(b&63) && (a&63)<(bb&63))
2181       return -1;
2182   }
2183 }
2184
2185 @ At this point, |a| and |b| will both be |@[@]>=nat_radix_point|. Numbers
2186 always come after letters (this rule is designed so that when a radix
2187 point is found after a number, it will make a larger number; otherwise it
2188 will be followed by a letter and therefore the one followed by the letter
2189 is lesser since it has no fractional part to make it greater).
2190
2191 @<Check if the current positions of each string suffic...@>= {
2192   if(a>=nat_first_priority_letter) a-=64;
2193   if(b>=nat_first_priority_letter) b-=64;
2194   if(a<nat_first_letter) a+=128;
2195   if(b<nat_first_letter) b+=128;
2196   if(a!=b) return (a<b)?-1:1;
2197 }
2198
2199 @*Name Sorting. This kind of sorting is used when items are grouped
2200 together by some extra field in the statistics, such as creature types in
2201 Magic: the Gathering.
2202
2203 It works in a similar way to the natural sorting algorithm, but this time
2204 it is simpler and not as many things need to be checked. Digits and
2205 priority letters are treated as normal letters, and the types |nat_space|,
2206 |nat_ignore|, and |nat_radix_point| are all ignored. In addition, a null
2207 terminator is always treated as |nat_end_low|.
2208
2209 If both names compare the same, their number is used instead, in order to
2210 force sorting stability.
2211
2212 @-p int name_compare(int n1,int n2) {
2213   char*s1=name_info(n1).name;
2214   char*s2=name_info(n2).name;
2215   int a,b;
2216   for(;*s1 || *s2;s1++,s2++) {
2217     a=(*s1)?tables['S'][*s1]:nat_end_low;
2218     b=(*s2)?tables['S'][*s2]:nat_end_low;
2219     @<Skip over spaces and ignored characters@>;
2220     @<Check if the end of either string is reached@>;
2221     @<Check if the current positions of...@>;
2222   }
2223   return (n1<n2)?-1:1;
2224 }
2225
2226 @ @<Skip over spaces and ignored characters@>= {
2227   while(a<nat_digit_zero) {
2228     s1++; @+ a=(*s1)?tables['S'][*s1]:nat_end_low;
2229   }
2230   while(b<nat_digit_zero) {
2231     s2++; @+ b=(*s2)?tables['S'][*s2]:nat_end_low;
2232   }
2233 }
2234
2235 @*Statistics. After the card lists are created and sorted and grouped, it
2236 can make statistics from them. It can be just a plain list, or it can be
2237 in summary of groups, measuring count, minimum, maximum, mean, median, and
2238 so on.
2239
2240 First we do the simple iteration.
2241
2242 @^mean@>
2243 @^median@>
2244 @^groups@>
2245 @^minimum@>
2246 @^maximum@>
2247
2248 @<Cases for system commands@>=
2249   @-case 'V': {
2250     // Iterate the card list
2251     data_index i;
2252     char*q=pop_string();
2253     if(!stack_ptr[1].is_string) program_error("Type mismatch");
2254     foreach(i,card_list) {
2255       push_num(card_list.data[i].token_ptr);
2256       store('A');
2257       execute_program(q);
2258     }
2259     free(q);
2260     break;
2261   }@#
2262   @-case 'v': {
2263     // Read a field from the card list
2264     int x=pop_num()&31;
2265     int y=0;
2266     data_index i;
2267     foreach(i,card_list) {
2268       if(registers['A'].number==card_list.data[i].token_ptr)
2269         y=card_list.data[i].field[x];
2270     }
2271     push_num(y);
2272     break;
2273   }@#
2274
2275 @ That was simple, see? Now to do gathering statistics of summary of
2276 groups, which is a bit more complicated. The list is expected to be sorted
2277 by the group field primary, and the statistics field ascending as
2278 secondary, in order to make the correct calculation of the fields.
2279
2280 However, it will not do the sorting automatically, since there are some
2281 reasons why you might want it to work differently. One thing you can do is
2282 to sort the group field {\sl secondary} and some other more major group as
2283 primary, in order to do two-dimensional statistics, and this will work as
2284 long as you do not require the minimum, maximum, or median.
2285
2286 @<Cases for system commands@>=
2287   @-case 'g': {
2288     // Gather statistics of groups
2289     data_index i,si=0;
2290     int x=pop_num()&31; // field for grouping
2291     int y=pop_num()&31; // field to measure statistics with
2292     int sum1,sum2; // running totals of $s_1$ and $s_2$
2293     sum1=sum2=0;
2294     char*q=pop_string(); // code to execute for each group
2295     if(!stack_ptr[1].is_string) program_error("Type mismatch");
2296     foreach(i,card_list) {
2297       if(card_list.data[i].field[x]!=card_list.data[si].field[x]) {
2298         @<Send the results of the current group@>;
2299         sum1=sum2=0; @+ si=i;
2300       }
2301       @<Add to the running totals@>;
2302     }
2303     @<Send the results of the current group@>;
2304     free(q);
2305     break;
2306   }@#
2307
2308 @ Running totals are kept for two quantities called $s_1$ and $s_2$. There
2309 is also $s_0$, but that can be calculated easily using subtraction, so
2310 there is no need to keep a running total. If the sample values are denoted
2311 $x_k$, the following equation represents the running totals:
2312 $$s_j=\sum_{k=1}^N{x_k^j}$$ (note that $s_0=N$.)
2313
2314 @^mathematics@>
2315
2316 @<Add to the running totals@>= {
2317   sum1+=card_list.data[i].field[y];
2318   sum2+=card_list.data[i].field[y]*card_list.data[i].field[y];
2319 }
2320
2321 @ Now we will send the results and call |q|. The results are sent to the
2322 stack in the following order: $s_0$, $s_1$, $s_2$, $Q_0$, $2Q_2$, $Q_4$
2323 (where $Q_0$ is the minimum, $Q_2$ the median, and $Q_4$ the maximum).
2324
2325 From these results, it is then possible to calculate the standard
2326 deviation: $$\sigma={1\over s_0}\sqrt{s_0s_2-s_1^2}$$ and
2327 $$s=\sqrt{s_0s_2-s_1^2\over s_0(s_0-1)}.$$
2328
2329 @^mathematics@>
2330
2331 @<Send the results of the current group@>= {
2332   push_num(i-si); // $s_0$
2333   push_num(sum1); // $s_1$
2334   push_num(sum2); // $s_2$
2335   push_num(card_list.data[si].field[y]); // $Q_0$
2336   push_num(
2337     card_list.data[(si+i)/2].field[y]+card_list.data[(si+i+1)/2].field[y]
2338   ); // $2Q_2$
2339   push_num(card_list.data[i-1].field[y]); // $Q_4$
2340   @# push_num(card_list.data[si].token_ptr); @+ store('A');
2341   execute_program(q);
2342 }
2343
2344 @*Random Pack Generation. Now the codes so that it can create random packs
2345 (such as booster packs) by using the card lists and deck lists.
2346
2347 A command |'P'| is used for evaluation of a deck list. It expects the deck
2348 list number and the code to execute for each card on the list.
2349
2350 @^booster pack@>
2351
2352 @<Cases for system commands@>=
2353   @-case 'P': {
2354     // Generate a random pack or deck
2355     data_index s=set_deck_list(pop_num());
2356     data_index n; // current deck list entry
2357     if(stack_ptr[1].is_string) program_error("Number expected");
2358     @<Figure out what cards belong in the pack@>;
2359     @<Execute the code on the stack for each card in the pack@>;
2360     break;
2361   }@#
2362
2363 @ @<Figure out what cards belong in the pack@>= {
2364   deck_entry*e;
2365   int tries=1000; // How many times can you retry if it fails?
2366   figure_out_again:
2367   if(!--tries) program_error("No cards matched the deck criteria");
2368   n=s;
2369   @<Reset |amount_in_pack| of each card to zero@>;
2370   while(n!=none && (n=(e=deck_lists.data+n)->next)!=none)
2371     @<Process this deck entry@>;
2372 }
2373
2374 @ @<Reset |amount_in_pack| of each card to zero@>= {
2375   data_index i;
2376   foreach(i,card_list) card_list.data[i].amount_in_pack=0;
2377 }
2378
2379 @ The deck entry must be processed according to the flags. Here is a list
2380 of flags:
2381
2382 \.a: Use all cards that meet the criteria, instead of only one. If this is
2383 the case, it is possible to use negative weights to remove cards from the
2384 pack. Also, it cannot fail.
2385 [Combine with \.{x}]
2386
2387 \.k: Select without replacement. It is fail if the total weight is not
2388 enough. There are two ways in which this differs from \.u (below). One is
2389 that the previous lines in the deck list are not used. The other one is
2390 that if the weight is more than one, there will be more than one ball for
2391 that card, therefore the same card can be picked up multiple times.
2392 [Combine with \.{sux}]
2393
2394 \.n: Use the |amount| as a probability. If |amount<=100| then the
2395 probability is |amount/100| otherwise it is |100/amount|. This is a
2396 probability of using the |name| to select another deck list instead of
2397 this one.
2398 [Combine with nothing]
2399
2400 \.s: Skip the next line if this line does not fail. (Normally, if one line
2401 fails, everything does, and you have to try again.)
2402 [Combine with \.{kux}]
2403
2404 \.u: Require unique selection. It is fail if the card is already in this
2405 pack.
2406 [Combine with \.{ksx}]
2407
2408 \.x: Pass the |name| as a string to the code in the \.U register, and then
2409 use the resulting code as the code to determine weights instead of using
2410 the code in the register named by |name| directly. Now you can type things
2411 such as |"12x Forest"| into your deck list.
2412 [Combine with \.{aksu}]
2413
2414 @<Process this deck entry@>= {
2415   if(e->flags&lflag('n')) {
2416     @<Determine whether or not to skip to another deck list@>;
2417   } @+else {
2418     char*c; // code for weights of each card
2419     int total; // total weight of cards
2420     data_index*bag=malloc(sizeof(data_index));
2421     @<Get the code |c| for the weights of each card@>;
2422     @<Calculate the weights of each card@>;
2423     if(!(e->flags&lflag('a')))
2424       @<Select some of the cards at random and add them to the pack@>;
2425     if(e->flags&lflag('x')) free(c);
2426     free(bag);
2427   }
2428 }
2429
2430 @ @<Determine whether or not to skip to another deck list@>= {
2431   boolean q;
2432   if(e->amount<=100) {
2433     q=(gen_random(100)<e->amount);
2434   } @+else {
2435     q=(100<gen_random(e->amount));
2436   }
2437   if(q) n=set_deck_list(find_name(e->name));
2438 }
2439
2440 @ @<Get the code |c| for the weights of each card@>= {
2441   if(e->flags&lflag('x')) {
2442     execute_program(registers['U'].text);
2443     if(stack_ptr->is_string) {
2444       c=pop_string();
2445     } @+else {
2446       program_error("Type mismatch");
2447     }
2448   } @+else {
2449     int n=find_name(e->name);
2450     if(name_info(n).value.is_string) {
2451       c=name_info(n).value.text;
2452     } @+else {
2453       program_error("Type mismatch");
2454     }
2455   }
2456 }
2457
2458 @ @<Calculate the weights of each card@>= {
2459   data_index i;
2460   foreach(i,card_list) {
2461     registers['A'].number=card_list.data[i].token_ptr;
2462     execute_program(c);
2463     if(stack_ptr->number) {
2464       if(e->flags&lflag('a')) {
2465         card_list.data[i].amount_in_pack+=e->amount*stack_ptr->number;
2466       } @+else if(stack_ptr->number>0) {
2467         @<Add the cards to the |bag|@>;
2468       }
2469     }
2470     stack_drop();
2471   }
2472 }
2473
2474 @ The |bag| is like, you put the balls in the bag so that you can mix it
2475 and take one out, whatever number is on the ball is the card you put into
2476 the pack. Except, that there is no balls and no bag.
2477
2478 There is one ball per point of weight.
2479
2480 @^balls@>
2481
2482 @<Add the cards to the |bag|@>= {
2483   int j=stack_ptr->number;
2484   bag=realloc(bag,(total+j)*sizeof(data_index));
2485   while(j--) bag[total+j]=i;
2486   total+=stack_ptr->number;
2487 }
2488
2489 @ If it is not a line which adds all possibilities at once, then the cards
2490 must be selected from the |bag| at random to bag them. In some cases it
2491 will fail.
2492
2493 @<Select some of the cards at random and add them to the pack@>= {
2494   data_index r;
2495   int amount=e->amount;
2496   bag_next:
2497   if(!total) @<Deal with bag failure@>;
2498   r=gen_random(total);
2499   if((e->flags&lflag('u')) && card_list.data[bag[r]].amount_in_pack) {
2500     bag[r]=bag[--total];
2501     goto bag_next;
2502   }
2503   card_list.data[bag[r]].amount_in_pack++;
2504   if(e->flags&lflag('k')) bag[r]=bag[--total];
2505   if(amount--) goto bag_next;
2506   @#if(e->flags&lflag('s')) n=deck_lists.data[n].next;
2507   bag_done: ;
2508 }
2509
2510 @ @<Deal with bag failure@>= {
2511   if(e->flags&lflag('s')) goto bag_done;
2512   else goto figure_out_again;
2513 }
2514
2515 @ Now it must do stuff using the list which is generated. The quantity for
2516 how many of that card is pushed on the stack, and this is done even for
2517 cards with negative quantity (but not for zero quantity).
2518
2519 @<Execute the code on the stack for each card in the pack@>= {
2520   data_index i;
2521   char*q=pop_string();
2522   if(!stack_ptr[1].is_string) program_error("Type mismatch");
2523   foreach(i,card_list) {
2524     if(card_list.data[i].amount_in_pack) {
2525       push_num(card_list.data[i].amount_in_pack);
2526       execute_program(q);
2527     }
2528   }
2529   free(q);
2530 }
2531
2532 @*Reading Input Files. Now it is time for the part of the program where
2533 input files are read and processed. The areas of the file (and other
2534 special commands) are indicated using \.@@ signs.
2535
2536 At first we have state information. Each state is labeled by uppercase
2537 letters, or by digits 1 to 9. The high bit is set for the heading state,
2538 meaning the first line that contains the name and/or other heading
2539 information.
2540
2541 @d null_state 0
2542 @d card_state 'C'
2543 @d deck_state 'D'
2544 @d execute_state 'E'
2545 @d file_state 'F'
2546 @d include_state 'I'
2547 @d keyword_state 'K'
2548 @d image_state 'M'
2549 @d pattern_state 'P'
2550 @d subroutine_state 'S'
2551 @d font_state 'T'
2552 @d encoding_state 'U'
2553 @d wordforms_state 'W'
2554 @d heading 0x80
2555
2556 @<Global variables@>=
2557 int cur_state;
2558 data_index cur_name;
2559 data_index cur_data;
2560 boolean omit_line_break;
2561
2562 @ The next thing that must be kept track of for input files is the stack
2563 of open input files.
2564
2565 @d max_pathname_length 128
2566 @d max_filename_length 128
2567 @d max_input_stack 128
2568 @d max_line_length 256
2569
2570 @<Typedefs@>=
2571 typedef struct {
2572   FILE*fp; // zero for terminal input
2573   char name[max_filename_length+1];
2574   int line;
2575 } input_file_data;
2576
2577 @ @<Global variables@>=
2578 input_file_data input_files[max_input_stack];
2579 input_file_data*current_input_file=input_files;
2580 char input_buffer[max_line_length];
2581
2582 @ Some macros are useful to access the current file data.
2583
2584 @d current_line (current_input_file->line)
2585 @d current_filename (current_input_file->name)
2586 @d current_fp (current_input_file->fp)
2587 @#
2588 @d parsing_error(_text) fprintf(stderr,"%s on line %d in %s\n",
2589  _text,current_line,current_filename)@;
2590
2591 @ There is also conditional processing directives, which uses a single
2592 variable to keep track of the level. If it is greater than zero, the
2593 condition is false, and it is increased for nesting conditions (the
2594 nested conditions have no truth to them).
2595
2596 @<Global variables@>=
2597 int condition_level=0;
2598
2599 @ This subroutine inputs the next line. True is returned if there is a
2600 line, or false if it is finished.
2601
2602 It is necessary to check for end of file and if so, close that file and
2603 try the one it was included from; and if it is terminal input, display the
2604 current state when prompting input from the user.
2605
2606 @-p boolean input_line(void) {
2607   input_line_again: if(current_fp) {
2608     @<Get a line of input from the file@>;
2609   } @+else {
2610     @<Get a line of terminal input@>;
2611   }
2612   @<Remove trailing |'\n'|, |'\r'|, and spaces@>;
2613   ++current_line;
2614   return 1;
2615 }
2616
2617 @ @<Get a line of input from the file@>= {
2618   if(!fgets(input_buffer,max_line_length,current_fp)) {
2619     memusage_log("Closing input file",current_input_file-input_files)@;
2620     fclose(current_fp);
2621     if(current_input_file>input_files) {
2622       --current_input_file;
2623       goto input_line_again;
2624     } @+else {
2625       return 0;
2626     }
2627   }
2628 }
2629
2630 @ @<Get a line of terminal input@>= {
2631   printf("\n%c> ",cur_state?cur_state:'>');
2632   fflush(stdout);
2633   if(!fgets(input_buffer,max_line_length,stdin)) return 0;
2634 }
2635
2636 @ This function is used to open the main input file.
2637
2638 @-p void open_input(char*name) {
2639   if(++current_input_file>input_files+max_input_stack) {
2640     fprintf(stderr,"Too many simultaneous input files\n");
2641 @.Too many simultaneous...@>
2642     exit(1);
2643   }
2644   memusage_log("Opening input file",current_input_file-input_files)@;
2645   strcpy(current_filename,name);
2646   current_line=0;
2647   current_fp=fopen(name,"r");
2648   if(!current_fp) {
2649     fprintf(stderr,"Cannot open input file: %s\n",name);
2650 @.Cannot open input file@>
2651     exit(1);
2652   }
2653 }
2654
2655 @ Trailing newlines and spaces are removed. On some computers, there will
2656 be a carriage return before the line feed, it should be removed, so that
2657 the same file will work on other computers, too.
2658
2659 @d last_character_input input_buffer[strlen(input_buffer)-1]
2660
2661 @<Remove trailing |'\n'|, |'\r'|, and spaces@>= {
2662   if(last_character_input=='\n') last_character_input=0;
2663   if(last_character_input=='\r') last_character_input=0;
2664   while(last_character_input==' ') last_character_input=0;
2665 }
2666
2667 @ The input states start at these values.
2668
2669 @<Initialize the input states@>= {
2670   cur_state=execute_state;
2671   cur_name=cur_data=0;
2672 }
2673
2674 @ Now it is the time to do the actual processing according to the contents
2675 of the lines of the file. A line starting with \.@@ sign will indicate a
2676 special command (to operate in all modes) or a mode switch command.
2677
2678 @d delete_chars(_buf,_c) memmove((_buf),(_buf)+(_c),strlen((_buf)+(_c))+1)
2679
2680 @<Process the input files@>= {
2681   char*buf;
2682   while(input_line()) {
2683     buf=input_buffer;
2684     if(condition_level) {
2685       buf+=strspn(buf," ");
2686       condition_level+=!strcmp(buf,"@@<");
2687       condition_level-=!strcmp(buf,"@@>");
2688     } @+else {
2689       omit_line_break=1;
2690       @<Convert \.@@ commands in the |input_buffer|@>;
2691       omit_line_break=0;
2692       process_line(buf);
2693     }
2694   }
2695 }
2696
2697 @ @<Convert \.@@ commands in the |input_buffer|@>= {
2698   char*ptr=input_buffer;
2699   while(*ptr) {
2700     if(*ptr=='@@') {
2701       @<Convert the current \.@@ command@>;
2702     } @+else {
2703       ptr++;
2704     }
2705   }
2706 }
2707
2708 @ @<Convert the current \.@@ command@>= {
2709   switch(*++ptr) {
2710     case '@@': @/
2711       delete_chars(ptr,1);
2712       break;
2713     case '.': @<Process \.{@@.} command@>;@+break;
2714     case '&': @<Process \.{@@\&} command@>;@+break;
2715     case '^': @<Process \.{@@\^} command@>;@+break;
2716     case '(': @<Process \.{@@(} command@>;@+break;
2717     case '<': @<Process \.{@@<} command@>;@+break;
2718     case '>': @<Remove this command from the input@>;@+break;
2719     default: @/
2720       if((*ptr>='A' && *ptr<='Z') || (*ptr>='0' && *ptr<='9')) {
2721         @<Enter a |heading| state@>;
2722       } @+else {
2723         parsing_error("Unknown @@ command");
2724       }
2725   }
2726 }
2727
2728 @ @<Remove this command from the input@>= {
2729   ptr--;
2730   delete_chars(ptr,2);
2731 }
2732
2733 @ Heading states are used for the first line of a section in the file.
2734 After that line is processed, it becomes the corresponding non-heading
2735 state |(cur_state&~heading)|.
2736
2737 Note: The state |'0'| is deliberately unused; you might use it for
2738 documentation areas, for example.
2739
2740 @^documentation areas@>
2741
2742 @<Enter a |heading| state@>= {
2743   cur_state=heading|*ptr--;
2744   delete_chars(ptr,2);
2745   while(*ptr==' ' || *ptr=='\t') delete_chars(ptr,1);
2746 }
2747
2748 @ @-p void process_line(char*buf) {
2749   int q=cur_state;
2750   cur_state&=~heading;
2751   switch(q) {
2752     case card_state: @<Process card state@>;@+break;
2753     case deck_state: @<Process deck state@>;@+break;
2754     case execute_state: @<Process execute state@>;@+break;
2755     case file_state: @<Process file state@>;@+break;
2756     case keyword_state: @<Process keyword state@>;@+break;
2757     case pattern_state: @<Process pattern state@>;@+break;
2758     case subroutine_state: @<Process subroutine state@>;@+break;
2759     case wordforms_state: @<Process word forms state@>;@+break;
2760     case card_state|heading: @<Process card heading@>;@+break;
2761     case deck_state|heading: @<Process deck heading@>;@+break;
2762     case file_state|heading: @<Process file heading@>;@+break;
2763     case include_state|heading: @<Process include heading@>;@+break;
2764     case keyword_state|heading: @<Process keyword heading@>;@+break;
2765     case pattern_state|heading: @<Process pattern heading@>;@+break;
2766     case subroutine_state|heading: @<Process subroutine heading@>;@+break;
2767     default: ; // nothing happens
2768   }
2769 }
2770
2771 @ Sometimes you might want a macro which can send a line programmatically.
2772 So, here is the way that it is done.
2773
2774 @<Cases for system commands@>=
2775   @-case 'X': {
2776     // Process a line by programming
2777     int n;
2778     if(stack_ptr->is_string) program_error("Type mismatch");
2779     n=pop_num();
2780     if(n) cur_state=n|heading;
2781     if(!stack_ptr->is_string) program_error("Type mismatch");
2782     omit_line_break=1;
2783     process_line(stack_ptr->text);
2784     stack_drop();
2785     break;
2786   }@#
2787
2788 @*Inner Commands. These are commands other than the section headings.
2789
2790 @ The first command to deal with is simple--it is a comment. The rest of
2791 the current line is simply discarded.
2792
2793 @<Process \.{@@.} command@>= {
2794   ptr[-1]=0;
2795 }
2796
2797 @ This command is a pattern split. It means it will process the part of
2798 the line before this command and then process the stuff after it. The
2799 variable |omit_line_break| is 1 if this command is used; because it means
2800 there will not be a line break. (Otherwise, patterns and so on are split
2801 at line breaks.)
2802
2803 @<Process \.{@@\&} command@>= {
2804   ptr[-1]=0;
2805   process_line(buf);
2806   buf=++ptr;
2807 }
2808
2809 @ This allows control characters to be inserted into the input. This code
2810 takes advantage of the way the ASCII code works, in which stripping all
2811 but the five low bits can convert a letter (uppercase or lowercase) to its
2812 corresponding control character.
2813
2814 @^control character@>
2815
2816 @<Process \.{@@\^} command@>= {
2817   ptr[1]&=0x1F;
2818   --ptr;
2819   delete_chars(ptr,2);
2820 }
2821
2822 @ The following command is used to execute a code in a different state and
2823 then include the results in this area.
2824
2825 @<Process \.{@@(} command@>= {
2826   char*p;
2827   char*q;
2828   @<Skip over the name and save the rest of the line@>;
2829   @<Execute the code for the named subroutine@>;
2830   @<Insert the returned string and fix the line buffer@>;
2831 }
2832
2833 @ @<Skip over the name and save the rest of the line@>= {
2834   p=ptr+1;
2835   while(*ptr && *ptr!=')') ptr++;
2836   q=strdup(ptr+!!*ptr);
2837   *ptr=0;
2838 }
2839
2840 @ @<Execute the code for the named subroutine@>= {
2841   int n=find_name(p);
2842   execute_program(fetch_code(n));
2843 }
2844
2845 @ @<Insert the returned string and fix the line buffer@>= {
2846   char*s=pop_string();
2847   sprintf(p-2,"%s%s",s,q);
2848   ptr=p+strlen(s)-2;
2849   free(s);
2850   free(q);
2851 }
2852
2853 @ This command is used for conditional processing. The condition value
2854 comes from the stack. Zero is false, everything else is true.
2855
2856 @<Process \.{@@<} command@>= {
2857   --ptr;
2858   delete_chars(ptr,2);
2859   condition_level=!stack_ptr->number;
2860   stack_drop();
2861 }
2862
2863 @*Card State. Cards are added to the card areas by using the card state.
2864 The \.C register tells which is the current card area, and \.P register is
2865 used to select the current pattern area. The pattern area is used to match
2866 patterns after reading a line. Please note that it won't work to change
2867 the value of the \.C register during the card state.
2868
2869 @<Process card heading@>= {
2870   int n=find_name(buf);
2871   cur_data=set_card_area(n);
2872   cur_name=n-256;
2873   push_num(n);@+store('C');
2874 }
2875
2876 @ @<Process card state@>= {
2877   char*b;
2878   if(!omit_line_break) strcpy(buf+strlen(buf),"\n");
2879   @<Initialize the \.W register@>;
2880   b=do_patterns(strdup(buf),registers['P'].number);
2881   if(registers['W'].is_string) execute_program(registers['W'].text);
2882   @<Send the tokens of |b| and replace whatsits@>;
2883   free(b);
2884 }
2885
2886 @ @<Initialize the \.W register@>= {
2887   if(registers['W'].is_string) free(registers['W'].text);
2888   registers['W'].is_string=1;
2889   registers['W'].text=strdup("");
2890 }
2891
2892 @ @<Send the tokens of |b| and replace whatsits@>= {
2893   char*p;
2894   for(p=b;*p;p++) {
2895     if(*p==whatsit) {
2896       send_token(cur_data,pop_num());
2897     } @+else {
2898       send_token(cur_data,(*p==1)?0:*p);
2899     }
2900   }
2901 }
2902
2903 @ There is one command you might want to send tokens in any other time.
2904
2905 @<Cases for system commands@>=
2906   @-case 'T': {
2907     // Add tokens to the card area
2908     if(stack_ptr->is_string) {
2909       @<Send tokens from the string on the stack@>;
2910     } @+else {
2911       send_token(set_card_area(registers['C'].number),stack_ptr->number);
2912     }
2913     stack_drop();
2914     break;
2915   }@#
2916
2917 @ @<Send tokens from the string on the stack@>= {
2918   char*p;
2919   data_index q=set_card_area(registers['C'].number);
2920   for(p=stack_ptr->text;*p;p++) send_token(q,*p);
2921 }
2922
2923 @*Deck State. Deck state is used for creating deck lists and random packs.
2924
2925 @<Process deck heading@>= {
2926   cur_name=find_name(buf)-256;
2927   cur_data=set_deck_list(cur_name+256);
2928   @<Skip to the end of the deck list@>;
2929 }
2930
2931 @ @<Skip to the end of the deck list@>= {
2932   while(deck_lists.data[cur_data].next!=none)
2933     cur_data=deck_lists.data[cur_data].next;
2934 }
2935
2936 @ Now to parse each line in turn. Each line consists of a number, the
2937 flags, and a text.
2938
2939 @<Process deck state@>= {
2940   int n=strtol(buf,&buf,10);
2941   unsigned int f=0;
2942   if(n) {
2943     buf+=strspn(buf,"\x20\t");
2944     @<Read the flags for the deck list@>;
2945     buf+=strspn(buf,"\x20\t"); // Now we are at the point of the text
2946     @<Send this line to the deck list@>;
2947     @<Create and advance to the new terminator of the deck list@>;
2948   }
2949 }
2950
2951 @ @<Read the flags for the deck list@>= {
2952   while(*buf>='a' && *buf<='z') f |=lflag(*buf++);
2953   buf++; // Skip terminator of flags
2954 }
2955
2956 @ If the \.x flag is set, it will be determined what to do with the text
2957 by the user-defined code. Otherwise, it is always a name, so we can save
2958 memory by pointing to the name buffer (since name buffers never vary).
2959
2960 @<Send this line to the deck list@>= {
2961   if(f&lflag('x')) {
2962     deck_lists.data[cur_data].name=strdup(buf);
2963   } @+else {
2964     deck_lists.data[cur_data].name=name_info(find_name(buf)).name;
2965   }
2966 }
2967
2968 @ @<Create and advance to the new terminator of the deck list@>= {
2969   data_index i=new_record(deck_lists);
2970   deck_lists.data[cur_data].next=i;
2971   deck_lists.data[cur_data=i].next=none;
2972 }
2973
2974 @*Execute State. This state is simple, just execute stack codes. It is the
2975 initial state; you can use it with terminal input, too.
2976
2977 @<Process execute state@>= {
2978   execute_program(buf);
2979 }
2980
2981 @*File State. This state is used to make list of output files. Each one is
2982 stored as a string, like subroutine state. The difference is that newlines
2983 will not be discarded. The other difference is that there is a flag in the
2984 |name_data| record for it that tells it that it is a file that should be
2985 sent to output.
2986
2987 @<More elements of |name_data|@>=
2988   boolean is_output_file;
2989
2990 @ @<Process file heading@>= {
2991   cur_name=find_name(buf)-256;
2992   if(!names.data[cur_name].value.is_string) {
2993     names.data[cur_name].value.is_string=1;
2994     names.data[cur_name].value.text=strdup("");
2995     names.data[cur_name].is_output_file=1;
2996   }
2997 }
2998
2999 @ @<Process file state@>= {
3000   int z=strlen(names.data[cur_name].value.text);
3001   if(!omit_line_break) strcpy(buf+strlen(buf),"\n");
3002   names.data[cur_name].value.text=realloc(names.data[cur_name].value.text,
3003     z+strlen(buf)+1);
3004   strcpy(names.data[cur_name].value.text+z,buf);
3005 }
3006
3007 @*Include State. The include state causes inclusion of another source file
3008 from this one.
3009
3010 @<Process include heading@>= {
3011   cur_state=execute_state;
3012   @<Push the include file onto the input stack@>;
3013   @<Attempt to open the include file...@>;
3014 }
3015
3016 @ @<Push the include file onto the input stack@>= {
3017   ++current_input_file;
3018   memusage_log("Opening input file",current_input_file-input_files)@;
3019   strcpy(current_filename,buf);
3020   current_line=0;
3021   current_fp=0;
3022 }
3023
3024 @ Include files are searched using the search path specified in the
3025 environment variable called \.{TEXNICARDPATH}, which is a list of paths
3026 delimited by colons on UNIX systems (including Cygwin), but semicolons on
3027 Windows (colons are used in Windows for drive letters). A forward slash is
3028 the path separator. Please note that if you want to use include files in
3029 the current directory, you must include |"."| as the first entry in the
3030 search path!!
3031
3032 @^search path@>
3033 @.TEXNICARDPATH@>
3034 @^Windows@>
3035
3036 @<Set |includepath_separator| depending on operating system@>=
3037 #ifdef WIN32
3038 #define @!includepath_separator ';'
3039 #else
3040 #define includepath_separator ':'
3041 #endif
3042
3043 @ @<Attempt to open the include file by finding it in the search path@>= {
3044   current_fp=open_file(current_filename,"r");
3045   @<It is a fatal error if no such file was found@>;
3046 }
3047
3048 @ Since this part of the code is activated in many parts of the program,
3049 we will make it a subroutine that can open files in different modes.
3050
3051 @-p FILE*open_file(char*filename,char*mode) {
3052   char searchpath[max_pathname_length+max_filename_length+1];
3053   char*cpath;
3054   char*npath=getenv("TEXNICARDPATH");
3055   FILE*fp=0;
3056   strcpy(searchpath,npath?npath:".");
3057   npath=cpath=searchpath;
3058   @<Set |includepath_separator| depending on operating system@>;
3059  @<Attempt to open the file from each each directory in the search path@>;
3060   return fp;
3061 }
3062
3063 @ @<Attempt to open the file from each each directory...@>= {
3064   while(!fp) {
3065     char f[max_pathname_length+max_filename_length+1];
3066     @<Select the next path name into |cpath| and |npath|@>;
3067     sprintf(f,"%s/%s",cpath,filename);
3068     fp=fopen(f,mode);
3069   }
3070 }
3071
3072 @ @<Select the next path name into |cpath| and |npath|@>= {
3073   if(!(cpath=npath)) break;
3074   if((npath=strchr(npath,includepath_separator))) *npath++=0;
3075 }
3076
3077 @ @<It is a fatal error if no such file was found@>= {
3078   if(!current_fp) {
3079     fprintf(stderr,"%s not found in search path.\n",current_filename);
3080 @.not found in search path@>
3081     exit(1);
3082   }
3083 }
3084
3085 @*Keyword State. You can add keywords to the keyword area by using this.
3086 Each keyword heading is one entry in the list.
3087
3088 @<Process keyword heading@>= {
3089   cur_data=new_record(keywords);
3090   keywords.data[cur_data].match=strdup(buf);
3091   keywords.data[cur_data].replacement=strdup("");
3092 }
3093
3094 @ @<Process keyword state@>= {
3095   keyword_data*k=&keywords.data[cur_data];
3096   if(*buf=='+') {
3097     k->category|=find_category(buf+1);
3098   } @+else {
3099     if(!omit_line_break) strcpy(buf+strlen(buf),"\n");
3100     @<Append buffer to keyword text@>;
3101   }
3102 }
3103
3104 @ @<Append buffer to keyword text@>= {
3105   if(*buf) {
3106     int z=strlen(k->replacement);
3107     k->replacement=realloc(k->replacement,z+strlen(buf)+1);
3108     strcpy(k->replacement+z,buf);
3109   }
3110 }
3111
3112 @*Pattern State. This state compiles patterns into a pattern area. It
3113 uses its own syntax, and then is converted into the proper control codes
3114 for the |text| of a pattern.
3115
3116 @<Process pattern heading@>= {
3117   cur_name=find_name(buf)-256;
3118   cur_data=set_pattern_area(cur_name+256);
3119 }
3120
3121 @ The stuff inside the pattern state has its own commands.
3122
3123 @<Process pattern state@>= {
3124   char add_buf[1024]; // buffer of text to add to the current pattern
3125   pattern_data*pat=&pattern_areas.data[cur_data];
3126   *add_buf=0;
3127   switch(*buf) {
3128     case '<': @<Create a new pattern with top priority@>;@+break;
3129     case '>': @<Create a new pattern with bottom priority@>;@+break;
3130     case ':': @<Make a pattern text with a marker@>;@+break;
3131     case '+': @<Add a keyword category to this pattern@>;@+break;
3132     default: ; // do nothing
3133   }
3134   @<Append contents of |add_buf| to the pattern, if needed@>;
3135 }
3136
3137 @ @<Create a new pattern with top priority@>= {
3138   cur_data=new_record(pattern_areas);
3139   pattern_areas.data[cur_data].text=strdup("");
3140   pattern_areas.data[cur_data].subroutine=find_name(buf+1)-256;
3141   pattern_areas.data[cur_data].next=names.data[cur_name].pattern_area;
3142   names.data[cur_name].pattern_area=cur_data;
3143 }
3144
3145 @ @<Create a new pattern with bottom priority@>= {
3146   data_index n;
3147   cur_data=new_record(pattern_areas);
3148   pattern_areas.data[cur_data].text=strdup("");
3149   pattern_areas.data[cur_data].subroutine=find_name(buf+1)-256;
3150   pattern_areas.data[cur_data].next=none;
3151   @<Find the bottom pattern and store its index in |n|@>;
3152   pattern_areas.data[n].next=cur_data;
3153 }
3154
3155 @ @<Find the bottom pattern and...@>= {
3156   n=names.data[cur_name].pattern_area;
3157   while(pattern_areas.data[n].next!=none && pattern_areas.data[n].text &&
3158    pattern_areas.data[pattern_areas.data[n].next].next!=none)
3159     n=pattern_areas.data[n].next;
3160 }
3161
3162 @ Actually, the name of this \strike{cake} chunk is a lie, because it does
3163 not always add a marker.
3164
3165 @<Make a pattern text with a marker@>= {
3166   char*p;
3167   char*b=add_buf;
3168   @<Add the pattern marker if applicable@>;
3169   for(p=buf+2;p[-1] && *p;p++) {
3170     switch(*p) {
3171       case '\\': *b++=*++p; @+break;
3172       case '(': *b++=begin_capture; @+break;
3173       case ')': *b++=end_capture; @+break;
3174       case '%': *b++=match_keyword; @+*b++=*++p; @+break;
3175       case '!': *b++=match_table; @+*b++=*++p; @+break;
3176       case '?': *b++=optional_table; @+*b++=*++p; @+break;
3177       case '#': *b++=failed_match; @+break;
3178       case '&': *b++=jump_table; @+*b++=*++p; @+break;
3179       case ';': *b++=successful_match; @+break;
3180       case '<': *b++=back_one_space; @+break;
3181       case '>': *b++=forward_one_space; @+break;
3182       case '[': *b++=match_left_side; @+break;
3183       case ']': *b++=match_right_side; @+break;
3184       default: *b++=*p; @+break;
3185     }
3186   }
3187   *b=0;
3188 }
3189
3190 @ @<Add the pattern marker if applicable@>= {
3191   if(buf[1]>' ') *b++=buf[1]|0x80;
3192 }
3193
3194 @ @<Add a keyword category to this pattern@>= {
3195   pattern_areas.data[cur_data].category=find_category(buf+1);
3196 }
3197
3198 @ @<Append contents of |add_buf| to the pattern...@>= {
3199   if(*add_buf) {
3200     int z=strlen(pat->text);
3201     pat->text=realloc(pat->text,z+strlen(add_buf)+1);
3202     strcpy(pat->text+z,add_buf);
3203   }
3204 }
3205
3206 @*Subroutine State. This state is used to add a named subroutine.
3207
3208 @<Process subroutine heading@>= {
3209   cur_name=find_name(buf)-256;
3210   if(!names.data[cur_name].value.is_string) {
3211     names.data[cur_name].value.is_string=1;
3212     names.data[cur_name].value.text=strdup("");
3213   }
3214 }
3215
3216 @ @<Process subroutine state@>= {
3217   int z=strlen(names.data[cur_name].value.text);
3218   names.data[cur_name].value.text=realloc(names.data[cur_name].value.text,
3219     z+strlen(buf)+1);
3220   strcpy(names.data[cur_name].value.text+z,buf);
3221 }
3222
3223 @*Word Forms State. You can use the word forms state to enter rules and
3224 exceptions for word forms, such as plurals.
3225
3226 @<Global variables@>=
3227 char wordform_code[256]; // code to execute at \.= line
3228 char wordform_kind; // which kind of word forms is being made now?
3229
3230 @ @<Process word forms state@>= {
3231   switch(*buf) {
3232     case '>': @<Process \.> line in word forms state@>; @+break;
3233     case '=': @<Process \.= line in word forms state@>; @+break;
3234     case '#': wordform_kind=buf[1]; @+break;
3235     default: if(*buf>='0' && *buf<='9')
3236       @<Process numeric line in word forms state@>;
3237   }
3238 }
3239
3240 @ The commands are \.>, \.=, and numbers. The command \.> sets a code for
3241 processing \.= commands, and then add to the list.
3242
3243 @<Process \.> line in word forms state@>= {
3244   strcpy(wordform_code,buf+1);
3245 }
3246
3247 @ @<Process \.= line in word forms state@>= {
3248   int level,kind;
3249   char*orig;
3250   char*dest;
3251   push_string(buf+1);
3252   execute_program(wordform_code);
3253   kind=pop_num(); @+ level=pop_num();
3254   dest=pop_string(); @+ orig=pop_string();
3255   add_word_form(kind,level,orig,dest);
3256   free(orig); @+ free(dest);
3257 }
3258
3259 @ Now the command for numeric forms. You put ``level\.:orig\.:dest'' in
3260 that order, please.
3261
3262 @<Process numeric line in word forms state@>= {
3263   int level=strtol(buf,&buf,0);
3264   char*p;
3265   if(*buf==':') buf++;
3266   p=strchr(buf,':');
3267   if(p) *p=0;
3268   add_word_form(wordform_kind,level,buf,p+1);
3269 }
3270
3271 @*Writing Output Files. Finally, it will be time to send any output
3272 generated into the files (if there is any, which there usually is).
3273
3274 @^output@>
3275
3276 @d ctrl(_letter) (0x1F&(_letter))
3277 @#
3278 @d call_final_subroutine ctrl('C')
3279 @d copy_field ctrl('F')
3280 @d newline ctrl('J')
3281 @d loop_point ctrl('L')
3282 @d next_record ctrl('N')
3283 @d skip_one_character ctrl('S')
3284
3285 @<Write the output files@>= {
3286   data_index n;
3287   foreach(n,names) {
3288     if(names.data[n].is_output_file && names.data[n].value.is_string)
3289       @<Write this output file@>;
3290   }
3291 }
3292
3293 @ @<Write this output file@>= {
3294   FILE*fout=fopen(names.data[n].name,"w");
3295   char*ptr=names.data[n].value.text;
3296   char*loopptr=ptr; // loop point
3297   if(!fout) @<Error about unable to open output file@>;
3298   while(*ptr) @<Write the character and advance to the next one@>;
3299   fclose(fout);
3300 }
3301
3302 @ @<Error about unable to open output file@>= {
3303   fprintf(stderr,"Unable to open output file: %s\n",names.data[n].name);
3304 @.Unable to open output file@>
3305   exit(1);
3306 }
3307
3308 @ @<Write the character and advance to the next one@>= {
3309   switch(*ptr) {
3310     case call_final_subroutine: @<Do |call_final_subroutine|@>; @+break;
3311     case copy_field: @<Do |copy_field|@>; @+break;
3312     case loop_point: loopptr=++ptr; @+break;
3313     case next_record: @<Do |next_record|@>; @+break;
3314     case skip_one_character: ptr+=2; @+break;
3315     default: fputc(*ptr++,fout);
3316   }
3317   done_writing_one_character: ;
3318 }
3319
3320 @ @<Do |call_final_subroutine|@>= {
3321   register_value*v;
3322   if(*++ptr=='(') {
3323     char*p=strchr(ptr,')');
3324     *p=0;
3325     v=&name_info(find_name(ptr+1)).value;
3326     *p=')';
3327     ptr=p+1;
3328   } @+else {
3329     v=&registers[*ptr++];
3330   }
3331   if(v->is_string) {
3332     execute_program(v->text);
3333     @<Write or loop based on result of subroutine call@>;
3334     stack_drop();
3335   }
3336 }
3337
3338 @ @<Write or loop based on result of subroutine call@>= {
3339   if(stack_ptr->is_string) {
3340     fprintf(fout,"%s",stack_ptr->text);
3341   } @+else if(stack_ptr->number) {
3342     ptr=loopptr;
3343   }
3344 }
3345
3346 @ This command is used to copy the next field.
3347
3348 Look at the definition for the |send_reg_char_or_text| macro. It is
3349 strange, but it should work wherever a statement is expected. Please note
3350 that a ternary condition operator should have both choices of the same
3351 type.
3352
3353 @^strange codes@>
3354
3355 @d tok_idx (registers['A'].number)
3356 @d tok_area
3357   (card_areas.data[name_info(registers['C'].number).value.number].tokens)
3358
3359 @d send_reg_char_or_text(_reg)
3360   if(!registers[_reg].is_string || *registers[_reg].text)
3361     fprintf(fout, "%c%s",
3362       registers[_reg].is_string?
3363         *registers[_reg].text:registers[_reg].number,
3364       registers[_reg].is_string?
3365         registers[_reg].text+1:(unsigned char*)""
3366     )@;
3367
3368 @<Do |copy_field|@>= {
3369   ++ptr;
3370   for(;;) {
3371     switch(tok_area[tok_idx++]) {
3372       case null_char: @<Unexpected |null_char|@>;
3373       case end_transmission: tok_idx=0; @+goto done_writing_one_character;
3374       case tabulation: send_reg_char_or_text('T'); @+break;
3375       case raw_data: @<Do |raw_data|@>; @+break;
3376       case escape_code: send_reg_char_or_text('E'); @+break;
3377       case record_separator: tok_idx--; @+goto done_writing_one_character;
3378       case field_separator: @+goto done_writing_one_character;
3379       default: @/
3380         if(tok_area[--tok_idx]&~0xFF)
3381           @<Deal with name code@>@;
3382         else
3383           @<Deal with normal character@>;
3384         tok_idx++;
3385     }
3386   }
3387 }
3388
3389 @ @<Unexpected |null_char|@>= {
3390   fprintf(stderr,"Unexpected null character found in a card area\n");
3391 @.Unexpected null character...@>
3392   exit(1);
3393 }
3394
3395 @ @<Do |raw_data|@>= {
3396   while(tok_area[tok_idx]) fputc(tok_area[tok_idx++],fout);
3397   tok_idx++;
3398 }
3399
3400 @ A name code found here is a code to tell it to call the subroutine code
3401 when it is time to write it out to the file. It should return a string on
3402 the stack (if it is a number, it will be ignored).
3403
3404 @<Deal with name code@>= {
3405   if(name_info(tok_area[tok_idx]).value.is_string)
3406     execute_program(name_info(tok_area[tok_idx]).value.text);
3407   if(stack_ptr->is_string) fprintf(fout,"%s",stack_ptr->text);
3408   stack_drop();
3409 }
3410
3411 @ In case of a normal character, normally just write it out. But some
3412 characters need escaped for \TeX.
3413
3414 @<Deal with normal character@>= {
3415   if(tables['E'][tok_area[tok_idx]]) send_reg_char_or_text('E');
3416   fputc(tok_area[tok_idx],fout);
3417 }
3418
3419 @ This one moves to the next record, looping if a next record is in fact
3420 available. Otherwise, just continue. Note that a |record_separator|
3421 immediately followed by a |end_transmission| is assumed to mean there is
3422 no next record, and that there is allowed to be a optional
3423 |record_separator| just before the |end_transmission|.
3424
3425 @<Do |next_record|@>= {
3426   ++ptr;
3427   while(tok_area[tok_idx]!=record_separator &&
3428    tok_area[tok_idx]!=end_transmission) tok_idx++;
3429   if(tok_area[tok_idx]!=end_transmission &&
3430    tok_area[tok_idx+1]!=end_transmission) ptr=loopptr;
3431 }
3432
3433 @*Functions Common to DVI and GF. Numbers for \.{GF} and \.{DVI} files use
3434 the |dvi_number| data type. (Change this in the change file if the current
3435 setting is inappropriate for your system.)
3436
3437 There is also the |dvi_measure| type, which is twice as long and is used
3438 to compute numbers that can be fractional (with thirty-two fractional bits
3439 and thirty-two normal bits).
3440
3441 @<Typedefs@>=
3442 @q[Type of DVI numbers::]@>
3443 typedef signed int dvi_number;
3444 typedef signed long long int dvi_measure;
3445 @q[::Type of DVI numbers]@>
3446
3447 @ There is one subroutine here to read a |dvi_number| from a file. They
3448 come in different sizes and some are signed and some are unsigned.
3449
3450 @^endianness@>
3451 @^byte order@>
3452
3453 @-p dvi_number get_dvi_number(FILE*fp,boolean is_signed,int size) {
3454   dvi_number r=0;
3455   if(size) r=fgetc(fp);
3456   if((r&0x80) && is_signed) r|=0xFFFFFF00;
3457   while(--size) r=(r<<8)|fgetc(fp);
3458   return r;
3459 }
3460
3461 @ Some macros are defined here in order to deal with |dvi_measure| values.
3462
3463 @^fractions@>
3464
3465 @d to_measure(_value) (((dvi_measure)(_value))<<32)
3466 @d floor(_value) ((dvi_number)((_value)>>32))
3467 @d round(_value) ((dvi_number)(((_value)+0x8000)>>32))
3468 @d ceiling(_value) ((dvi_number)(((_value)+0xFFFF)>>32))
3469
3470 @ Here division must be done in a careful way, to ensure that none of the
3471 intermediate results exceed sixty-four bits.
3472
3473 @d fraction_one to_measure(1)
3474
3475 @-p dvi_measure make_fraction(dvi_measure p,dvi_measure q) {
3476   dvi_measure f,n;
3477   boolean negative=(p<0)^(q<0);
3478   if(p<0) p=-p;
3479   if(q<0) q=-q;
3480   n=p/q; @+ p=p%q;
3481   n=(n-1)*fraction_one;
3482   @<Compute $f=\lfloor2^{32}(1+p/q)+{1\over2}\rfloor$@>;
3483   return (f+n)*(negative?-1:1);
3484 }
3485
3486 @ Notice that the computation specifies $(p-q)+p$ instead of $(p+p)-q$,
3487 because the latter could overflow.
3488
3489 @<Compute $f=...@>= {
3490   register dvi_measure b;
3491   f=1;
3492   while(f<fraction_one) {
3493     b=p-q; @+ p+=b;
3494     if(p>=0) {
3495       f+=f+1;
3496     } @+else {
3497       f<<=1;
3498       p+=q;
3499     }
3500   }
3501 }
3502
3503 @ And a few miscellaneous macros.
3504
3505 @d upto4(_var,_cmd) (_var>=_cmd && _var<_cmd+4)
3506
3507 @*DVI Reading. The device-independent file format is a format invented by
3508 David R.~Fuchs in 1979. The file format need not be explained here, since
3509 there are other books which explain it\biblio{Knuth, Donald. ``\TeX: The
3510 Program''. Computers {\char`\&} Typesetting. ISBN 0-201-13437-3.}\biblio{%
3511 Knuth, Donald. ``\TeX ware''. Stanford Computer Science Report 1097.}.
3512
3513 \edef\TeXwareBiblio{\the\bibliocount}
3514 @^Fuchs, David@>
3515 @.DVI@>
3516 @^device independent@>
3517
3518 At first, names will be given for the commands in a \.{DVI} file.
3519
3520 @d set_char_0 0 // Set a character and move [up to 127]
3521 @d set1 128 // Take one parameter to set character [up to 131]
3522 @d set_rule 132 // Set a rule and move down, two parameters
3523 @d put1 133 // As |set1| but no move [up to 136]
3524 @d put_rule 137 // As |set_rule| but no move
3525 @d nop 138 // No operation
3526 @d bop 139 // Beginning of a page
3527 @d eop 140 // End of a page
3528 @d push 141 // Push $(h,v,w,x,y,z)$ to the stack
3529 @d pop 142 // Pop $(h,v,w,x,y,z)$ from the stack
3530 @d right1 143 // Take one parameter, move right [up to 146]
3531 @d w0 147 // Move right $w$ units
3532 @d w1 148 // Set $w$ and move right [up to 151]
3533 @d x0 152 // Move right $x$ units
3534 @d x1 153 // Set $x$ and move right [up to 156]
3535 @d down1 157 // Take one parameter, move down [up to 160]
3536 @d y0 161 // Move down $y$ units
3537 @d y1 162 // Set $y$ and move down [up to 165]
3538 @d z0 166 // Move down $z$ units
3539 @d z1 167 // Set $z$ and move down [up to 170]
3540 @d fnt_num_0 171 // Select font 0 [up to 234]
3541 @d fnt1 235 // Take parameter to select font [up to 238]
3542 @d xxx1 239 // Specials [up to 242]
3543 @d fnt_def1 243 // Font definitions [up to 246]
3544 @d pre 247 // Preamble
3545 @d post 248 // Postamble
3546 @d post_post 249 // Postpostamble
3547
3548 @ We should now start reading the \.{DVI} file. Filenames of fonts being
3549 used will be sent to standard output.
3550
3551 @-p boolean read_dvi_file(char*filename) {
3552   boolean fonts_okay=1;
3553   FILE*fp=fopen(filename,"rb");
3554   if(!fp) dvi_error(fp,"Unable to open file");
3555   @#@<Skip the preamble of the \.{DVI} file@>;
3556   @<Skip to the next page@>;
3557   @<Read the metapage heading@>;
3558   @<Compute the conversion factor@>;
3559   read_dvi_page(fp);
3560   @<Skip to and read the postamble@>;
3561   @<Read the font definitions and load the fonts@>;
3562   if(fonts_okay) @<Read the pages for each card@>;
3563   @#fclose(fp);
3564   return fonts_okay;
3565 }
3566
3567 @ @-p void dvi_error(FILE*fp,char*text) {
3568   fprintf(stderr,"DVI error");
3569 @.DVI error@>
3570   if(fp) fprintf(stderr," at %08X",ftell(fp));
3571   fprintf(stderr,": %s\n",text);
3572   exit(1);
3573 }
3574
3575 @ Please note the version number of the \.{DVI} file must be 2.
3576
3577 @<Skip the preamble of the \.{DVI} file@>= {
3578   if(fgetc(fp)!=pre) dvi_error(fp,"Bad preamble");
3579   if(fgetc(fp)!=2) dvi_error(fp,"Wrong DVI version");
3580   @<Read the measurement parameters@>;
3581   @<Skip the DVI comment@>;
3582 }
3583
3584 @ @<Read the measurement parameters@>= {
3585   unit_num=get_dvi_number(fp,0,4);
3586   unit_den=get_dvi_number(fp,0,4);
3587   unit_mag=get_dvi_number(fp,0,4);
3588 }
3589
3590 @ @<Skip the DVI comment@>= {
3591   int n=fgetc(fp);
3592   fseek(fp,n,SEEK_CUR);
3593 }
3594
3595 @ From the postamble we can read the pointer for the last page.
3596
3597 @<Global variables@>=
3598 dvi_number last_page_ptr;
3599
3600 @ @<Skip to and read the postamble@>= {
3601   fseek(fp,-4,SEEK_END);
3602   while(fgetc(fp)==223) fseek(fp,-2,SEEK_CUR);
3603   fseek(fp,-5,SEEK_CUR);
3604   fseek(fp,get_dvi_number(fp,0,4)+1,SEEK_SET);
3605   last_page_ptr=get_dvi_number(fp,0,4);
3606   fseek(fp,20,SEEK_CUR); // Skipped parameters of |post|
3607   dvi_stack=malloc(get_dvi_number(fp,0,2)*sizeof(dvi_stack_entry));
3608   fseek(fp,2,SEEK_CUR);
3609 }
3610
3611 @ Between the preamble and the first page can be |nop| commands and font
3612 definitions, so these will be skipped. The same things can occur between
3613 the end of one page and the beginning of the next page.
3614
3615 @<Skip to the next page@>= {
3616   int c;
3617   for(;;) {
3618     c=fgetc(fp);
3619     if(c==bop) break;
3620     if(c>=fnt_def1 && c<fnt_def1+4) {
3621       @<Skip a font definition@>;
3622     } @+else if(c!=nop) {
3623       dvi_error(fp,"Bad command between pages");
3624     }
3625   }
3626 }
3627
3628 @ @<Skip a font definition@>= {
3629   int a,l;
3630   fseek(fp,c+13-fnt_def1,SEEK_CUR);
3631   a=fgetc(fp);
3632   l=fgetc(fp);
3633   fseek(fp,a+l,SEEK_CUR);
3634 }
3635
3636 @ The metapage includes the resolution and other things which must be set,
3637 such as subroutine codes. The resolution must be read before fonts can be
3638 read. Please note that no characters can be typeset on the metapage, since
3639 fonts have not been loaded yet. You can still place empty boxes. The DPI
3640 setting (resolution) is read from the \.{\\count1} register.
3641
3642 @<Read the metapage heading@>= {
3643   dvi_number n=get_dvi_number(fp,0,4);
3644   if(n) {
3645     fprintf(stderr,"Metapage must be numbered zero (found %d).\n",n);
3646 @.Metapage must be...@>
3647     exit(1);
3648   }
3649   push_num(get_dvi_number(fp,0,4)); @+ store('D');
3650   fseek(fp,9*4,SEEK_CUR); // Skip other parameters
3651   layer_width=layer_height=0;
3652 }
3653
3654 @ A stack is kept of the page registers, for use with the |push| and |pop|
3655 commands of a \.{DVI} file. This stack is used by the |read_dvi_page|
3656 subroutine and stores the |quan| registers (described in the next
3657 chapter).
3658
3659 @<Typedefs@>=
3660 typedef struct {
3661   dvi_number h;
3662   dvi_number v;
3663   dvi_number w;
3664   dvi_number x;
3665   dvi_number y;
3666   dvi_number z;
3667   dvi_number hh;
3668   dvi_number vv;
3669 } dvi_stack_entry;
3670
3671 @ @<Global variables@>=
3672 dvi_stack_entry*dvi_stack;
3673 dvi_stack_entry*dvi_stack_ptr;
3674
3675 @ Here is the subroutine to read commands from a DVI page. The file
3676 position should be at the beginning of the page after the |bop| command.
3677
3678 @^pages@>
3679
3680 @-p void read_dvi_page(FILE*fp) {
3681   memusage_log("Beginning of page",fseek(fp));
3682   @<Reset the page registers and stack@>;
3683   typeset_new_page();
3684   @<Read the commands of this page@>;
3685   if(layer_width && layer_height) @<Render this page@>;
3686 }
3687
3688 @ @<Reset the page registers and stack@>= {
3689   quan('A')=quan('B')=quan('H')=quan('I')=quan('J')=quan('L')=quan('V')=
3690   quan('W')=quan('X')=quan('Y')=quan('Z')=0;
3691   dvi_stack_ptr=dvi_stack;
3692 }
3693
3694 @ @<Read the commands of this page@>= {
3695   int c,a;
3696   boolean moveaftertyping;
3697   for(;;) {
3698     c=fgetc(fp);
3699     if(c<set1) {
3700       moveaftertyping=1;
3701       @<Typeset character |c| on the current layer@>;
3702     } @+else if(upto4(c,set1)) {
3703       moveaftertyping=1;
3704       c=get_dvi_number(fp,0,c+1-set1);
3705       @<Typeset character |c| on the current layer@>;
3706     } @+else if(c==set_rule || c==put_rule) {
3707       moveaftertyping=(c==set_rule);
3708       c=get_dvi_number(fp,1,4);
3709       a=get_dvi_number(fp,1,4);
3710       @<Typeset |a| by |c| rule on the current layer@>;
3711     } @+else if(upto4(c,put1)) {
3712       moveaftertyping=0;
3713       c=get_dvi_number(fp,0,c+1-put1);
3714       @<Typeset character |c| on the current layer@>;
3715     } @+else if(c==eop) {
3716       break;
3717     } @+else if(c==push) {
3718       if(dvi_stack) @<Push DVI registers to stack@>;
3719     } @+else if(c==pop) {
3720       if(dvi_stack) @<Pop DVI registers from stack@>;
3721     } @+else if(upto4(c,right1)) {
3722       c=get_dvi_number(fp,1,c+1-right1);
3723       horizontal_movement(c);
3724     } @+else if(c==w0) {
3725       horizontal_movement(quan('W'));
3726     } @+else if(upto4(c,w1)) {
3727       c=get_dvi_number(fp,1,c+1-w1);
3728       horizontal_movement(quan('W')=c);
3729     } @+else if(c==x0) {
3730       horizontal_movement(quan('X'));
3731     } @+else if(upto4(c,x1)) {
3732       c=get_dvi_number(fp,1,c+1-x1);
3733       horizontal_movement(quan('X')=c);
3734     } @+else if(upto4(c,down1)) {
3735       c=get_dvi_number(fp,1,c+1-down1);
3736       vertical_movement(c);
3737     } @+else if(c==y0) {
3738       vertical_movement(quan('Y'));
3739     } @+else if(upto4(c,y1)) {
3740       c=get_dvi_number(fp,1,c+1-y1);
3741       vertical_movement(quan('Y')=c);
3742     } @+else if(c==z0) {
3743       vertical_movement(quan('Z'));
3744     } @+else if(upto4(c,z1)) {
3745       c=get_dvi_number(fp,1,c+1-z1);
3746       vertical_movement(quan('Z')=c);
3747     } @+else if(c>=fnt_num_0 && c<fnt1) {
3748       quan('F')=c-fnt_num_0;
3749     } @+else if(upto4(c,fnt1)) {
3750       quan('F')=get_dvi_number(fp,0,c+1-fnt1);
3751     } @+else if(upto4(c,xxx1)) {
3752       c=get_dvi_number(fp,0,c+1-xxx1);
3753       @<Read a special of length |c|@>;
3754     } @+else if(upto4(c,fnt_def1)) {
3755       @<Skip a font definition@>;
3756     } @+else if(c!=nop) {
3757       dvi_error(fp,"Unknown DVI command");
3758     }
3759   }
3760 }
3761
3762 @ @<Push DVI registers to stack@>= {
3763   dvi_stack_ptr->h=quan('H');
3764   dvi_stack_ptr->v=quan('V');
3765   dvi_stack_ptr->w=quan('W');
3766   dvi_stack_ptr->x=quan('X');
3767   dvi_stack_ptr->y=quan('Y');
3768   dvi_stack_ptr->z=quan('Z');
3769   dvi_stack_ptr->hh=quan('I');
3770   dvi_stack_ptr->vv=quan('J');
3771   ++dvi_stack_ptr;
3772 }
3773
3774 @ @<Pop DVI registers from stack@>= {
3775   --dvi_stack_ptr;
3776   quan('H')=dvi_stack_ptr->h;
3777   quan('V')=dvi_stack_ptr->v;
3778   quan('W')=dvi_stack_ptr->w;
3779   quan('X')=dvi_stack_ptr->x;
3780   quan('Y')=dvi_stack_ptr->y;
3781   quan('Z')=dvi_stack_ptr->z;
3782   quan('I')=dvi_stack_ptr->hh;
3783   quan('J')=dvi_stack_ptr->vv;
3784 }
3785
3786 @ A special in \TeX nicard is used to execute a special code while reading
3787 the DVI file. Uses might be additional calculations, changes of registers,
3788 special effects, layer selection, etc. All of these possible commands are
3789 dealt with elsewhere in this program. All we do here is to read it and to
3790 send it to the |execute_program| subroutine.
3791
3792 @^specials@>
3793
3794 @<Read a special of length |c|@>= {
3795   char*buf=malloc(c+1);
3796   fread(buf,1,c,fp);
3797   buf[c]=0;
3798   @<Set \.X and \.Y registers to prepare for the special@>;
3799   execute_program(buf);
3800   free(buf);
3801 }
3802
3803 @ @<Set \.X and \.Y registers to prepare for the special@>= {
3804   registers['X'].is_string=registers['Y'].is_string=0;
3805   registers['X'].number=quan('I');
3806   registers['Y'].number=quan('J');
3807 }
3808
3809 @ In order to read all the pages for each card, we can skip backwards by
3810 using the back pointers. Either we will print all cards (in reverse
3811 order), or we will print cards listed on the command-line, or we will
3812 print cards listed in a file (this last way might be used to print decks
3813 or booster packs).
3814
3815 Card numbers should be one-based, and should not be negative. Any pages
3816 with negative page numbers will be ignored when it is in the mode for
3817 printing all cards.
3818
3819 @d printing_all_cards 0
3820 @d printing_list 1
3821 @d printing_list_from_file 2
3822
3823 @<Global variables@>=
3824 unsigned char printing_mode;
3825 char*printlisttext;
3826 FILE*printlistfile;
3827
3828 @ @<Read the pages for each card@>= {
3829   dvi_number page_ptr=last_page_ptr;
3830   dvi_number e=0,n; // page numbers
3831   boolean pagenotfound=0;
3832   for(;;) {
3833     @<Read the next entry from the list of pages (if applicable)@>;
3834     try_next_page:
3835     @<Seek the next page to print@>;
3836     @<Read the heading for this page@>;
3837     @<If this page shouldn't be printed now, |goto try_next_page|@>;
3838     pagenotfound=0;
3839     read_dvi_page(fp);
3840   }
3841   @#done_printing:;
3842 }
3843
3844 @ @<Read the next entry from the list of pages (if applicable)@>= {
3845   if(printing_mode==printing_list) {
3846     if(!*printlisttext) goto done_printing;
3847     e=strtol(printlisttext,&printlisttext,10);
3848     if(!e) goto done_printing;
3849     if(*printlisttext) printlisttext++;
3850   } @+else if(printing_mode==printing_list_from_file) {
3851     char buf[256];
3852     if(!printlistfile || feof(printlistfile)) goto done_printing;
3853     if(!fgets(buf,255,printlistfile)) goto done_printing;
3854     e=strtol(buf,0,10);
3855   }
3856 }
3857
3858 @ @<Seek the next page to print@>= {
3859   if(page_ptr==-1) {
3860     if(pagenotfound) {
3861       fprintf(stderr,"No page found: %d\n",e);
3862 @.No page found...@>
3863       exit(1);
3864     }
3865     page_ptr=last_page_ptr;
3866     if(printing_mode==printing_all_cards) goto done_printing;
3867     pagenotfound=1;
3868   }
3869   fseek(fp,page_ptr+1,SEEK_SET);
3870 }
3871
3872 @ @<Read the heading for this page@>= {
3873   n=quan('P')=get_dvi_number(fp,1,4);
3874   fseek(fp,4,SEEK_CUR);
3875   layer_width=get_dvi_number(fp,1,4);
3876   layer_height=get_dvi_number(fp,1,4);
3877   fseek(fp,4*6,SEEK_CUR);
3878   page_ptr=get_dvi_number(fp,1,4);
3879 }
3880
3881 @ @<If this page shouldn't be printed now, |goto try_next_page|@>= {
3882   if(n<=0 && printing_mode==printing_all_cards) goto try_next_page;
3883   if(n!=e && printing_mode!=printing_all_cards) goto try_next_page;
3884 }
3885
3886 @*DVI Font Metrics. Here, the fonts are loaded. It is assumed all fonts
3887 are in the current directory, and the ``area'' of the font name is
3888 ignored. The checksum will also be ignored (it can be checked with
3889 external programs if necessary).
3890
3891 @^area@>
3892 @^font loading@>
3893
3894 @<Read the font definitions and load the fonts@>= {
3895   int c;
3896   for(;;) {
3897     c=fgetc(fp);
3898     if(c==post_post) break;
3899     if(c>=fnt_def1 && c<fnt_def1+4) {
3900       int k=get_dvi_number(fp,0,c+1-fnt_def1);
3901       if(k&~0xFF) dvi_error(fp,"Too many fonts");
3902       memusage_log("Loading font",k);
3903       @<Read the definition for font |k| and load it@>;
3904     } @+else if(c!=nop) {
3905       dvi_error(fp,"Bad command in postamble");
3906     }
3907   }
3908   memusage_log("End of postamble",c);
3909 }
3910
3911 @ When reading fonts, it will be necessary to keep a list of the fonts
3912 and their character indices. Only 256 fonts are permitted in one job.
3913
3914 @<Global variables@>=
3915 data_index fontindex[256];
3916
3917 @ @<Read the definition for font |k| and load it@>= {
3918   dvi_number c=get_dvi_number(fp,0,4); // checksum (unused)
3919   dvi_number s=get_dvi_number(fp,0,4); // scale factor
3920   dvi_number d=get_dvi_number(fp,0,4); // design size
3921   int a=get_dvi_number(fp,0,1); // length of area
3922   int l=get_dvi_number(fp,0,1); // length of name
3923   char n[257];
3924   fseek(fp,a,SEEK_CUR);
3925   fread(n,1,l,fp);
3926   n[l]=0;
3927   if((fontindex[k]=read_gf_file(n,s,d))==none) fonts_okay=0;
3928 }
3929
3930 @ An important part of reading the font metrics is the width computation,
3931 which involves multiplying the relative widths in the \.{TFM} file (or
3932 \.{GF} file) by the scaling factor in the \.{DVI} file. This
3933 multiplication must be done in precisely the same way by all \.{DVI}
3934 reading programs, in order to validate the assumptions made by \.{DVI}-%
3935 writing programs such as \TeX.
3936
3937 % (The following paragraph is taken directly from "dvitype.web")
3938 Let us therefore summarize what needs to be done. Each width in a \.{TFM}
3939 file appears as a four-byte quantity called a |fix_word|.  A |fix_word|
3940 whose respective bytes are $(a,b,c,d)$ represents the number
3941 $$x=\left\{\vcenter{\halign{$#$,\hfil\qquad&if $#$\hfil\cr
3942 b\cdot2^{-4}+c\cdot2^{-12}+d\cdot2^{-20}&a=0;\cr
3943 -16+b\cdot2^{-4}+c\cdot2^{-12}+d\cdot2^{-20}&a=255.\cr}}\right.$$
3944 (No other choices of $a$ are allowed, since the magnitude of a \.{TFM}
3945 dimension must be less than 16.)  We want to multiply this quantity by the
3946 integer~|z|, which is known to be less than $2^{27}$.
3947 If $|z|<2^{23}$, the individual multiplications $b\cdot z$, $c\cdot z$,
3948 $d\cdot z$ cannot overflow; otherwise we will divide |z| by 2, 4, 8, or
3949 16, to obtain a multiplier less than $2^{23}$, and we can compensate for
3950 this later. If |z| has thereby been replaced by $|z|^\prime=|z|/2^e$, let
3951 $\beta=2^{4-e}$; we shall compute
3952 $$\lfloor(b+c\cdot2^{-8}+d\cdot2^{-16})\,z^\prime/\beta\rfloor$$ if $a=0$,
3953 or the same quantity minus $\alpha=2^{4+e}z^\prime$ if $a=255$.
3954 This calculation must be
3955 done exactly, for the reasons stated above; the following program does the
3956 job in a system-independent way, assuming that arithmetic is exact on
3957 numbers less than $2^{31}$ in magnitude.
3958
3959 \def\zprime{z'}
3960
3961 @f alpha TeX
3962 @f beta TeX
3963 @f zprime TeX
3964
3965 @<Compute |zprime|, |alpha|, and |beta|@>= {
3966   zprime=s; @+ alpha=16;
3967   while(zprime>=040000000) {
3968     zprime>>=1; @+ alpha<<=1;
3969   }
3970   beta=256/alpha; @+ alpha*=zprime;
3971 }
3972
3973 @ @<Compute the character width |w|@>= {
3974   w=(((((b3*zprime)>>8)+(b2*zprime))>>8)+(b1*zprime))/beta;
3975   if(b0) w-=alpha;
3976 }
3977
3978 @*GF Reading. The \.{GF} format is a ``generic font'' format. It has a lot
3979 in common with \.{DVI} format.
3980
3981 At first, names will be given for the commands in a \.{GF} file. Many
3982 commands have the same numbers as they do in a \.{DVI} file (described in
3983 the previous chapter), which makes it very convenient\biblio{This is
3984 probably on purpose for the this very reason, so that a WEB or CWEB
3985 program can use one set of named constants for reading both files.}.
3986
3987 @d paint_0 0 // Paint $d$ pixels black or white [up to 63]
3988 @d paint1 64 // Take parameter, paint pixels [up to 66]
3989 @d boc 67 // Beginning of a character picture
3990 @d boc1 68 // Short form of |boc|
3991 @d eoc 69 // End of a character picture
3992 @d skip0 70 // Skip some rows
3993 @d skip1 71 // Skip some rows [up to 73]
3994 @d new_row_0 74 // Start a new row and move right [up to 238]
3995 @d yyy 243 // Numeric specials
3996 @d no_op 244 // No operation
3997 @d char_loc 245 // Character locator
3998 @d char_loc0 246 // Short form of |char_loc|
3999
4000 @ The |font_struct| structure stores the information for each character in
4001 a font. The |raster| field points to a bitmap with eight pixels per octet,
4002 most significant bit for the leftmost pixel, each row always padded to a
4003 multiple of eight pixels.
4004
4005 While it is reading the postamble, it will fill in this structure with the
4006 |ptr| field set. After the postamble is read, it will fill in the other
4007 fields belonging to its union.
4008
4009 @<Typedefs@>=
4010 typedef struct {
4011   dvi_number dx; // character escapement in pixels
4012   dvi_number w; // width in DVI units
4013   union {
4014     struct {
4015       dvi_number min_n,max_n,min_m,max_m; // bounding box (in pixels)
4016       unsigned short n; // character code number
4017       unsigned char*raster;
4018       unsigned char flag; // bitfield of flags for this character
4019     }@+;
4020     dvi_number ptr;
4021   }@+;
4022   data_index next;
4023 } font_struct;
4024
4025 @ List of flags follows. Some of these flags might be used in order to
4026 allow$\mathord{}>256$ characters per font, since {\TeX} does not have a
4027 command to enter characters with codes more than one byte long. These
4028 flags are specified using numeric specials.
4029
4030 @d ff_select 0x01 // set high octet all characters
4031 @d ff_prefix 0x02 // set high octet for codes 128-255
4032 @d ff_roundafter 0x04 // round $\it hh$ after sending character
4033 @d ff_roundbefore 0x08 // round $\it hh$ before sending character
4034 @d ff_reset 0x10 // reset high octet
4035 @d ff_strip 0x20 // strip highest bit of prefix
4036 @d ff_space 0x40 // do not save the raster (space only)
4037 @d ff_reserved 0x80 // {\bf DO NOT USE}
4038
4039 @ @<Global variables@>=
4040 memory_of(font_struct) font_data;
4041
4042 @ @<Initialize memory@>= init_memory(font_data,4);
4043
4044 @ When loading a \.{GF} font, the filename will contain the resolution
4045 in dots per inch.
4046
4047 @^font loading@>
4048
4049 @-p data_index read_gf_file(char*fontname,dvi_measure s,dvi_measure d) {
4050   unsigned int dpi=(resolution*unit_mag*s+500*d)/(100000*d);
4051   FILE*fp;
4052   data_index index=none;
4053   data_index first_index=none;
4054   data_index last_index=none;
4055   dvi_number zprime,alpha,beta; // used for width computation
4056   @<Compute |zprime|, |alpha|, and |beta|@>;
4057  @<Figure out the filename and open the file, |return none| if it can't@>;
4058   @<Skip to the postamble of the \.{GF} file@>;
4059   @<Read the character locators@>;
4060   @<Read the character rasters and flags@>;
4061   fclose(fp);
4062   return last_index;
4063 }
4064
4065 @ When figuring out the filename, it will send it to standard output so
4066 that a list can be made of the required fonts.
4067
4068 @<Figure out the filename and open the file, ...@>= {
4069   char n[295];
4070   sprintf(n,"%s.%dgf",fontname,dpi);
4071   printf("%s\n",n);
4072   fp=open_file(n,"rb");
4073   if(!fp) return none;
4074 }
4075
4076 @ @<Skip to the postamble of the \.{GF} file@>= {
4077   int c;
4078   fseek(fp,-4,SEEK_END);
4079   while(fgetc(fp)==223) fseek(fp,-2,SEEK_CUR);
4080   fseek(fp,-5,SEEK_CUR);
4081   fseek(fp,get_dvi_number(fp,0,4)+37,SEEK_SET); // nothing matters anymore
4082 }
4083
4084 @ @<Read the character locators@>= {
4085   int c,b0,b1,b2,b3;
4086   dvi_number dx,w,p;
4087   for(;;) {
4088     c=fgetc(fp);
4089     if(c==post_post) break;
4090     p=-1;
4091     if(c==char_loc) {
4092       @<Read a long character locator@>;
4093     } @+else if(c==char_loc0) {
4094       @<Read a short character locator@>;
4095     } @+else if(c!=no_op) {
4096       fprintf(stderr,"Bad command in GF postamble.\n");
4097 @.Bad command in GF postamble@>
4098       fprintf(stderr,"(Command %d, address %08X)\n",c,ftell(fp)-1);
4099       exit(1);
4100     }
4101     if(p!=-1) @<Defer this character locator into |font_data|@>;
4102   }
4103   last_index=index;
4104 }
4105
4106 @ There are some parameters we do not care about. First is $c$, which is
4107 the character code residue (modulo 256). This is not important since it
4108 is duplicated in the |boc| heading for each character. The second
4109 parameter which we do not care about is the $\it dy$ parameter, since it
4110 should be zero for \.{DVI} files.
4111
4112 @<Read a long character locator@>= {
4113   fseek(fp,1,SEEK_CUR);
4114   dx=get_dvi_number(fp,1,4)>>16;
4115   fseek(fp,4,SEEK_CUR);
4116   @<Read four bytes@>;
4117   p=get_dvi_number(fp,1,4);
4118 }
4119
4120 @ @<Read a short character locator@>= {
4121   fseek(fp,1,SEEK_CUR);
4122   dx=get_dvi_number(fp,0,1);
4123   @<Read four bytes@>;
4124   p=get_dvi_number(fp,1,4);
4125 }
4126
4127 @ @<Read four bytes@>= {
4128   b0=fgetc(fp);@+b1=fgetc(fp);@+b2=fgetc(fp);@+b3=fgetc(fp);
4129 }
4130
4131 @ This processing is deferred, and the rest of the parameters will be
4132 filled in later (and the |ptr| field will be overwritten since it will
4133 no longer be needed at that time).
4134
4135 @<Defer this character locator into |font_data|@>= {
4136   data_index n=new_record(font_data);
4137   @<Compute the character width |w|@>;
4138   font_data.data[n].next=index;
4139   font_data.data[n].dx=dx;
4140   font_data.data[n].w=w;
4141   font_data.data[n].ptr=p;
4142   if(index==none) first_index=n;
4143   index=n;
4144 }
4145
4146 @ Now is time to go through the list we made up and this time actually
4147 fill in the parameters and pictures.
4148
4149 @<Read the character rasters and flags@>= {
4150   while(index!=none) {
4151     fseek(fp,font_data.data[index].ptr,SEEK_SET);
4152     font_data.data[index].flag=0;
4153     font_data.data[index].raster=0;
4154     @<Read commands for this character@>;
4155     @#index=font_data.data[index].next;
4156   }
4157 }
4158
4159 @ Painting the picture uses the value of |paint_switch| to determine
4160 to draw or skip. The current position in the array |raster| is also
4161 pointed by the |pic| pointer. Note that |black| and |white| are not
4162 necessary black and white (but they are on normal paper).
4163
4164 Note the value of $n$ is not needed since the |pic| pointer automatically
4165 keeps track of this kinds of stuff. However, |m| is needed because of
4166 commands that can skip rows, to know how many columns must be skipped to
4167 reach the next row. There is also |b|, which keeps track of the bit
4168 position in the current byte.
4169
4170 @d white 0
4171 @d black 1
4172 @#
4173 @d reset_m
4174   m=(font_data.data[index].max_m-font_data.data[index].min_m)/8+1@;
4175
4176 @<Read commands for this character@>= {
4177   unsigned int c,m,b;
4178   unsigned char*pic;
4179   boolean paint_switch;
4180   for(;;) {
4181     c=fgetc(fp);
4182     if(c<paint1) {
4183       @<Paint |c| pixels |black| or |white|@>;
4184     } @+else if(c>=paint1 && c<paint1+3) {
4185       c=get_dvi_number(fp,0,c+1-paint1);
4186       @<Paint |c| pixels |black| or |white|@>;
4187     } @+else if(c==boc) {
4188       @<Initialize parameters and picture (long form)@>;
4189     } @+else if(c==boc1) {
4190       @<Initialize parameters and picture (short form)@>;
4191     } @+else if(c==eoc) {
4192       break; // Well Done!
4193     } @+else if(upto4(c,skip0)) {
4194       if(c==skip0) c=0;
4195       else c=get_dvi_number(fp,0,c+1-skip1);
4196       @<Finish a row and skip |c| rows@>;
4197     } @+else if(c>=new_row_0 && c<=new_row_0+164) {
4198       c-=new_row_0;
4199       @<Finish a row and skip |c| columns@>;
4200     } @+else if(c==yyy) {
4201       font_data.data[index].flag|=get_dvi_number(fp,0,4)>>16;
4202     } @+else if(c!=no_op) {
4203       fprintf(stderr,"Unknown GF command!\n");
4204 @.Unknown GF command@>
4205       fprintf(stderr,"(Command %d, address %08X)\n",c,ftell(fp)-1);
4206     }
4207   }
4208 }
4209
4210 @ Actually |m| is something a bit different than the standard, because |m|
4211 now tells how many bytes are remaining in the current row.
4212
4213 @d pic_rows (1+font_data.data[index].max_n-font_data.data[index].min_n)
4214
4215 @<Initialize parameters and picture (long form)@>= {
4216   font_data.data[index].n=get_dvi_number(fp,0,4);
4217   @<Deal with $p$ (pointer to previous character with same metrics)@>;
4218   font_data.data[index].min_m=get_dvi_number(fp,1,4);
4219   font_data.data[index].max_m=get_dvi_number(fp,1,4);
4220   font_data.data[index].min_n=get_dvi_number(fp,1,4);
4221   font_data.data[index].max_n=get_dvi_number(fp,1,4);
4222   @<Initialize picture@>;
4223 }
4224
4225 @ @<Initialize picture@>= {
4226   if(font_data.data[index].flag&ff_space) break;
4227   paint_switch=white;
4228   reset_m;
4229   b=0;
4230   pic=font_data.data[index].raster=malloc(m*pic_rows+1);
4231   memset(pic,0,m*pic_rows);
4232 }
4233
4234 @ @<Initialize parameters and picture (short form)@>= {
4235   int d;
4236   font_data.data[index].n=get_dvi_number(fp,0,1);
4237   d=get_dvi_number(fp,0,1);
4238   font_data.data[index].max_m=get_dvi_number(fp,0,1);
4239   font_data.data[index].min_m=font_data.data[index].max_m-d;
4240   d=get_dvi_number(fp,0,1);
4241   font_data.data[index].max_n=get_dvi_number(fp,0,1);
4242   font_data.data[index].min_n=font_data.data[index].max_n-d;
4243   @<Initialize picture@>;
4244 }
4245
4246 @ The pointers to other characters will also be deferred in the same way
4247 as the character locators, but this time from the other end. Now, once it
4248 is finished all the characters, it will {\sl automatically} know to read
4249 the next one properly! (Now you can see what the purpose of the
4250 |@!first_index| variable is.)
4251
4252 @<Deal with $p$ (pointer to previous character with same metrics)@>= {
4253   dvi_number p=get_dvi_number(fp,1,4);
4254   if(p!=-1) {
4255     data_index i=new_record(font_data);
4256     font_data.data[i].next=none;
4257     font_data.data[i].dx=font_data.data[index].dx;
4258     font_data.data[i].w=font_data.data[index].w;
4259     font_data.data[i].ptr=p;
4260     font_data.data[first_index].next=i;
4261     first_index=i;
4262   }
4263 }
4264
4265 @ Now we get to the actual painting. We can assume the value of |m| is
4266 never negative and that everything else is also okay.
4267
4268 @<Paint |c| pixels |black| or |white|@>= {
4269   if(paint_switch) {
4270     if(b+c<=8) {
4271       @<Paint a small block of pixels in the current byte@>;
4272     } @+else {
4273       @<Paint the rest of the pixels in the current byte@>;
4274       @<Fill up the bytes in the middle@>;
4275       @<Clear the pixels needed clearing at the end@>;
4276     }
4277   }
4278   @<Update |paint_switch|, |pic|, |b|, and |m|@>;
4279 }
4280
4281 @ @<Update |paint_switch|, |pic|, |b|, and |m|@>= {
4282   paint_switch^=1;
4283   b+=c;
4284   pic+=b>>3;
4285   m-=b>>3;
4286   b&=7;
4287 }
4288
4289 @ @<Paint a small block of pixels in the current byte@>= {
4290   *pic|=(0xFF>>b)&~(0xFF>>(b+c));
4291 }
4292
4293 @ @<Paint the rest of the pixels in the current byte@>= {
4294   *pic|=0xFF>>b;
4295 }
4296
4297 @ @<Fill up the bytes in the middle@>= {
4298   memset(pic+1,0xFF,(c+b)>>3);
4299 }
4300
4301 @ @<Clear the pixels needed clearing at the end@>= {
4302   pic[(c+b)>>3]&=~(0xFF>>((c+b)&7));
4303 }
4304
4305 @ @<Finish a row and skip |c| rows@>= {
4306   pic+=m;
4307   b=0;
4308   reset_m;
4309   pic+=m*c;
4310   paint_switch=white;
4311 }
4312
4313 @ @<Finish a row and skip |c| columns@>= {
4314   pic+=m;
4315   reset_m;
4316   m-=c>>3;
4317   pic+=c>>3;
4318   b=c&7;
4319   paint_switch=black;
4320 }
4321
4322 @ @<Display font information@>= {
4323   data_index i;
4324   foreach(i,font_data) {
4325     printf("[%d] box=(%d,%d,%d,%d) dx=%d w=%d n=%d flag=%d [%d]\n"
4326       ,i,font_data.data[i].min_n,font_data.data[i].max_n
4327         ,font_data.data[i].min_m,font_data.data[i].max_m
4328       ,font_data.data[i].dx,font_data.data[i].w,font_data.data[i].n
4329       ,font_data.data[i].flag,font_data.data[i].next
4330     );
4331   }
4332 }
4333
4334 @*Layer Computation. Now is the chapter for actually deciding rendering on
4335 the page, where everything should go, etc.$^{[\TeXwareBiblio]}$
4336
4337 @<Global variables@>=
4338 dvi_measure unit_num; // Numerator for units of measurement
4339 dvi_measure unit_den; // Denominator for units of measurement
4340 dvi_measure unit_mag; // Magnification for measurement
4341 dvi_measure unit_conv; // Conversion factor
4342
4343 @ There are also a number of ``internal typesetting quantities''. These
4344 are parameters stored in a separate array, and are used to keep track of
4345 the current state of the typesetting. They are labeled with letters from
4346 \.A to \.Z. They can be modified inside of specials, although some of them
4347 probably shouldn't be modified in this way. Here is the list of them:
4348
4349 \.A, \.B: Horizontal and vertical offset added to \.I and \.J.
4350
4351 \.C: Character code prefix. If bit eight is not set, it only affects
4352 character codes with bit seven set.
4353
4354 \.D: Maximum horizontal drift (in pixels), meaning how far away the \.I
4355 and \.J parameters are allowed to be from the correctly rounded values.
4356
4357 \.E: Maximum vertical drift.
4358
4359 \.F: The current font.
4360
4361 \.H: The horizontal position on the page, in DVI units.
4362
4363 \.I: The horizontal position on the page, in pixels.
4364
4365 \.J: The vertical position on the page, in pixels.
4366
4367 \.L: The current layer number. If this is zero, nothing is placed on the
4368 page, although the positions can still be changed and specials can still
4369 be used.
4370
4371 \.P: Page number. This is used to determine the filename of output.
4372
4373 \.R, \.S: The limits for when horizontal motion should add the number of
4374 pixels or when it should recalculate the pixels entirely.
4375
4376 \.T, \.U: Like \.R and \.S, but for vertical motions.
4377
4378 \.V: The vertical position on the page, in DVI units.
4379
4380 \.W, \.X, \.Y, \.Z: The current spacing amounts, in DVI units.
4381
4382 @d quan(_name) (type_quan[(_name)&0x1F])
4383
4384 @<Global variables@>=
4385 dvi_number type_quan[32];
4386
4387 @ @<Cases for system commands@>=
4388   @-case 'm': {
4389     // Modify an internal typesetting quantity
4390     if(stack_ptr->is_string) program_error("Type mismatch");
4391     quan(*++ptr)=pop_num();
4392     break;
4393   }@#
4394
4395 @ The conversion factor |unit_conv| is figured as follows: There are
4396 exactly |unit_num/unit_den| decimicrons per DVI unit, and 254000
4397 decimicrons per inch, and |resolution/100| pixels per inch. Then we have
4398 to adjust this by the magnification |unit_mag|.
4399
4400 Division must be done slightly carefully to avoid overflow.
4401
4402 @d resolution (registers['D'].number)
4403
4404 @<Compute the conversion factor@>= {
4405   unit_conv=make_fraction(unit_num*resolution*unit_mag,unit_den*100000);
4406   unit_conv/=254000;
4407 }
4408
4409 @ Here are the codes to compute movements. The definition of \.{DVI} files
4410 refers to six registers which hold integer values in DVI units. However,
4411 we also have two more registers, for horizontal and vertical pixel units.
4412
4413 A sequence of characters or rules might cause the pixel values to drift
4414 from their correctly rounded values, since they are not usually an exact
4415 integer number of pixels.
4416
4417 @d to_pixels(_val) round((_val)*unit_conv)
4418
4419 @-p void horizontal_movement(dvi_number x) {
4420   quan('H')+=x;
4421   if(x>quan('S') || x<quan('R')) {
4422     quan('I')=to_pixels(quan('H'));
4423   } @+else {
4424     quan('I')+=to_pixels(x);
4425     if(to_pixels(quan('H'))-quan('I')>quan('D'))
4426       quan('I')=to_pixels(quan('H'))+quan('D');
4427     if(to_pixels(quan('H'))-quan('I')<-quan('D'))
4428       quan('I')=to_pixels(quan('H'))-quan('D');
4429   }
4430 }
4431
4432 @ @-p void vertical_movement(dvi_number x) {
4433   quan('V')+=x;
4434   if(x>quan('U') || x<quan('T')) {
4435     quan('J')=to_pixels(quan('V'));
4436   } @+else {
4437     quan('J')+=to_pixels(x);
4438     if(to_pixels(quan('V'))-quan('J')>quan('E'))
4439       quan('J')=to_pixels(quan('V'))+quan('E');
4440     if(to_pixels(quan('V'))-quan('J')<-quan('E'))
4441       quan('J')=to_pixels(quan('V'))-quan('E');
4442   }
4443 }
4444
4445 @ This is now the part that does actual sending. When many characters
4446 come next to each other, the rounding will be done such that the number
4447 of pixels between two letters will always be the same whenever those two
4448 letters occur next to each other.
4449
4450 @<Typeset character |c| on the current layer@>= {
4451   data_index n=fontindex[quan('F')&0xFF];
4452   if((quan('C')&0x100) || (c&0x80)) c|=quan('C')<<8;
4453   while(n!=none && c!=font_data.data[n].n)
4454     n=font_data.data[n].next;
4455   if(n==none) dvi_error(fp,"Character not in font");
4456   @<Typeset the character and update the current position@>;
4457   @<Update the character code prefix@>;
4458 }
4459
4460 @ @<Typeset the character and update the current position@>= {
4461   if(font_data.data[n].flag&ff_roundbefore)
4462     quan('I')=to_pixels(quan('H'));
4463   if(quan('L') && font_data.data[n].raster) typeset_char_here(n);
4464   if(moveaftertyping) {
4465     quan('H')+=font_data.data[n].w;
4466     quan('I')+=font_data.data[n].dx;
4467     if(font_data.data[n].flag&ff_roundafter)
4468       quan('I')=to_pixels(quan('H'));
4469     else horizontal_movement(0);
4470   }
4471 }
4472
4473 @ If you have a typesetting program that can ship out characters with
4474 codes more than eight bits long, you won't need this. It is provided for
4475 use with normal {\TeX} system.
4476
4477 @<Update the character code prefix@>= {
4478   if(font_data.data[n].flag&ff_strip) c&=0x7F; else c&=0xFF;
4479   if(font_data.data[n].flag&ff_select) quan('C')=c|0x100;
4480   if(font_data.data[n].flag&ff_prefix) quan('C')=c;
4481   if(font_data.data[n].flag&ff_reset) quan('C')=0;
4482 }
4483
4484 @ The number of pixels in the height or width of a rule will always be
4485 rounded up. However, unlike DVItype, this program has no floating point
4486 rounding errors.
4487
4488 @d to_rule_pixels(_val) ceiling((_val)*unit_conv)
4489
4490 @<Typeset |a| by |c| rule on the current layer@>= {
4491   dvi_number x=to_rule_pixels(a);
4492   dvi_number y=to_rule_pixels(c);
4493   if(quan('L') && a>0 && c>0) typeset_rule_here(x,y);
4494   if(moveaftertyping) {
4495     quan('I')+=x;
4496     horizontal_movement(0);
4497   }
4498 }
4499
4500 @ Sometimes you might want DVI units converted to pixels inside of a user
4501 program contained in a DVI file. Here is how it is done.
4502
4503 @<Cases for system commands@>=
4504   @-case 'C': {
4505     // Convert DVI units to pixels
4506     if(stack_ptr->is_string) program_error("Type mismatch");
4507     stack_ptr->number=to_pixels(stack_ptr->number);
4508     break;
4509   }@#
4510
4511 @*Layer Rendering. Please note, these numbers are |short|, which means
4512 that you cannot have more than 65536 pixels in width or in height. This
4513 should not be a problem, because even if you have 3000 dots per inch, and
4514 each card is 10 inches long, that is still only 30000 which is less than
4515 half of the available width. (All units here are in pixels.)
4516
4517 In order to save memory, all typeset nodes are stored in one list at
4518 first, and then rendered to a pixel buffer as each layer is being written
4519 out to the \.{PBM} file, and then the buffer can be freed (or reset to
4520 zero) afterwards to save memory.
4521
4522 @<Typedefs@>=
4523 typedef struct {
4524   unsigned short x; // X position on page
4525   unsigned short y; // Y position on page
4526   union {
4527     struct {
4528       unsigned short w; // Width of rule
4529       unsigned short h; // Height of rule
4530     }@+;
4531     data_index c; // Character index in |font_data|
4532   }@+;
4533   unsigned char l; // Layer (high bit set for rules)
4534 } typeset_node;
4535
4536 @ @<Global variables@>=
4537 memory_of(typeset_node) typeset_nodes;
4538
4539 @ @<Initialize memory@>= init_memory(typeset_nodes,8);
4540
4541 @ We also have variables for the layer size (loaded from \.{\\count2}
4542 and \.{\\count3} registers for the current page). If they are both zero,
4543 then nothing will be rendered.
4544
4545 @<Global variables@>=
4546 unsigned short layer_width;
4547 unsigned short layer_height;
4548
4549 @ Here are the subroutines which typeset characters and rules onto the
4550 page buffer. They are not rendered into a picture yet.
4551
4552 @d typeset_new_page() (typeset_nodes.used=0)
4553 @d typeset_rule_here(_w,_h) typeset_rule(quan('I'),quan('J'),(_w),(_h));
4554 @d typeset_char_here(_ch) typeset_char(quan('I'),quan('J'),(_ch));
4555
4556 @-p void typeset_rule(int x,int y,int w,int h) {
4557   data_index n=new_record(typeset_nodes);
4558   @<Ensure |w| and |h| are not too large to fit on the page@>;
4559   typeset_nodes.data[n].x=x;
4560   typeset_nodes.data[n].y=y;
4561   typeset_nodes.data[n].w=w;
4562   typeset_nodes.data[n].h=h;
4563   typeset_nodes.data[n].l=quan('L')|0x80;
4564 }
4565
4566 @ @<Ensure |w| and |h| are not too large to fit on the page@>= {
4567   if(x+w>layer_width) w=layer_width-x;
4568   if(y+h>layer_height) h=layer_height-y;
4569 }
4570
4571 @ @-p void typeset_char(int x,int y,data_index c) {
4572   data_index n=new_record(typeset_nodes);
4573   typeset_nodes.data[n].x=x;
4574   typeset_nodes.data[n].y=y;
4575   typeset_nodes.data[n].c=c;
4576   typeset_nodes.data[n].l=quan('L');
4577 }
4578
4579 @ Here is a variable |image|. This is a pointer to the buffer for the
4580 picture of the current layer, in \.{PBM} format. The internal quantity
4581 \.L should be set now to the largest layer number in use, at the end of
4582 the page, because it is used to determine how many layers must be sent to
4583 the output.
4584
4585 @d image_max (image+layer_size)
4586
4587 @<Global variables@>=
4588 unsigned char*image;
4589
4590 @ @<Render this page@>= {
4591   unsigned int row_size=((layer_width+7)>>3);
4592   unsigned int layer_size=row_size*layer_height;
4593   image=malloc(layer_size+1);
4594   while(quan('L')) {
4595     memset(image,0,layer_size);
4596     @<Read the |typeset_nodes| list and render any applicable nodes@>;
4597     @<Send the current layer to a file@>;
4598     --quan('L');
4599   }
4600   free(image);
4601 }
4602
4603 @ @<Read the |typeset_nodes| list and render any applicable nodes@>= {
4604   data_index i;
4605   foreach(i,typeset_nodes) {
4606     if((typeset_nodes.data[i].l&0x7F)==quan('L')) {
4607       if(typeset_nodes.data[i].l&0x80) {
4608         @<Render a rule node@>;
4609       } @+else {
4610         @<Render a character node@>;
4611       }
4612     }
4613   }
4614 }
4615
4616 @ In order to render a rule node (which is a filled |black| rectangle), it
4617 is split into rows, and each row is split into three parts: the left end,
4618 the filling, and the right end. However, if the width is sufficiently
4619 small, it will fit in one byte and will not have to be split in this way.
4620
4621 There are also some checks to ensure that the entire rectangle will be
4622 within the bounds of the image.
4623
4624 @<Render a rule node@>= {
4625   int y=1+typeset_nodes.data[i].y-typeset_nodes.data[i].h;
4626   int x=typeset_nodes.data[i].x;
4627   int w=typeset_nodes.data[i].w;
4628   if(y<0) y=0;
4629   if(typeset_nodes.data[i].y>=layer_height)
4630     typeset_nodes.data[i].y=layer_height-1;
4631   if((x&7)+w<=8) {
4632     @<Render a small rule node@>;
4633   } @+else {
4634     @<Render a large rule node@>;
4635   }
4636 }
4637
4638 @ @<Render a small rule node@>= {
4639   for(;y<=typeset_nodes.data[i].y;y++) {
4640     image[y*row_size+(x>>3)]|=(0xFF>>(x&7))&~(0xFF>>((x&7)+w));
4641   }
4642 }
4643
4644 @ @<Render a large rule node@>= {
4645   for(;y<=typeset_nodes.data[i].y;y++) {
4646     unsigned char*p=image+(y*row_size+(x>>3));
4647     *p++|=0xFF>>(x&7); // left
4648     memset(p,0xFF,((x&7)+w)>>3); // filling
4649     p[((x&7)+w)>>3]|=~(0xFF>>((x+w)&7)); // right
4650   }
4651 }
4652
4653 @ Character nodes are a bit different. The pictures are already stored,
4654 now we have to paste them into the layer picture. Since they will not
4655 always be aligned to a multiple to eight columns (one byte), it will have
4656 to shift out and shift in.
4657
4658 Again, it is necessary to ensure it doesn't go out of bounds. It has to be
4659 a bit more careful for characters than it does for rules. Also note that
4660 the \.{GF} format does not require that |min_m| and so on are the tightest
4661 bounds possible.
4662
4663 @<Render a character node@>= {
4664   unsigned int ch=typeset_nodes.data[i].c;
4665   unsigned int x=typeset_nodes.data[i].x+font_data.data[ch].min_m;
4666   unsigned int y=typeset_nodes.data[i].y-font_data.data[ch].max_n;
4667   unsigned int z=typeset_nodes.data[i].y-font_data.data[ch].min_n;
4668   unsigned int w=(font_data.data[ch].max_m-font_data.data[ch].min_m)/8+1;
4669   register unsigned char sh=x&7; // shifting amount for right shift
4670   register unsigned char lsh=8-sh; // shifting amount for left shift
4671   unsigned char*p=image+(y*row_size+(x>>3));
4672   unsigned char*q=font_data.data[ch].raster;
4673   @<Cut off the part of character above the top of the layer image@>;
4674   while(y<=z && p+w<image_max) {
4675     @<Render the current row of the character raster@>;
4676     @<Advance to the next row of the character@>;
4677   }
4678 }
4679
4680 @ @<Cut off the part of character above the top of the layer image@>= {
4681   if(y<0) {
4682     p-=row_size*y;
4683     q-=w*y;
4684     y=0;
4685   }
4686   if(p<image) p=image;
4687 }
4688
4689 @ @<Render the current row of the character raster@>= {
4690   int j;
4691   for(j=0;j<w;j++) {
4692     p[j]|=q[j]>>sh;
4693     p[j+1]|=q[j]<<lsh;
4694   }
4695 }
4696
4697 @ @<Advance to the next row of the character@>= {
4698   y++;
4699   q+=w;
4700   p+=row_size;
4701 }
4702
4703 @ Layer files are output in \.{PBM} format, which is very similar to the
4704 format which this program uses internally. ImageMagick is capable of
4705 reading this format.
4706
4707 @.PBM@>
4708 @^Portable Bitmap@>
4709 @^ImageMagick@>
4710 @^output@>
4711
4712 @<Send the current layer to a file@>= {
4713   FILE*fp;
4714   char filename[256];
4715   sprintf(filename,"P%dL%d.pbm",quan('P'),quan('L'));
4716   fp=fopen(filename,"wb");
4717   fprintf(fp,"P4%d %d ",layer_width,layer_height);
4718   fwrite(image,1,layer_size,fp);
4719   fclose(fp);
4720 }
4721
4722 @ @<Display the list of typeset nodes@>= {
4723   data_index i;
4724   foreach(i,typeset_nodes) {
4725     if(typeset_nodes.data[i].l&0x80) {
4726       printf("[%d] %dx%d%+d%+d\n",typeset_nodes.data[i].l&0x7F
4727         ,typeset_nodes.data[i].w,typeset_nodes.data[i].h
4728         ,typeset_nodes.data[i].x,typeset_nodes.data[i].y
4729       );
4730     } @+else {
4731       printf("[%d] %d(%d) %+d%+d\n",typeset_nodes.data[i].l
4732         ,typeset_nodes.data[i].c,font_data.data[typeset_nodes.data[i].c].n
4733         ,typeset_nodes.data[i].x,typeset_nodes.data[i].y
4734       );
4735     }
4736   }
4737 }
4738
4739 @ @<Display typesetting diagnostics@>= {
4740   int i;
4741   for(i=0;i<32;i++) {
4742     if(type_quan[i]) printf("%c=%d\n",i+'@@',type_quan[i]);
4743   }
4744   printf("unit_conv: %lld [%d]\n",unit_conv,round(unit_conv));
4745   printf("nodes: %d/%d\n",typeset_nodes.used,typeset_nodes.allocated);
4746   printf("fonts: %d/%d\n",font_data.used,font_data.allocated);
4747   if(dvi_stack) printf("stack: %d\n",dvi_stack_ptr-dvi_stack);
4748 }
4749
4750 @*Process of ImageMagick. The filename of ImageMagick \.{convert} is found
4751 by using the \.{IMCONVERT} environment variable. The entire command-line
4752 is stored in the \.Q register, with arguments separated by spaces, and it
4753 might be very long.
4754
4755 @^ImageMagick@>
4756 @.IMCONVERT@>
4757
4758 @d add_magick_arg(_val) magick_args.data[new_record(magick_args)]=_val
4759
4760 @<Typedefs@>=
4761 typedef char*char_ptr;
4762
4763 @ @<Global variables@>=
4764 memory_of(char_ptr) magick_args;
4765
4766 @ @<Switch to ImageMagick@>= {
4767   init_memory(magick_args,4);
4768   add_magick_arg("convert"); // |argv[0]| (program name)
4769   @<Add arguments from \.Q register@>;
4770   add_magick_arg(0); // (terminator)
4771   @<Call the ImageMagick executable file@>;
4772 }
4773
4774 @ The \.Q register will be clobbered here. But that is OK since it will no
4775 longer be used within \TeX nicard.
4776
4777 @<Add arguments from \.Q register@>= {
4778   char*q=registers['Q'].text;
4779   char*p;
4780   while(q && *q) {
4781     p=q;
4782     if(q=strchr(q,' ')) *q++=0;
4783     if(*p) add_magick_arg(p);
4784   }
4785 }
4786
4787 @ @<Call the ImageMagick executable file@>= {
4788   char*e=getenv("IMCONVERT");
4789   if(!e) @<Display the arguments and quit@>;
4790   execv(e,magick_args.data);
4791   fprintf(stderr,"Unable to run ImageMagick\n");
4792 @.Unable to run ImageMagick@>
4793   return 1;
4794 }
4795
4796 @ @<Display the arguments and quit@>= {
4797   data_index i;
4798   char*p;
4799   foreach(i,magick_args) if(p=magick_args.data[i]) printf("%s\n",p);
4800   return 0;
4801 }
4802
4803 @*Internal Typesetting. Until now, we only had the codes for doing
4804 external typesetting and image manipulation (which was the original plan
4805 for this program). Now, we are adding internal typesetting and image
4806 manipulation as well, to avoid external dependencies.
4807
4808 Some of the algorithms of \TeX\ will be used here, with some changes. For
4809 example, there are no leaders, marks, footnotes, alignments, mathematical
4810 formulas, or hyphenation. Ligature nodes are not needed either, because
4811 there is no hyphenation, so we can just use normal character nodes for
4812 ligatures.
4813
4814 There is also no page breaking, although you can still do vertical
4815 splitting if you want multiple columns of text on a card, or for the text
4816 to be interrupted in the middle.
4817
4818 @ Here is a list of the category codes used for internal typesetting, and
4819 the code to initialize that table and the other tables. There are also
4820 category codes from 32 to 255, which mean that it is a register number
4821 containing a code to execute (we set up |tabulation| and |escape_code| to
4822 call registers \.t and \.e, although it is unlikely to use these tokens).
4823
4824 @d cat_ignore 0 // Ignore this token
4825 @d cat_norm 1 // Add a character from the current font
4826 @d cat_space 2 // Add a glue node with the current space factor
4827 @d cat_exit 3 // Exit the current block
4828 @d cat_accent 4 // Add an accent to the next character
4829 @d cat_xaccent 5 // As above, but XOR 128
4830
4831 @<Initialize tables for internal typesetting@>= {
4832   for(i=0;i<256;i++) {
4833     tables['E'][i]=1;
4834     tables['F'][i]=40;
4835     tables['J'][i]=tables['K'][i]=128;
4836   }@#
4837   tables['E'][null_char]=cat_ignore;
4838   tables['E'][end_transmission]=cat_exit; // Not actually used
4839   tables['E'][tabulation]='t';
4840   tables['E'][escape_code]='e';
4841   tables['E'][record_separator]=cat_exit;
4842   tables['E'][field_separator]=cat_exit;
4843   tables['E'][' ']=cat_space;
4844 }
4845
4846 @ All dimensions are stored in units of scaled points (where there are
4847 65536 scaled points in one point, and $72.27$ points in one inch).
4848
4849 There will also be a type for glue ratios, which is used to multiply by
4850 glue stretch and shrink inside of a box, where a value of |0x100000000|
4851 means 100\char`\%\relax\space stretch or shrink, or 1pt per fil unit.
4852
4853 @<Typedefs@>=
4854 typedef signed int scaled;
4855 typedef signed long long int glue_ratio;
4856
4857 @*Data Structures for Boxes. Typesetting is done first by storing
4858 horizontal and vertical boxes of nodes. These boxes may then be included
4859 in other boxes, or shipped out to the next part of the program, which is
4860 image manipulation.
4861
4862 Here we list the possible kind of nodes. These are four-bit numbers, with
4863 bit 3 set for a breakable\slash discardable node. The four high bits are
4864 used as a small parameter for the node.
4865
4866 There are structures for many kinds of nodes, but only one pointer type
4867 will be used. Unions are used to allow many kinds of nodes at once.
4868
4869 @d chars_node 00 // One word of text (including kerns, ligatures, accents)
4870 @d hlist_node 01 // Horizontal box
4871 @d vlist_node 02 // Vertical box
4872 @d rule_node 03 // Filled rectangle
4873 @d adjust_node 04 // Add material before or after current line
4874 @d special_node 05 // Execute commands when this node is found
4875 @d layer_node 06 // Like |special_node| but with only one purpose
4876 @d kern_node 010 // Fixed movement
4877 @d glue_node 011 // Variable movement
4878 @d penalty_node 012 // Tell how bad it is to break a line/page here
4879 @#
4880 @d type_of(_node) ((_node)->type_and_subtype&0x0F)
4881 @d subtype_of(_node) ((_node)->type_and_subtype>>4)
4882 @s box_node int
4883 @d calc_size(_members) (sizeof(struct{
4884   struct box_node*y;unsigned char z;struct{_members}@+;
4885 }))
4886
4887 @<Typedefs@>=
4888 typedef struct box_node {
4889   struct box_node*next; // next node, or 0
4890   unsigned char type_and_subtype;
4891   union @+{
4892     @<Structure of a |chars_node|@>;
4893     @<Structure of a |hlist_node|, |vlist_node|, or |rule_node|@>;
4894     @<Structure of a |adjust_node|@>;
4895     @<Structure of a |special_node|@>;
4896     @<Structure of a |layer_node|@>;
4897     @<Structure of a |kern_node|@>;
4898     @<Structure of a |glue_node|@>;
4899     @<Structure of a |penalty_node|@>;
4900   }@+;
4901 } box_node;
4902 @#
4903
4904 @ In a |chars_node|, there is a font number (0 to 255), and then sixteen
4905 bits for each character, accent, or kern. Data |0x0000| to |0x7FFF| adds a
4906 character (so only 32768 characters are available, while \TeX\ supports
4907 only 256 characters, so it is still more than \TeX), data |0x8000| to
4908 |0xBFFF| specifies an accent for the next character (so only characters
4909 numbered 0 to 16383 can be used as accents), |0xC000| to |0xFFFE| are
4910 implicit kerns (allowing only 16383 possible kerns, although most fonts
4911 use only ten or so, certainly not as many as sixteen thousand), and data
4912 |0xFFFF| is a terminator. All characters are from the same font.
4913
4914 If an accent is specified, it is added to the immediately next character
4915 in this list.
4916
4917 @d sizeof_chars_node calc_size(unsigned char a;unsigned short b[0];)
4918
4919 @<Structure of a |chars_node|@>=
4920 struct {
4921   unsigned char font;
4922   unsigned short chars[0];
4923 }
4924
4925 @ An |hlist_node|, |vlist_node|, and |rule_node| are all similar to each
4926 other, except that a |rule_node| does not have a |list| or |glue_set|, and
4927 a |hlist_node| has an additional |tracking| parameter.
4928
4929 Tracking is 128 for normal width of each letter. They can be adjusted to a
4930 lesser number to make the letters closer together, or greater to make
4931 farther apart leters, for example 64 means half of normal width.
4932
4933 The |subtype_of| a |hlist_node| or |vlist_node| is the glue set order,
4934 setting the high bit for shrinking (otherwise it is stretching).
4935
4936 @d sizeof_hlist_node calc_size(
4937   scaled a;scaled b;scaled c;scaled d;
4938   struct box_node*e;glue_ratio f;unsigned char g;
4939 )
4940 @d sizeof_vlist_node calc_size(
4941   scaled a;scaled b;scaled c;scaled d;
4942   struct box_node*e;glue_ratio f;
4943 )
4944 @d sizeof_rule_node calc_size(scaled a;scaled b;scaled c;scaled d;)
4945
4946 @<Structure of a |hlist_node|...@>=
4947 struct {
4948   scaled width;
4949   scaled height;
4950   scaled depth;
4951   scaled shift_amount; // shift this box by the specified amount
4952   @#
4953   struct box_node*list; // pointer to first child node
4954   glue_ratio glue_set;
4955   @#
4956   unsigned char tracking; // adjust letter spacing
4957 }
4958
4959 @ An |adjust_node| has only a pointer to the sublist, and the |subtype_of|
4960 should be zero to append the vertical material after this line of the
4961 paragraph, or one to put it before this line of the paragraph.
4962
4963 @d sizeof_adjust_node calc_size(struct box_node*a;)
4964
4965 @<Structure of a |adjust_node|@>=
4966 struct {
4967   struct box_node*sublist; // pointer to first child node
4968 }
4969
4970 @ A |special_node| contains a null-terminated C string. The |subtype_of|
4971 specifies how it is used; they are listed below.
4972
4973 @d spec_measure 1 // Measuring the length of a line in a paragraph
4974 @d spec_break 2 // Breaking a paragraph
4975 @d spec_pack 3 // Packaging a box
4976 @d spec_vbreak 4 // Breaking a vertical box
4977 @d spec_render 5 // Shipping out the nodes to the page
4978 @#
4979 @d sizeof_special_node calc_size(char a[0];)
4980
4981 @<Structure of a |special_node|@>=
4982 struct {
4983   char program[0];
4984 }
4985
4986 @ A |layer_node| acts like a |special_node| with subtype |spec_render| and
4987 the |program| set to |"3mL"| if the |layer| parameter is 3. It is probably
4988 a more common kind of special.
4989
4990 For example, it might be used to specify typing in different colors.
4991
4992 @d sizeof_layer_node calc_size(unsigned char a;)
4993
4994 @<Structure of a |layer_node|@>=
4995 struct {
4996   unsigned char layer;
4997 }
4998
4999 @ A |kern_node| represents a horizontal or vertical movement, such as
5000 where some amount of space is skipped.
5001
5002 @d sizeof_kern_node calc_size(scaled a;)
5003
5004 @<Structure of a |kern_node|@>=
5005 struct {
5006   scaled distance;
5007 }
5008
5009 @ A |glue_node| is similar to a |kern_node| although there are some
5010 differences. One difference is that it can stretch and shrink. The
5011 |subtype_of| parameter has the stretch order in the low two bits and the
5012 shrink order in the high two bits.
5013
5014 @d finite 0
5015 @d fil 1
5016 @d fill 2
5017 @d filll 3
5018 @#
5019 @d sizeof_glue_node calc_size(scaled a;scaled b;scaled c;)
5020
5021 @<Structure of a |glue_node|@>=
5022 struct {
5023   scaled natural;
5024   scaled stretch;
5025   scaled shrink;
5026 }
5027
5028 @ A |penalty_node| specifies a valid breakpoint in a paragraph, and in
5029 addition, specifies how bad it is to break here. A penalty value 10000001
5030 is bad enough that it will not break here, and $-10000001$ is good enough
5031 that it will definitely break here.
5032
5033 @d sizeof_penalty_node calc_size(signed int a;)
5034
5035 @<Structure of a |penalty_node|@>=
5036 struct {
5037   signed int penalty;
5038 }
5039
5040 @ Here are functions for manipulation of box nodes, including creation,
5041 destruction, and so on.
5042
5043 First is simple creation of a node. It sets nothing other than type and
5044 subtype.
5045
5046 @-p box_node*create_node(int type,int subtype,int size) {
5047   box_node*ptr=malloc(size);
5048   ptr->next=0;
5049   ptr->type_and_subtype=(type&0x0F)|(subtype<<4);
5050   return ptr;
5051 }
5052
5053 @ Now is destruction. It is recursive because some nodes are boxes that
5054 point to other lists too.
5055
5056 @-p void trash_nodes(box_node*this) {
5057   box_node*next;
5058   while(this) {
5059     next=this->next;
5060     @<Recurse if there is a sublist to trash@>;
5061     free(this);
5062     this=next;
5063   }
5064 }
5065
5066 @ @<Recurse if there is a sublist to trash@>= {
5067   switch(type_of(this)) {
5068     case hlist_node: case vlist_node: @/
5069       trash_nodes(this->list); @+break;
5070     case adjust_node: @/
5071       trash_nodes(this->sublist); @+break;
5072     default: ; // Do nothing
5073   }
5074 }
5075
5076 @ You might realize there are no reference counts. They aren't needed,
5077 because each node is used exactly once. (Later on in the semantic nest, it
5078 is seen that this is not quite true; the box nest also includes a
5079 reference, which is in addition to the |next| pointers of each node, but
5080 this is OK since those are nodes are never isolated or destroyed when
5081 picked off of that list.)
5082
5083 @*Font Metric Data. In order to do internal typesetting, it is necessary
5084 to load the font metric data from a \.{TFM} file. The data in a \.{TFM}
5085 file consists of 32-bit words in big-endian order.
5086
5087 However, the first 6 words are twelve 16-bit integers instead, giving
5088 lengths of various parts of the file.
5089
5090 @s fix_word int
5091 @ The most important data type used here is a |fix_word|, which is a
5092 32-bit signed number, with 12 integer bits and 20 fractional bits. Most of
5093 the |fix_word| values in a \.{TFM} file range from $-16$ to $+16$.
5094
5095 @<Typedefs@>=
5096 typedef signed int fix_word;
5097
5098 @ The twelve lengths are according to the following:
5099
5100 \hbox to\hsize{\hfil\vbox{\smallskip\halign{\hfil$\it#={}$&#\hfil\cr
5101   lf&length of the entire file, in words\cr
5102   lh&number of words of header data\cr
5103   bc&smallest character code in this font\cr
5104   ec&largest character code in this font\cr
5105   nw&number of words in the width table\cr
5106   nh&number of words in the height table\cr
5107   nd&number of words in the depth table\cr
5108   ni&number of words in the italic correction table\cr
5109   nl&number of words in the ligature/kern program\cr
5110   nk&number of words in the kern table\cr
5111   ne&number of words in the extensible character table\cr
5112   np&number of font parameter words\cr
5113 }\smallskip}\hfil}
5114
5115 \noindent The parts of the file are in the order listed above. Some of the
5116 sections of the file are not used by this program (the extensible
5117 characters and the header words), but they still must be skipped over when
5118 reading the \.{TFM} file. Also, the $\it lf$ parameter is only for
5119 verification, and this program does not attempt to verify it.
5120
5121 @ Here is data structures for storing information about font metrics. It
5122 is a managed memory. Some elements will be shared by multiple fonts that
5123 use the same \.{TFM} file, such as |design_size|, |fontname|, and the
5124 ligature/kerning programs.
5125
5126 @<Late Typedefs@>=
5127 typedef struct {
5128   scaled parameters[16]; // Font parameters (up to sixteen)
5129   scaled at_size; // At size, for figuring out \.{GF} filename
5130   scaled design_size; // Design size, for figuring out \.{GF} filename
5131   char*fontname; // Name of font, without extension or area
5132   scaled*width_base;
5133   scaled*height_base;
5134   scaled*depth_base;
5135   scaled*italic_base;
5136   scaled*kern_base;
5137   unsigned char min_char; // Smallest valid character code
5138   unsigned char max_char; // Largest valid character code
5139   int right_boundary; // If this is |none| then there is no right boundary
5140   unsigned char lig_limit; // Code |x| ligatures if |x<256*lig_limit|
5141   @<More elements of |font_metric_data|@>@;
5142 } font_metric_data;
5143
5144 @ @<Global variables@>=
5145 memory_of(font_metric_data) metrics;
5146
5147 @ @<Initialize memory@>= init_memory(metrics,4);
5148
5149 @ Now the ligature/kerning program. The purpose of these fields is
5150 explained later.
5151
5152 @<Typedefs@>=
5153 typedef struct {
5154   unsigned char skip;
5155   unsigned char next;
5156   unsigned char op;
5157   unsigned char remainder;
5158 } lig_kern_command;
5159
5160 @ Some fonts will have a fake ``left boundary character'', which is
5161 implied at the beginning of each word. This points to the command which
5162 should become active at the beginning of a word. If it is null, then no
5163 ligature/kerning program will be active.
5164
5165 @<More elements of |font_metric_data|@>=
5166   lig_kern_command*left_boundary; // Program for left boundary character
5167
5168 @ Another thing is the character info. These are the same data for
5169 different sizes of the same font, since they are index into the other
5170 arrays, which are different for each font.
5171
5172 @<Typedefs@>=
5173 typedef struct {
5174   unsigned char width; // Index into |width_base|
5175   unsigned char height; // Index into |height_base|
5176   unsigned char depth; // Index into |depth_base|
5177   unsigned char italic; // Index into |italic_base|
5178   lig_kern_command*program; // Program for this character (null if none)
5179 } char_info_data;
5180
5181 @ @<More elements of |font_metric_data|@>=
5182   char_info_data*info; // |info[c]| is info for character code |c|
5183
5184 @ So let's get started, please.
5185
5186 The parameter |fontnum| shall be the font number of the first size of this
5187 font set up. The |fontname| is the name of the font, without extension.
5188 The |at_size| parameter points to the beginning of a zero-terminated list
5189 of at-sizes to load the font at (much of the data is the same for
5190 different at-sizes so that we can save memory in this way). However, the
5191 |at_size| values are |scaled|, while the \.{TFM} expects |fix_word|. This
5192 is easy to correct by right-shifting four spaces.
5193
5194 The |fix_word| values are in the same format as numbers in a \.{DVI} file,
5195 so the same code can be used. A macro is set here to make convenience.
5196
5197 @d get_fix_word(_fp) ((fix_word)get_dvi_number((_fp),1,4))
5198
5199 @-p void load_tfm(unsigned char fontnum,char*fontname,scaled*at_size) {
5200   char filename[max_filename_length+1];
5201   short lengths[12]; // The data described above, now numbered 0 to 11
5202   lig_kern_command*program; // Beginning of ligature/kerning program
5203   font_metric_data common_data; // Data common to all sizes of a font
5204   data_index metrics_index=metrics.used; // Index into |metrics|
5205   int num_sizes=0; // How many fonts we are loading at once
5206   int w_offset; // Offset of width table
5207   FILE*fp;
5208   @<Set up the filename of the \.{TFM} file and try to open the file@>;
5209   @<Load the |lengths| data@>;
5210   @<Set up |common_data| and |program|@>;
5211   @<Skip the header words@>;
5212   @<Load the character info@>;
5213   @<Set |w_offset|, and skip to the ligature/kerning program@>;
5214   @<Load the ligature/kerning program@>;
5215   @<Correct the pointers into the ligature/kerning program@>;
5216   @<Calculate |num_sizes| and allocate font metric structures@>;
5217   @<Load the dimension values for each size of this font@>;
5218   fclose(fp);
5219 }
5220
5221 @ @<Set up the filename of the \.{TFM} file and try to open the file@>= {
5222   sprintf(filename,"%s.tfm",fontname);
5223   fp=open_file(filename,"rb");
5224   if(!fp) {
5225     fprintf(stderr,"Cannot open font %s\n",filename);
5226 @.Cannot open font...@>
5227     exit(1);
5228   }
5229 }
5230
5231 @ @<Load the |lengths| data@>= {
5232   int i;
5233   for(i=0;i<12;i++) {
5234     int x=fgetc(fp);
5235     int y=fgetc(fp);
5236     lengths[i]=(x<<8)|y;
5237   }
5238 }
5239
5240 @ @<Set up |common_data| and |program|@>= {
5241   common_data.fontname=strdup(fontname);
5242   common_data.min_char=lengths[2]; // Hopefully should be zero
5243   common_data.max_char=lengths[3];
5244   common_data.right_boundary=none;
5245   common_data.lig_limit=255;
5246   common_data.info=malloc((lengths[3]+1)*sizeof(char_info_data));
5247   program=malloc(lengths[8]*sizeof(lig_kern_command));
5248 }
5249
5250 @ @<Skip the header words@>= {
5251   fseek(fp,4,SEEK_CUR); // Skip checksum
5252   common_data.design_size=get_fix_word(fp)>>4;
5253   fseek(fp,4*(lengths[1]-2),SEEK_CUR); // Skip everything else
5254 }
5255
5256 @ The character info is stored in a packed format. This is then unpacked
5257 and loaded into the |common_data.info| array, which has already been
5258 allocated.
5259
5260 @<Load the character info@>= {
5261   char_info_data*info=common_data.info+common_data.min_char;
5262   int i,c;
5263   for(i=common_data.min_char;i<=common_data.max_char;i++) {
5264     info->width=fgetc(fp);
5265     c=fgetc(fp);
5266     info->height=c>>4;
5267     info->depth=c&0xF;
5268     c=fgetc(fp);
5269     info->italic=c>>2;
5270     if((c&0x3)==0x1) {
5271       info->program=program+fgetc(fp);
5272     } @+else {
5273       info->program=0;
5274       fgetc(fp); // Lists and extensible recipes are not used
5275     }
5276     info++;
5277   }
5278 }
5279
5280 @ The ligature/kerning program will be read before the dimensions specific
5281 to the font size, so that the |common_data| can be set up first. And then
5282 we can skip back to |w_offset|, multiple times, once for each size that is
5283 being loaded.
5284
5285 @<Set |w_offset|, and skip to the ligature/kerning program@>= {
5286   w_offset=ftell(fp);
5287   fseek(fp,4*(lengths[4]+lengths[5]+lengths[6]+lengths[7]),SEEK_CUR);
5288 }
5289
5290 @ @<Load the ligature/kerning program@>= {
5291   int i;
5292   for(i=0;i<lengths[8];i++) {
5293     program[i].skip=fgetc(fp);
5294     program[i].next=fgetc(fp);
5295     program[i].op=fgetc(fp);
5296     program[i].remainder=fgetc(fp);
5297   }
5298 }
5299
5300 @ Sometimes you might need large ligature/kerning programs for many
5301 characters, so you can start at addresses other than 0 to 255. This is the
5302 way that specifies how that is done.
5303
5304 @<Correct the pointers into the ligature/kerning program@>= {
5305   int i;
5306   for(i=common_data.min_char;i<=common_data.max_char;i++)
5307     if(common_data.info[i].program &&
5308      common_data.info[i].program->skip>128)
5309       common_data.info[i].program=program+
5310        (common_data.info[i].program->op<<8)+
5311        common_data.info[i].program->remainder;
5312 }
5313
5314 @ @<Calculate |num_sizes| and allocate font metric structures@>= {
5315   scaled*p=at_size;
5316   data_index n;
5317   while(*p) {
5318     n=new_record(metrics);
5319     memcpy(&(metrics.data[n]),&common_data,sizeof(font_metric_data));
5320     metrics.data[n].at_size=*p;
5321     num_sizes++;
5322     p++;
5323   }
5324 }
5325
5326 @ Now to load the widths, heights, depths, italic corrections, and kerning
5327 distances. This is what |w_offset| is for, so that we can skip back to it.
5328 One allocated memory object is used for all dimension values of one size,
5329 and then the points are moved into the fields of the |font_metric_data|.
5330
5331 @d total_font_dimen
5332   (lengths[4]+lengths[5]+lengths[6]+lengths[7]+lengths[9])
5333 @d cur_metrics (metrics.data[metrics_index])
5334
5335 @<Load the dimension values for each size of this font@>= {
5336   scaled*p;
5337   scaled s,z,zprime,alpha,beta;
5338   for(p=at_size;*p;p++,metrics_index++) {
5339     scaled*d=malloc(sizeof(scaled)*total_font_dimen);
5340     int c;
5341     @<Ensure |d| is valid@>;
5342     @<Set the dimension base pointers for this font@>;
5343     z=*p; // The at size is now called |z|
5344     @<Compute |zprime|, |alpha|, and |beta|@>;
5345     fseek(fp,w_offset,SEEK_SET);
5346     c=lengths[4]+lengths[5]+lengths[6]+lengths[7];
5347     @<Load |c| scaled dimension values from |fp| into |d|@>;
5348     fseek(fp,4*lengths[8],SEEK_CUR);
5349     c=lengths[9];
5350     @<Load |c| scaled dimension values from |fp| into |d|@>;
5351     @<Load the font parameters@>;
5352   }
5353 }
5354
5355 @ @<Ensure |d| is valid@>= {
5356   if(!d) {
5357     fprintf(stderr,"Out of font memory\n");
5358     exit(1);
5359   }
5360 }
5361
5362 @ @<Set the dimension base pointers for this font@>= {
5363   cur_metrics.width_base=d;
5364   cur_metrics.height_base=cur_metrics.width_base+lengths[4];
5365   cur_metrics.depth_base=cur_metrics.height_base+lengths[5];
5366   cur_metrics.italic_base=cur_metrics.depth_base+lengths[6];
5367   cur_metrics.kern_base=cur_metrics.italic_base+lengths[7];
5368 }
5369
5370 @ @<Load |c| scaled dimension values from |fp| into |d|@>= {
5371   while(c--) {
5372     scaled b3,b2,b1,b0;
5373     b0=fgetc(fp); @+ b1=fgetc(fp); @+ b2=fgetc(fp); @+ b3=fgetc(fp);
5374     *d++=(((((b3*zprime)>>8)+(b2*zprime))>>8)+(b1*zprime))/beta
5375      -(b0?alpha:0);
5376   }
5377 }
5378
5379 @ Now there are font parameters. There are up to sixteen font parameters,
5380 but numbered starting at 1. This is the code that makes it to do this.
5381
5382 @<Load the font parameters@>= {
5383   c=lengths[11]-1;
5384   if(c>14) c=14;
5385   if(c<0) c=0;
5386   cur_metrics.parameters[0]=cur_metrics.parameters[1]=0;
5387   if(lengths[11]) cur_metrics.parameters[1]=get_fix_word(fp)>>4;
5388   d=cur_metrics.parameters+2;
5389   @<Load |c| scaled dimension values from |fp| into |d|@>;
5390 }
5391
5392 @*Semantic Nest. We might be building many boxes at once, nested inside of
5393 each other. So, we need to keep the stack of what kind of boxes are
5394 currently in use, and the associated parameters, such as space factors,
5395 and the previous depth of the box.
5396
5397 There is two kinds, horizontal and vertical. The outer mode is considered
5398 horizontal so that it does not add leading between boxes, although it is
5399 not for making a box of the outer mode.
5400
5401 The currently active modes are stored both forwards and backwards, so that
5402 we can use them as a stack of box nodes. There is a null pointer to mark
5403 the end of the list.
5404
5405 @<Typedefs@>=
5406 typedef box_node*box_node_ptr;
5407
5408 @ @<Global variables@>=
5409 memory_of(box_node_ptr) box_nest;
5410
5411 @ @<Initialize memory@>=
5412 init_memory(box_nest,2);
5413
5414 @ We also have the semantic list with local variables to the current
5415 group. The purpose of the |data| fields depends on whether this state is
5416 in horizontal or vertical mode, and that is why it is a union so that we
5417 can access then by names in that case, although they can also be accessed
5418 by numbers as well.
5419
5420 @<Typedefs@>=
5421 typedef struct nest_state {
5422   struct nest_state*link; // Link to state this one is inside of
5423   boolean is_vertical; // 0 for horizontal, 1 for vertical
5424   data_index box_nest_index; // Index into |box_nest|
5425   union {
5426     scaled data[16];
5427     @<Nest state variables for horizontal mode@>;
5428     @<Nest state variables for vertical mode@>;
5429   }@+;
5430 } nest_state;
5431
5432 @ @<Global variables@>=
5433 nest_state*cur_nest;
5434
5435 @ @<Initialize memory@>= {
5436   cur_nest=malloc(sizeof(nest_state));
5437   cur_nest->link=0; // Means this is the outer level
5438   cur_nest->is_vertical=0; // Horizontal mode, no leading
5439   cur_nest->box_nest_index=new_record(box_nest);
5440   box_nest.data[cur_nest->box_nest_index]=0;
5441   cur_nest->space_factor=40; // Normal spacing
5442 }
5443
5444 @ @<Nest state variables for horizontal mode@>=
5445 struct {
5446   scaled space_factor; // Really just a number, but I don't care
5447 }
5448
5449 @ @<Nest state variables for vertical mode@>=
5450 struct {
5451   scaled prev_depth;
5452 }
5453
5454 @ Here are codes to enter a nest.
5455
5456 @-p void enter_nest(boolean is_vertical) {
5457   nest_state*link=cur_nest;
5458   cur_nest=malloc(sizeof(nest_state));
5459   cur_nest->link=link;
5460   cur_nest->is_vertical=is_vertical;
5461   cur_nest->box_nest_index=new_record(box_nest);
5462   box_nest.data[cur_nest->box_nest_index]=0;
5463   if(is_vertical) cur_nest->prev_depth=0;
5464   else cur_nest->space_factor=40;
5465 }
5466
5467 @ And we also need codes to leave a nest. This function returns the
5468 pointer to the first node in the box that was being created, and then the
5469 packaging programs can use that to make a box and iterate over the |next|
5470 pointers to read the entire list.
5471
5472 @-p box_node*leave_nest(void) {
5473   nest_state*link=cur_nest->link;
5474   box_node*node;
5475   @<Ensure it is not nest underflow@>;
5476   @<Set |node| to the node at the beginning of the current list@>;
5477   @<Rewind |box_nest| to the end of the parent list@>;
5478   free(cur_nest);
5479   cur_nest=link;
5480   return node;
5481 }
5482
5483 @ The outer nest should never be left or packaged; it is only used as a
5484 general-purpose stack and a container for other nests. (Unlike \TeX, the
5485 outer nest is never split into pages in \TeX nicard.)
5486
5487 @<Ensure it is not nest underflow@>= {
5488   if(!link) {
5489     fprintf(stderr,"\nNest underflow\n");
5490     exit(1);
5491   }
5492 }
5493
5494 @ Note: Sometimes |node| will be a null pointer if the current list is
5495 making an empty box (i.e. no nodes have been pushed).
5496
5497 @<Set |node| to the node at the beginning of the current list@>= {
5498   if(box_nest.used==cur_nest->box_nest_index+1) {
5499     node=0;
5500   } @+else {
5501     node=box_nest.data[cur_nest->box_nest_index+1];
5502   }
5503 }
5504
5505 @ @<Rewind |box_nest| to the end of the parent list@>= {
5506   box_nest.used=cur_nest->box_nest_index;
5507 }
5508
5509 @ And finally we have codes to push and pop nodes in the current list.
5510 These are simple codes since there isn't much to do.
5511
5512 @d top_of_nodelist (box_nest.data[box_nest.used-1])
5513
5514 @-p inline void push_node(box_node*ptr) {
5515   top_of_nodelist->next=ptr;
5516   box_nest.data[new_record(box_nest)]=ptr;
5517 }
5518
5519 @ @-p box_node*pop_node(void) {
5520   box_node*ptr=top_of_nodelist;
5521   if(ptr) {
5522     box_nest.used--;
5523     top_of_nodelist->next=0;
5524   }
5525   return ptr;
5526 }
5527
5528 @*Box Calculation. Here are codes to calculate various things about the
5529 boxes, including badness, width\slash height\slash depth of a string of
5530 characters, and so on.
5531
5532 This function is used to compute the ``badness'' of a glue setting, when a
5533 total $t$ is supposed to be made from amounts that sum to $s$. In this
5534 program, the badness is $1000(t/s)^3$ (ten times as much as \TeX). It does
5535 not have to be extremely accurate, although it is sufficiently accurate to
5536 do line breaking and so on. Ten million occurs when you stretch more than
5537 21 times as much as it should; this should never happen so it is given the
5538 maximum possible badness that can be computed using this. The badness
5539 squared should never exceed sixty-three bits (which it won't).
5540
5541 @!@^badness@>
5542
5543 @d very_bad 10000000
5544 @d too_bad  10000001
5545
5546 @-p int calc_badness(scaled t,scaled s) {
5547   long long int r; // Apprximately $\root3\of{1000\cdot2^{32}}(t/s)$
5548   if(t==0) return 0;
5549   if(s<=0) return very_bad;
5550   r=(16255LL*t)/s;
5551   if(r>2097152LL) return very_bad;
5552   r=(r*r*r+(1LL<<31))>>32;
5553   if(r>very_bad) r=very_bad;
5554   return r;
5555 }
5556
5557 @ Next we calculate the width, height, and depth of a string of
5558 characters in one font, possibly including accents, kerns, and tracking.
5559 Ligatures will have already been dealt with before this code is reached,
5560 and kerns will already have been added in.
5561
5562 @-p void calc_chars(box_node*b,scaled*w,scaled*h,scaled*d,short t) {
5563   font_metric_data*m=&(metrics.data[b->font]);
5564   unsigned short*c; // Pointer to current character code
5565   scaled junk; // Ensures no segmentation faults are occuring
5566   if(!w) w=&junk;
5567   if(!h) h=&junk;
5568   if(!d) d=&junk;
5569   *w=*h=*d=0;
5570   for(c=b->chars;*c!=0xFFFF;c++) {
5571     if(*c&0x8000) {
5572       if(*c&0x4000) {
5573         @<Process an implicit kern in |calc_chars|@>;
5574       } @+else {
5575         @<Process an accent in |calc_chars|@>;
5576       }
5577     } @+else {
5578       @<Process a normal character in |calc_chars|@>;
5579     }
5580   }
5581 }
5582
5583 @ @<Process a normal character in |calc_chars|@>= {
5584   scaled width=m->width_base[m->info[*c&0xFF].width];
5585   scaled height=m->height_base[m->info[*c&0xFF].height];
5586   scaled depth=m->depth_base[m->info[*c&0xFF].depth];
5587   if(*h<height) *h=height;
5588   if(*d<depth) *d=depth;
5589   *w+=(t*width)>>7;
5590 }
5591
5592 @ @<Process an implicit kern in |calc_chars|@>= {
5593   scaled width=m->kern_base[*c&0x3FFF];
5594   *w+=(t*width)>>7;
5595 }
5596
5597 @ Now to do accents. This requires looking ahead to see the height for the
5598 next character. If the accent has positive height and zero depth, then it
5599 should be adjusted higher in case the letter is taller than an `x' (for
5600 example uppercase letters such as `\'E'). However, if the accent has
5601 positive depth and zero height, then it is an accent that should not be
5602 adjusted for the height of the character (for example `\c C'), although it
5603 might be adjusted for the depth.
5604
5605 It should never happen that the next item is not a normal character (if it
5606 does, then I am not considered responsible for your bad luck).
5607
5608 @<Process an accent in |calc_chars|@>= {
5609   scaled height=m->height_base[m->info[*c&0xFF].height];
5610   scaled depth=m->depth_base[m->info[*c&0xFF].depth];
5611   scaled c_height=m->height_base[m->info[c[1]&0xFF].height];
5612   scaled c_depth=m->height_base[m->info[c[1]&0xFF].depth];
5613   if(height<=0 && depth>0) {
5614     depth+=c_depth;
5615   } @+else {
5616     height+=c_height-m->parameters[5];
5617   }
5618   if(*h<height) *h=height;
5619   if(*d<depth) *d=depth;
5620 }
5621
5622 @*Packaging. This is how the nest lists are packaged into boxes and the
5623 width, height, and depth are calculated from them. They are separate for
5624 horizontal and vertical packing, although there are similarities.
5625
5626 The packing code is also used to compute the glue set of the box, and its
5627 badness. Here is the global variable to store the badness.
5628
5629 @<Global variables@>=
5630 int last_badness=too_bad;
5631
5632 @ There are two such subroutines, |hpackage| and |vpackage|, depending on
5633 what kind of box is wanted. Each one also takes three parameters: |first|,
5634 the first node in the box; |at_size|, the intended size, and |factor|, the
5635 amount to multiply the natural size by before adding |at_size|.
5636
5637 @d common_package box_node*first,scaled at_size,signed char factor
5638
5639 @ Horizontal packaging must compute height, width, and depth of characters
5640 and other boxes it contains, as well as compute glue settings, specials,
5641 adjustments, and so on.
5642
5643 For horizontal packaging, there is also a |tracking| parameter for spacing
5644 the letters in the box.
5645
5646 @-p box_node*hpackage(common_package,unsigned char tracking) {
5647   box_node*box=create_node(hlist_node,0,sizeof_hlist_node);
5648   scaled stretchability[4]; // Total stretch of all glue
5649   scaled shrinkability[4]; // Total shrink of all glue
5650   scaled natural=0; // Total width
5651   box_node*this; // Current node
5652   @<Initialize variables for |hpackage|@>;
5653   @<Read all nodes in a horizontal list to package them@>;
5654 #define @!actual @, box->width
5655   actual=(factor*natural)/8+at_size;
5656   @<Compute glue set and badness@>;
5657 #undef actual
5658   return box;
5659 }
5660
5661 @ @<Initialize variables for |hpackage|@>= {
5662   int o;
5663   box->list=first;
5664   box->tracking=tracking;
5665   box->height=box->depth=box->shift_amount=0;
5666   box->glue_set=0;
5667   for(o=0;o<4;o++) stretchability[o]=shrinkability[o]=0;
5668 }
5669
5670 @ @<Read all nodes in a horizontal list to package them@>= {
5671   for(this=first;*this;this=this->next) {
5672     switch(type_of(this)) {
5673       case chars_node: @<Add word to box size@>; @+break;
5674       case hlist_node: case vlist_node: case rule_node:
5675         @<Apply the size of a box to a horizontal list@>;
5676         @+break;
5677       default: break; // All other nodes are ignored
5678     }
5679   }
5680 }
5681
5682 @ @<Add word to box size@>= {
5683   scaled w,h,d;
5684   calc_chars(this,&w,&h,&d,tracking<<1);
5685   natural+=w;
5686   if(h>box->height) box->height=h;
5687   if(d>box->depth) box->depth=d;
5688 }
5689
5690 @ @<Apply the size of a box to a horizontal list@>= {
5691   natural+=this->width;
5692   if(this->height+this->shift_amount>box->height)
5693     box->height=this->height+this->shift_amount;
5694   if(this->depth-this->shift_amount>box->depth)
5695     box->depth=this->depth-this->shift_amount;
5696 }
5697
5698 @ A macro named |actual| is defined above so that this code can be used
5699 for both horizontal and for vertical packaging.
5700
5701 We also have a macro here to decide setting the glue.
5702
5703 @d set_glue(_order,_flag,_diff,_glue)
5704   (box->type_and_subtype|=((_order)<<4)|((_flag)<<7)),
5705   (box->glue_set=make_fraction(_glue,_diff))
5706
5707 @<Compute glue set and badness@>= {
5708   if(actual>natural) {
5709     @<Glue is stretching@>;
5710   } @+else if(actual<natural) {
5711     @<Glue is shrinking@>;
5712   } @+else {
5713     last_badness=0; // Perfect!
5714   }
5715 }
5716
5717 @ @<Glue is stretching@>= {
5718   if(stretching[filll]!=0) {
5719     set_glue(filll,0,actual-natural,stretching[filll]);
5720     last_badness=0;
5721   } @+else if(stretching[fill]!=0) {
5722     set_glue(fill,0,actual-natural,stretching[fill]);
5723     last_badness=0;
5724   } @+else if(stretching[fil]!=0) {
5725     set_glue(fil,0,actual-natural,stretching[fil]);
5726     last_badness=0;
5727   } @+else if(stretching[finite]!=0) {
5728     set_glue(finite,0,actual-natural,stretching[finite]);
5729     last_badness=calc_badness(actual-natural,stretching[finite]);
5730   } @+else {
5731     last_badness=too_bad;
5732   }
5733 }
5734
5735 @ @<Glue is shrinking@>= {
5736   if(shrinking[filll]!=0) {
5737     set_glue(filll,1,natural-actual,shrinking[filll]);
5738     last_badness=0;
5739   } @+else if(shrinking[fill]!=0) {
5740     set_glue(fill,1,natural-actual,shrinking[fill]);
5741     last_badness=0;
5742   } @+else if(shrinking[fil]!=0) {
5743     set_glue(fil,1,natural-actual,shrinking[fil]);
5744     last_badness=0;
5745   } @+else if(shrinking[finite]>=natural-actual) {
5746     set_glue(finite,1,natural-actual,shrinking[finite]);
5747     last_badness=calc_badness(natural-actual,shrinking[finite]);
5748   } @+else {
5749     set_glue(finite,1,1,1); // Shrink as much as possible
5750     last_badness=too_bad;
5751   }
5752 }
5753
5754 @ Now vertical.
5755
5756 For vertical packaging, the two extra parameters are |max_dp|, the maximum
5757 depth; and |align_top|, which should be set true if it is wanted to align
5758 at the top instead of at the bottom.
5759
5760 @-p box_node*vpackage(common_package,scaled max_dp,boolean align_top) {
5761   box_node*box=create_node(vlist_node,0,sizeof_vlist_node);
5762   scaled stretchability[4]; // Total stretch of all glue
5763   scaled shrinkability[4]; // Total shrink of all glue
5764   scaled natural=0; // Total height plus depth
5765   scaled bonnet=0; // Height of first item
5766   scaled boot=0; // Depth of last item
5767   box_node*this; // Current node
5768 }
5769
5770 @*Main Program. This is where the program starts and ends. Everything else
5771 in the other chapters is started from here.
5772
5773 @<Include files@>=
5774 #include <signal.h>
5775 #include <stdio.h>
5776 #include <stdlib.h>
5777 #include <string.h>
5778 #include <time.h>
5779 #include <unistd.h>
5780
5781 @ @-p int main(int argc,char**argv) {
5782   boolean dvi_mode=0;
5783   @<Set up signal handler@>;
5784   @<Initialize memory@>;
5785   @<Display the banner message@>;
5786   @<Decide whether in DVI reading mode@>;
5787   if(!dvi_mode) @<Open the main input file@>;
5788   @<Initialize the input states@>;
5789   @<Initialize the tables and registers@>;
5790   @<Initialize the random number generator@>;
5791   @<Set registers according to command-line parameters@>;
5792   if(!dvi_mode) @<Process the input files@>;
5793   if(dvi_mode) dvi_mode=read_dvi_file(argv[1]);
5794   @<Call program in \.Z register if necessary@>;
5795   if(!dvi_mode) @<Send |end_transmission| to each card area@>;
5796   @<Write the output files@>;
5797   if(registers['Q'].is_string && dvi_mode &&
5798     (argv[0][0]!='-' || argv[0][1]!='z'))  @<Switch to ImageMagick@>;
5799   return 0;
5800 }
5801
5802 @ @<Display the banner message@>= {
5803   fprintf(stderr,"TeXnicard version %s\n",version_string);
5804   fprintf(stderr,
5805     "This program is free software and comes with NO WARRANTY.\n");
5806   fflush(stderr);
5807 }
5808
5809 @ @<Set registers according to command-line parameters@>= {
5810   int i;
5811   for(i=2;i<argc;i++) {
5812     registers[i+('0'-2)].is_string=1;
5813     registers[i+('0'-2)].text=strdup(argv[i]);
5814   }
5815 }
5816
5817 @ The main input file will be either the terminal, or another file if the
5818 command-line argument is given.
5819
5820 @<Open the main input file@>= {
5821   if(argc>1 && strcmp(argv[1],"-")!=0) {
5822     --current_input_file;
5823     open_input(argv[1]);
5824   } @+else {
5825     current_fp=0;
5826     strcpy(current_filename,"<Teletype>");
5827   }
5828 }
5829
5830 @ @<Call program in \.Z register if necessary@>= {
5831   if(registers['Z'].is_string) execute_program(registers['Z'].text);
5832 }
5833
5834 @ The alternative mode to run this program is DVI mode. DVI mode is
5835 specified by a command-line switch.
5836
5837 @.DVI@>
5838
5839 @<Decide whether in DVI reading mode@>= {
5840   if(argc>1 && argv[1][0]=='-' && argv[1][1]) {
5841     dvi_mode=1;
5842     argv++; @+ argc--;
5843     if(argv[0][1]=='a') {
5844       printing_mode=printing_all_cards;
5845     } @+else if(argv[0][1]=='f') {
5846       printing_mode=printing_list_from_file;
5847       printlistfile=fopen(argv[1],"r");
5848       argv++; @+ argc--;
5849     } @+else if(argv[0][1]=='n') {
5850       printing_mode=printing_list;
5851       printlisttext=argv[1];
5852       argv++; @+ argc--;
5853     } @+else if(argv[0][1]=='z') {
5854       printing_mode=printing_list;
5855       printlisttext="";
5856     }
5857   }
5858 }
5859
5860 @*Signal Handlers. The |SIGSEGV| signal should be handled in case
5861 something goes wrong in the program and it causes a segmentation fault, it
5862 should attempt to recover what you have before terminating, in order to be
5863 better at diagnosing the error.
5864
5865 @<Set up signal handler@>= {
5866   signal(SIGSEGV,handle_crash);
5867 }
5868
5869 @ Some things will be more careful here to ensure not to cause the error
5870 again (if it does, it will just quit, though).
5871
5872 @-p void handle_crash(int sig) {
5873   signal(SIGSEGV,SIG_DFL);
5874   @#fprintf(stderr,"\nFatal signal error (%d)\n",sig);
5875 @.Fatal signal error...@>
5876   fprintf(stderr,"cur_state=%d\ncur_name=%d\ncur_data=%d\n",
5877    cur_state,cur_name,cur_data);
5878   if(current_input_file>=input_files && current_input_file<input_files
5879    +max_input_stack) @<Display input stack after a crash@>;
5880   fprintf(stderr,"Program stack level: %d\n",stack_ptr-stack);
5881   fprintf(stderr,"Save stack level: %d\n",save_stack_ptr-save_stack);
5882   @#exit(3);
5883 }
5884
5885 @ @<Display input stack after a crash@>= {
5886   for(;;) {
5887     fprintf(stderr,"File %s line %d\n",current_filename,current_line);
5888     if(current_input_file--==input_files) break;
5889   }
5890 }
5891
5892 @*The Future. Here are some ideas for future versions of this program:
5893
5894 $\bullet$ A customizable Inform7-like parser, that would compile into a C
5895 code, so that you can play the cards on rule-enforcing computer programs.
5896 @^Inform@>
5897
5898 $\bullet$ A database to keep track of how many copies of a card have been
5899 sold, for inventory purposes.
5900 @^commercial viability@>
5901
5902 $\bullet$ Full text search, for things such as the Oracle text search.
5903 @^Oracle@>
5904
5905 $\bullet$ Allow more than 256 fonts in one card set.
5906
5907 $\bullet$ Unicode input (UTF-8).
5908
5909 $\bullet$ Built-in typesetting (using some of the algorithms of \TeX) and
5910 image manipulation, so that there is no dependence on external programs,
5911 and everything can be done in one pass.
5912
5913 $\bullet$ Big spider!
5914 @^arachnids@>
5915 @^spider@>
5916
5917 @*Bibliography.
5918
5919 \count255=0 %
5920 \long\def\Par{\csname par\endcsname}%
5921 \loop\ifnum\count255<\bibliocount%
5922   \advance\count255 by 1
5923   \Par$^{[\the\count255]}$\csname biblio \the\count255\endcsname\Par%
5924 \repeat%
5925
5926 @*Index. Here you can find references to the definition and use of all the
5927 variables, subroutines, etc.\ used in this program, as well as a few other
5928 things of interest. Underlined entries indicate where it is defined.
5929
5930 {\bf Important note:} All the numbers in this index are section numbers,
5931 not page numbers.
5932
5933 % End of file "texnicard.w"