src/pre-eqn/text.cpp

   1 /*@
   2  * Copyright (c) 2014 - 2017 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
   3  *
   4  * Copyright (C) 1989 - 1992, 2003, 2007
   5  *    Free Software Foundation, Inc.
   6  *      Written by James Clark (jjc@jclark.com)
   7  *
   8  * This is free software; you can redistribute it and/or modify it under
   9  * the terms of the GNU General Public License as published by the Free
  10  * Software Foundation; either version 2, or (at your option) any later
  11  * version.
  12  *
  13  * This is distributed in the hope that it will be useful, but WITHOUT ANY
  14  * WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16  * for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License along
  19  * with groff; see the file COPYING.  If not, write to the Free Software
  20  * Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA.
  21  */
  22
  23 #include "config.h"
  24 #include "eqn-config.h"
  25
  26 #include <ctype.h>
  27
  28 #include "eqn.h"
  29 #include "pbox.h"
  30 #include "ptable.h"
  31
  32 struct map {
  33   const char *from;
  34   const char *to;
  35 };
  36
  37 struct map entity_table[] = { // FIXME const
  38   // Classic troff special characters
  39   {"%", "&shy;"},       // ISOnum
  40   {"'", "&acute;"},     // ISOdia
  41   {"!=", "&ne;"},       // ISOtech
  42   {"**", "&lowast;"},   // ISOtech
  43   {"*a", "&alpha;"},    // ISOgrk3
  44   {"*A", "A"},
  45   {"*b", "&beta;"},     // ISOgrk3
  46   {"*B", "B"},
  47   {"*d", "&delta;"},    // ISOgrk3
  48   {"*D", "&Delta;"},    // ISOgrk3
  49   {"*e", "&epsilon;"},  // ISOgrk3
  50   {"*E", "E"},
  51   {"*f", "&phi;"},      // ISOgrk3
  52   {"*F", "&Phi;"},      // ISOgrk3
  53   {"*g", "&gamma;"},    // ISOgrk3
  54   {"*G", "&Gamma;"},    // ISOgrk3
  55   {"*h", "&theta;"},    // ISOgrk3
  56   {"*H", "&Theta;"},    // ISOgrk3
  57   {"*i", "&iota;"},     // ISOgrk3
  58   {"*I", "I"},
  59   {"*k", "&kappa;"},    // ISOgrk3
  60   {"*K", "K;"},
  61   {"*l", "&lambda;"},   // ISOgrk3
  62   {"*L", "&Lambda;"},   // ISOgrk3
  63   {"*m", "&mu;"},       // ISOgrk3
  64   {"*M", "M"},
  65   {"*n", "&nu;"},       // ISOgrk3
  66   {"*N", "N"},
  67   {"*o", "o"},
  68   {"*O", "O"},
  69   {"*p", "&pi;"},       // ISOgrk3
  70   {"*P", "&Pi;"},       // ISOgrk3
  71   {"*q", "&psi;"},      // ISOgrk3
  72   {"*Q", "&PSI;"},      // ISOgrk3
  73   {"*r", "&rho;"},      // ISOgrk3
  74   {"*R", "R"},
  75   {"*s", "&sigma;"},    // ISOgrk3
  76   {"*S", "&Sigma;"},    // ISOgrk3
  77   {"*t", "&tau;"},      // ISOgrk3
  78   {"*T", "&Tau;"},      // ISOgrk3
  79   {"*u", "&upsilon;"},  // ISOgrk3
  80   {"*U", "&Upsilon;"},  // ISOgrk3
  81   {"*w", "&omega;"},    // ISOgrk3
  82   {"*W", "&Omega;"},    // ISOgrk3
  83   {"*x", "&chi;"},      // ISOgrk3
  84   {"*X", "&Chi;"},      // ISOgrk3
  85   {"*y", "&eta;"},      // ISOgrk3
  86   {"*Y", "&Eta;"},      // ISOgrk3
  87   {"*z", "&zeta;"},     // ISOgrk3
  88   {"*Z", "&Zeta;"},     // ISOgrk3
  89   {"+-", "&plusmn;"},   // ISOnum
  90   {"->", "&rarr;"},     // ISOnum
  91   {"12", "&frac12;"},   // ISOnum
  92   {"14", "&frac14;"},   // ISOnum
  93   {"34", "&frac34;"},   // ISOnum
  94   {"<-", "&larr;"},     // ISOnum
  95   {"==", "&equiv;"},    // ISOtech
  96   {"Fi", "&ffilig;"},   // ISOpub
  97   {"Fl", "&ffllig;"},   // ISOpub
  98   {"aa", "&acute;"},    // ISOdia
  99   {"ap", "&sim;"},      // ISOtech
 100   {"bl", "&phonexb;"},  // ISOpub
 101   {"br", "&boxv;"},     // ISObox
 102   {"bs", "&phone;"},    // ISOpub (for the Bell logo)
 103   {"bu", "&bull;"},     // ISOpub
 104   {"bv", "&verbar;"},   // ISOnum
 105   {"ca", "&cap;"},      // ISOtech
 106   {"ci", "&cir;"},      // ISOpub
 107   {"co", "&copy;"},     // ISOnum
 108   {"ct", "&cent;"},     // ISOnum
 109   {"cu", "&cup;"},      // ISOtech
 110   {"da", "&darr;"},     // ISOnum
 111   {"de", "&deg;"},      // ISOnum
 112   {"dg", "&dagger;"},   // ISOpub
 113   {"dd", "&Dagger;"},   // ISOpub
 114   {"di", "&divide;"},   // ISOnum
 115   {"em", "&mdash;"},    // ISOpub
 116   {"eq", "&equals;"},   // ISOnum
 117   {"es", "&empty;"},    // ISOamso
 118   {"ff", "&fflig;"},    // ISOpub
 119   {"fi", "&filig;"},    // ISOpub
 120   {"fl", "&fllig;"},    // ISOpub
 121   {"fm", "&prime;"},    // ISOtech
 122   {"ge", "&ge;"},       // ISOtech
 123   {"gr", "&nabla;"},    // ISOtech
 124   {"hy", "&hyphen;"},   // ISOnum
 125   {"ib", "&sube;"},     // ISOtech
 126   {"if", "&infin;"},    // ISOtech
 127   {"ip", "&supe;"},     // ISOtech
 128   {"is", "&int;"},      // ISOtech
 129   {"le", "&le;"},       // ISOtech
 130   // Some pile characters go here
 131   {"mi", "&minus;"},    // ISOtech
 132   {"mo", "&isin;"},     // ISOtech
 133   {"mu", "&times;"},    // ISOnum
 134   {"no", "&not;"},      // ISOnum
 135   {"or", "&verbar;"},   // ISOnum
 136   {"pl", "&plus;"},     // ISOnum
 137   {"pt", "&prop;"},     // ISOtech
 138   {"rg", "&trade;"},    // ISOnum
 139   // More pile characters go here
 140   {"rn", "&macr;"},     // ISOdia
 141   {"ru", "&lowbar;"},   // ISOnum
 142   {"sb", "&sub;"},      // ISOtech
 143   {"sc", "&sect;"},     // ISOnum
 144   {"sl", "/"},
 145   {"sp", "&sup;"},      // ISOtech
 146   {"sq", "&squf;"},     // ISOpub
 147   {"sr", "&radic;"},    // ISOtech
 148   {"ts", "&sigmav;"},   // ISOgrk3
 149   {"ua", "&uarr;"},     // ISOnum
 150   {"ul", "_"},
 151   {"~=", "&cong;"},     // ISOtech
 152   // Extended specials supported by groff; see groff_char(7).
 153   // These are listed in the order they occur on that man page.
 154   {"-D", "&ETH;"},      // ISOlat: Icelandic uppercase eth
 155   {"Sd", "&eth;"},      // ISOlat1: Icelandic lowercase eth
 156   {"TP", "&THORN;"},    // ISOlat1: Icelandic uppercase thorn
 157   {"Tp", "&thorn;"},    // ISOlat1: Icelandic lowercase thorn
 158   {"ss", "&szlig;"},    // ISOlat1
 159   // Ligatures
 160   // ff, fi, fl, ffi, ffl from old troff go here
 161   {"AE", "&AElig;"},    // ISOlat1
 162   {"ae", "&aelig;"},    // ISOlat1
 163   {"OE", "&OElig;"},    // ISOlat2
 164   {"oe", "&oelig;"},    // ISOlat2
 165   {"IJ", "&ijlig;"},    // ISOlat2: Dutch IJ ligature
 166   {"ij", "&IJlig;"},    // ISOlat2: Dutch ij ligature
 167   {".i", "&inodot;"},   // ISOlat2,ISOamso
 168   {".j", "&jnodot;"},   // ISOamso (undocumented but in 1.19)
 169   // Accented characters
 170   {"'A", "&Aacute;"},   // ISOlat1
 171   {"'C", "&Cacute;"},   // ISOlat2
 172   {"'E", "&Eacute;"},   // ISOlat1
 173   {"'I", "&Iacute;"},   // ISOlat1
 174   {"'O", "&Oacute;"},   // ISOlat1
 175   {"'U", "&Uacute;"},   // ISOlat1
 176   {"'Y", "&Yacute;"},   // ISOlat1
 177   {"'a", "&aacute;"},   // ISOlat1
 178   {"'c", "&cacute;"},   // ISOlat2
 179   {"'e", "&eacute;"},   // ISOlat1
 180   {"'i", "&iacute;"},   // ISOlat1
 181   {"'o", "&oacute;"},   // ISOlat1
 182   {"'u", "&uacute;"},   // ISOlat1
 183   {"'y", "&yacute;"},   // ISOlat1
 184   {":A", "&Auml;"},     // ISOlat1
 185   {":E", "&Euml;"},     // ISOlat1
 186   {":I", "&Iuml;"},     // ISOlat1
 187   {":O", "&Ouml;"},     // ISOlat1
 188   {":U", "&Uuml;"},     // ISOlat1
 189   {":Y", "&Yuml;"},     // ISOlat2
 190   {":a", "&auml;"},     // ISOlat1
 191   {":e", "&euml;"},     // ISOlat1
 192   {":i", "&iuml;"},     // ISOlat1
 193   {":o", "&ouml;"},     // ISOlat1
 194   {":u", "&uuml;"},     // ISOlat1
 195   {":y", "&yuml;"},     // ISOlat1
 196   {"^A", "&Acirc;"},    // ISOlat1
 197   {"^E", "&Ecirc;"},    // ISOlat1
 198   {"^I", "&Icirc;"},    // ISOlat1
 199   {"^O", "&Ocirc;"},    // ISOlat1
 200   {"^U", "&Ucirc;"},    // ISOlat1
 201   {"^a", "&acirc;"},    // ISOlat1
 202   {"^e", "&ecirc;"},    // ISOlat1
 203   {"^i", "&icirc;"},    // ISOlat1
 204   {"^o", "&ocirc;"},    // ISOlat1
 205   {"^u", "&ucirc;"},    // ISOlat1
 206   {"`A", "&Agrave;"},   // ISOlat1
 207   {"`E", "&Egrave;"},   // ISOlat1
 208   {"`I", "&Igrave;"},   // ISOlat1
 209   {"`O", "&Ograve;"},   // ISOlat1
 210   {"`U", "&Ugrave;"},   // ISOlat1
 211   {"`a", "&agrave;"},   // ISOlat1
 212   {"`e", "&egrave;"},   // ISOlat1
 213   {"`i", "&igrave;"},   // ISOlat1
 214   {"`o", "&ograve;"},   // ISOlat1
 215   {"`u", "&ugrave;"},   // ISOlat1
 216   {"~A", "&Atilde;"},   // ISOlat1
 217   {"~N", "&Ntilde;"},   // ISOlat1
 218   {"~O", "&Otilde;"},   // ISOlat1
 219   {"~a", "&atilde;"},   // ISOlat1
 220   {"~n", "&ntilde;"},   // ISOlat1
 221   {"~o", "&otilde;"},   // ISOlat1
 222   {"vS", "&Scaron;"},   // ISOlat2
 223   {"vs", "&scaron;"},   // ISOlat2
 224   {"vZ", "&Zcaron;"},   // ISOlat2
 225   {"vz", "&zcaron;"},   // ISOlat2
 226   {",C", "&Ccedil;"},   // ISOlat1
 227   {",c", "&ccedil;"},   // ISOlat1
 228   {"/L", "&Lstrok;"},   // ISOlat2: Polish L with a slash
 229   {"/l", "&lstrok;"},   // ISOlat2: Polish l with a slash
 230   {"/O", "&Oslash;"},   // ISOlat1
 231   {"/o", "&oslash;"},   // ISOlat1
 232   {"oA", "&Aring;"},    // ISOlat1
 233   {"oa", "&aring;"},    // ISOlat1
 234   // Accents
 235   {"a\"","&dblac;"},    // ISOdia: double acute accent (Hungarian umlaut)
 236   {"a-", "&macr;"},     // ISOdia: macron or bar accent
 237   {"a.", "&dot;"},      // ISOdia: dot above
 238   {"a^", "&circ;"},     // ISOdia: circumflex accent
 239   {"aa", "&acute;"},    // ISOdia: acute accent
 240   {"ga", "&grave;"},    // ISOdia: grave accent
 241   {"ab", "&breve;"},    // ISOdia: breve accent
 242   {"ac", "&cedil;"},    // ISOdia: cedilla accent
 243   {"ad", "&uml;"},      // ISOdia: umlaut or dieresis
 244   {"ah", "&caron;"},    // ISOdia: caron (aka hacek accent)
 245   {"ao", "&ring;"},     // ISOdia: ring or circle accent
 246   {"a~", "&tilde;"},    // ISOdia: tilde accent
 247   {"ho", "&ogon;"},     // ISOdia: hook or ogonek accent
 248   {"ha", "^"},          // ASCII circumflex, hat, caret
 249   {"ti", "~"},          // ASCII tilde, large tilde
 250   // Quotes
 251   {"Bq", "&lsquor;"},   // ISOpub: low double comma quote
 252   {"bq", "&ldquor;"},   // ISOpub: low single comma quote
 253   {"lq", "&ldquo;"},    // ISOnum
 254   {"rq", "&rdquo;"},    // ISOpub
 255   {"oq", "&lsquo;"},    // ISOnum: single open quote
 256   {"cq", "&rsquo;"},    // ISOnum: single closing quote (ASCII 39)
 257   {"aq", "&zerosp;'"},  // apostrophe quote
 258   {"dq", "\""},         // double quote (ASCII 34)
 259   {"Fo", "&laquo;"},    // ISOnum
 260   {"Fc", "&raquo;"},    // ISOnum
 261   //{"fo", "&fo;"},
 262   //{"fc", "&fc;"},
 263   // Punctuation
 264   {"r!", "&iexcl;"},    // ISOnum
 265   {"r?", "&iquest;"},   // ISOnum
 266   // Old troff \(em goes here
 267   {"en", "&ndash;"},    // ISOpub: en dash
 268   // Old troff \(hy goes here
 269   // Brackets
 270   {"lB", "&lsqb;"},     // ISOnum: left (square) bracket
 271   {"rB", "&rsqb;"},     // ISOnum: right (square) bracket
 272   {"lC", "&lcub;"},     // ISOnum: left (curly) brace
 273   {"rC", "&rcub;"},     // ISOnum: right (curly) brace
 274   {"la", "&lang;"},     // ISOtech: left angle bracket
 275   {"ra", "&rang;"},     // ISOtech: right angle bracket
 276   // Old troff \(bv goes here
 277   // Bracket-pile characters could go here.
 278   // Arrows
 279   // Old troff \(<- and \(-> go here
 280   {"<>", "&harr;"},     // ISOamsa
 281   {"da", "&darr;"},     // ISOnum
 282   {"ua", "&uarr;"},     // ISOnum
 283   {"lA", "&lArr;"},     // ISOtech
 284   {"rA", "&rArr;"},     // ISOtech
 285   {"hA", "&iff;"},      // ISOtech: horizontal double-headed arrow
 286   {"dA", "&dArr;"},     // ISOamsa
 287   {"uA", "&uArr;"},     // ISOamsa
 288   {"vA", "&vArr;"},     // ISOamsa: vertical double-headed double arrow
 289   //{"an", "&an;"},
 290   // Lines
 291   {"-h", "&planck;"},   // ISOamso: h-bar (Planck's constant)
 292   // Old troff \(or goes here
 293   {"ba", "&verbar;"},   // ISOnum
 294   // Old troff \(br, \{u, \(ul, \(bv go here
 295   {"bb", "&brvbar;"},   // ISOnum
 296   {"sl", "/"},
 297   {"rs", "&bsol;"},     // ISOnum
 298   // Text markers
 299   // Old troff \(ci, \(bu, \(dd, \(dg go here
 300   {"lz", "&loz;"},      // ISOpub
 301   // Old troff sq goes here
 302   {"ps", "&para;"},     // ISOnum: paragraph or pilcrow sign
 303   {"sc", "&sect;"},     // ISOnum (in old troff)
 304   // Old troff \(lh, \{h go here
 305   {"at", "&commat;"},   // ISOnum
 306   {"sh", "&num;"},      // ISOnum
 307   //{"CR", "&CR;"},
 308   {"OK", "&check;"},    // ISOpub
 309   // Legalize
 310   // Old troff \(co, \{g go here
 311   {"tm", "&trade;"},    // ISOnum
 312   // Currency symbols
 313   {"Do", "&dollar;"},   // ISOnum
 314   {"ct", "&cent;"},     // ISOnum
 315   {"eu", "&euro;"},
 316   {"Eu", "&euro;"},
 317   {"Ye", "&yen;"},      // ISOnum
 318   {"Po", "&pound;"},    // ISOnum
 319   {"Cs", "&curren;"},   // ISOnum: currency sign
 320   {"Fn", "&fnof"},      // ISOtech
 321   // Units
 322   // Old troff de goes here
 323   {"%0", "&permil;"},   // ISOtech: per thousand, per mille sign
 324   // Old troff \(fm goes here
 325   {"sd", "&Prime;"},    // ISOtech
 326   {"mc", "&micro;"},    // ISOnum
 327   {"Of", "&ordf;"},     // ISOnum
 328   {"Om", "&ordm;"},     // ISOnum
 329   // Logical symbols
 330   {"AN", "&and;"},      // ISOtech
 331   {"OR", "&or;"},       // ISOtech
 332   // Old troff \(no goes here
 333   {"te", "&exist;"},    // ISOtech: there exists, existential quantifier
 334   {"fa", "&forall;"},   // ISOtech: for all, universal quantifier
 335   {"st", "&bepsi"},     // ISOamsr: such that
 336   {"3d", "&there4;"},   // ISOtech
 337   {"tf", "&there4;"},   // ISOtech
 338   // Mathematical symbols
 339   // Old troff "12", "14", "34" goes here
 340   {"S1", "&sup1;"},     // ISOnum
 341   {"S2", "&sup2;"},     // ISOnum
 342   {"S3", "&sup3;"},     // ISOnum
 343   // Old troff \(pl", \-, \(+- go here
 344   {"t+-", "&plusmn;"},  // ISOnum
 345   {"-+", "&mnplus;"},   // ISOtech
 346   {"pc", "&middot;"},   // ISOnum
 347   {"md", "&middot;"},   // ISOnum
 348   // Old troff \(mu goes here
 349   {"tmu", "&times;"},   // ISOnum
 350   {"c*", "&otimes;"},   // ISOamsb: multiply sign in a circle
 351   {"c+", "&oplus;"},    // ISOamsb: plus sign in a circle
 352   // Old troff \(di goes here
 353   {"tdi", "&divide;"},  // ISOnum
 354   {"f/", "&horbar;"},   // ISOnum: horizintal bar for fractions
 355   // Old troff \(** goes here
 356   {"<=", "&le;"},       // ISOtech
 357   {">=", "&ge;"},       // ISOtech
 358   {"<<", "&Lt;"},       // ISOamsr
 359   {">>", "&Gt;"},       // ISOamsr
 360   {"!=", "&ne;"},       // ISOtech
 361   // Old troff \(eq and \(== go here
 362   {"=~", "&cong;"},     // ISOamsr
 363   // Old troff \(ap goes here
 364   {"~~", "&ap;"},       // ISOtech
 365   // This appears to be an error in the groff table.
 366   // It clashes with the Bell Labs use of ~= for a congruence sign
 367   // {"~=", "&ap;"},    // ISOamsr
 368   // Old troff \(pt, \(es, \(mo go here
 369   {"nm", "&notin;"},    // ISOtech
 370   {"nb", "&nsub;"},     // ISOamsr
 371   {"nc", "&nsup;"},     // ISOamsn
 372   {"ne", "&nequiv;"},   // ISOamsn
 373   // Old troff \(sb, \(sp, \(ib, \(ip, \(ca, \(cu go here
 374   {"/_", "&ang;"},      // ISOamso
 375   {"pp", "&perp;"},     // ISOtech
 376   // Old troff \(is goes here
 377   {"sum", "&sum;"},     // ISOamsb
 378   {"product", "&prod;"},        // ISOamsb
 379   {"gr", "&nabla;"},    // ISOtech
 380   // Old troff \(sr. \{n, \(if go here
 381   {"Ah", "&aleph;"},    // ISOtech
 382   {"Im", "&image;"},    // ISOamso: Fraktur I, imaginary
 383   {"Re", "&real;"},     // ISOamso: Fraktur R, real
 384   {"wp", "&weierp;"},   // ISOamso
 385   {"pd", "&part;"},     // ISOtech: partial differentiation sign
 386   // Their table duplicates the Greek letters here.
 387   // We list only the variant forms here, mapping them into
 388   // the ISO Greek 4 variants (which may or may not be correct :-()
 389   {"+f", "&b.phiv;"},   // ISOgrk4: variant phi
 390   {"+h", "&b.thetas;"}, // ISOgrk4: variant theta
 391   {"+p", "&b.omega;"},  // ISOgrk4: variant pi, looking like omega
 392   // Card symbols
 393   {"CL", "&clubs;"},    // ISOpub: club suit
 394   {"SP", "&spades;"},   // ISOpub: spade suit
 395   {"HE", "&hearts;"},   // ISOpub: heart suit
 396   {"DI", "&diams;"},    // ISOpub: diamond suit
 397 };
 398
 399 const char *special_to_entity(const char *sp)
 400 {
 401   struct map *mp;
 402   for (mp = entity_table; mp < entity_table + NELEM(entity_table); ++mp)
 403     if (!strcmp(mp->from, sp))
 404       return mp->to;
 405   return NULL;
 406 }
 407
 408 class char_box
 409 : public simple_box
 410 {
 411   unsigned char c;
 412   char next_is_italic;
 413   char prev_is_italic;
 414
 415 public:
 416   char_box(unsigned char);
 417   void debug_print();
 418   void output();
 419   int is_char();
 420   int left_is_italic();
 421   int right_is_italic();
 422   void hint(unsigned);
 423   void handle_char_type(int, int);
 424 };
 425
 426 class special_char_box
 427 : public simple_box
 428 {
 429   char *s;
 430
 431 public:
 432   special_char_box(const char *);
 433   ~special_char_box();
 434   void output();
 435   void debug_print();
 436   int is_char();
 437   void handle_char_type(int, int);
 438 };
 439
 440 enum spacing_type {
 441   s_ordinary,
 442   s_operator,
 443   s_binary,
 444   s_relation,
 445   s_opening,
 446   s_closing,
 447   s_punctuation,
 448   s_inner,
 449   s_suppress
 450 };
 451
 452 const char *spacing_type_table[] = { // FIXME const
 453   "ordinary",
 454   "operator",
 455   "binary",
 456   "relation",
 457   "opening",
 458   "closing",
 459   "punctuation",
 460   "inner",
 461   "suppress",
 462   0,
 463 };
 464
 465 const int DIGIT_TYPE = 0;
 466 const int LETTER_TYPE = 1;
 467
 468 const char *font_type_table[] = { // FIXME const
 469   "digit",
 470   "letter",
 471   0,
 472 };
 473
 474 class char_info
 475 {
 476 public:
 477   int spacing_type;
 478   int font_type;
 479   char_info();
 480 };
 481
 482 char_info::char_info()
 483 : spacing_type(ORDINARY_TYPE), font_type(DIGIT_TYPE)
 484 {
 485 }
 486
 487 static char_info char_table[256];
 488
 489 declare_ptable(char_info)
 490 implement_ptable(char_info)
 491
 492 PTABLE(char_info) special_char_table;
 493
 494 static int get_special_char_spacing_type(const char *ch)
 495 {
 496   char_info *p = special_char_table.lookup(ch);
 497   return p ? p->spacing_type : ORDINARY_TYPE;
 498 }
 499
 500 static int get_special_char_font_type(const char *ch)
 501 {
 502   char_info *p = special_char_table.lookup(ch);
 503   return p ? p->font_type : DIGIT_TYPE;
 504 }
 505
 506 static void set_special_char_type(const char *ch, int st, int ft)
 507 {
 508   char_info *p = special_char_table.lookup(ch);
 509   if (!p) {
 510     p = new char_info[1];
 511     special_char_table.define(ch, p);
 512   }
 513   if (st >= 0)
 514     p->spacing_type = st;
 515   if (ft >= 0)
 516     p->font_type = ft;
 517 }
 518
 519 void init_char_table()
 520 {
 521   set_special_char_type("pl", s_binary, -1);
 522   set_special_char_type("mi", s_binary, -1);
 523   set_special_char_type("eq", s_relation, -1);
 524   set_special_char_type("<=", s_relation, -1);
 525   set_special_char_type(">=", s_relation, -1);
 526   char_table['}'].spacing_type = s_closing;
 527   char_table[')'].spacing_type = s_closing;
 528   char_table[']'].spacing_type = s_closing;
 529   char_table['{'].spacing_type = s_opening;
 530   char_table['('].spacing_type = s_opening;
 531   char_table['['].spacing_type = s_opening;
 532   char_table[','].spacing_type = s_punctuation;
 533   char_table[';'].spacing_type = s_punctuation;
 534   char_table[':'].spacing_type = s_punctuation;
 535   char_table['.'].spacing_type = s_punctuation;
 536   char_table['>'].spacing_type = s_relation;
 537   char_table['<'].spacing_type = s_relation;
 538   char_table['*'].spacing_type = s_binary;
 539   for (int i = 0; i < 256; i++)
 540     if (csalpha(i))
 541       char_table[i].font_type = LETTER_TYPE;
 542 }
 543
 544 static int lookup_spacing_type(const char *type)
 545 {
 546   for (int i = 0; spacing_type_table[i] != 0; i++)
 547     if (strcmp(spacing_type_table[i], type) == 0)
 548       return i;
 549   return -1;
 550 }
 551
 552 static int lookup_font_type(const char *type)
 553 {
 554   for (int i = 0; font_type_table[i] != 0; i++)
 555     if (strcmp(font_type_table[i], type) == 0)
 556       return i;
 557   return -1;
 558 }
 559
 560 void box::set_spacing_type(char *type)
 561 {
 562   int t = lookup_spacing_type(type);
 563   if (t < 0)
 564     error("unrecognised type `%1'", type);
 565   else
 566     spacing_type = t;
 567   a_delete type;
 568 }
 569
 570 char_box::char_box(unsigned char cc)
 571 : c(cc), next_is_italic(0), prev_is_italic(0)
 572 {
 573   spacing_type = char_table[c].spacing_type;
 574 }
 575
 576 void char_box::hint(unsigned flags)
 577 {
 578   if (flags & HINT_PREV_IS_ITALIC)
 579     prev_is_italic = 1;
 580   if (flags & HINT_NEXT_IS_ITALIC)
 581     next_is_italic = 1;
 582 }
 583
 584 void char_box::output()
 585 {
 586   if (output_format == troff) {
 587     int font_type = char_table[c].font_type;
 588     if (font_type != LETTER_TYPE)
 589       printf("\\f[%s]", current_roman_font);
 590     if (!prev_is_italic)
 591       fputs("\\,", stdout);
 592     if (c == '\\')
 593       fputs("\\e", stdout);
 594     else
 595       putchar(c);
 596     if (!next_is_italic)
 597       fputs("\\/", stdout);
 598     else
 599       fputs("\\&", stdout);             // suppress ligaturing and kerning
 600     if (font_type != LETTER_TYPE)
 601       fputs("\\fP", stdout);
 602   }
 603   else if (output_format == mathml) {
 604     if (isdigit(c))
 605       printf("<mn>");
 606     else if (char_table[c].spacing_type)
 607       printf("<mo>");
 608     else
 609       printf("<mi>");
 610     if (c == '<')
 611       printf("&lt;");
 612     else if (c == '>')
 613       printf("&gt;");
 614     else if (c == '&')
 615       printf("&amp;");
 616     else
 617       putchar(c);
 618     if (isdigit(c))
 619       printf("</mn>");
 620     else if (char_table[c].spacing_type)
 621       printf("</mo>");
 622     else
 623       printf("</mi>");
 624   }
 625 }
 626
 627 int char_box::left_is_italic()
 628 {
 629   int font_type = char_table[c].font_type;
 630   return font_type == LETTER_TYPE;
 631 }
 632
 633 int char_box::right_is_italic()
 634 {
 635   int font_type = char_table[c].font_type;
 636   return font_type == LETTER_TYPE;
 637 }
 638
 639 int char_box::is_char()
 640 {
 641   return 1;
 642 }
 643
 644 void char_box::debug_print()
 645 {
 646   if (c == '\\') {
 647     putc('\\', stderr);
 648     putc('\\', stderr);
 649   }
 650   else
 651     putc(c, stderr);
 652 }
 653
 654 special_char_box::special_char_box(const char *t)
 655 {
 656   s = strsave(t);
 657   spacing_type = get_special_char_spacing_type(s);
 658 }
 659
 660 special_char_box::~special_char_box()
 661 {
 662   a_delete s;
 663 }
 664
 665 void special_char_box::output()
 666 {
 667   if (output_format == troff) {
 668     int font_type = get_special_char_font_type(s);
 669     if (font_type != LETTER_TYPE)
 670       printf("\\f[%s]", current_roman_font);
 671     printf("\\,\\[%s]\\/", s);
 672     if (font_type != LETTER_TYPE)
 673       printf("\\fP");
 674   }
 675   else if (output_format == mathml) {
 676     const char *entity = special_to_entity(s);
 677     if (entity != NULL)
 678       printf("<mo>%s</mo>", entity);
 679     else
 680       printf("<merror>unknown eqn/troff special char %s</merror>", s);
 681   }
 682 }
 683
 684 int special_char_box::is_char()
 685 {
 686   return 1;
 687 }
 688
 689 void special_char_box::debug_print()
 690 {
 691   fprintf(stderr, "\\[%s]", s);
 692 }
 693
 694 void char_box::handle_char_type(int st, int ft)
 695 {
 696   if (st >= 0)
 697     char_table[c].spacing_type = st;
 698   if (ft >= 0)
 699     char_table[c].font_type = ft;
 700 }
 701
 702 void special_char_box::handle_char_type(int st, int ft)
 703 {
 704   set_special_char_type(s, st, ft);
 705 }
 706
 707 void set_char_type(const char *type, char *ch)
 708 {
 709   assert(ch != 0);
 710   int st = lookup_spacing_type(type);
 711   int ft = lookup_font_type(type);
 712   if (st < 0 && ft < 0) {
 713     error("bad character type `%1'", type);
 714     a_delete ch;
 715     return;
 716   }
 717   box *b = split_text(ch);
 718   b->handle_char_type(st, ft);
 719   delete b;
 720 }
 721
 722 /* We give primes special treatment so that in ``x' sub 2'', the ``2''
 723 will be tucked under the prime */
 724
 725 class prime_box
 726 : public pointer_box
 727 {
 728   box *pb;
 729
 730 public:
 731   prime_box(box *);
 732   ~prime_box();
 733   int compute_metrics(int style);
 734   void output();
 735   void compute_subscript_kern();
 736   void debug_print();
 737   void handle_char_type(int, int);
 738 };
 739
 740 box *make_prime_box(box *pp)
 741 {
 742   return new prime_box(pp);
 743 }
 744
 745 prime_box::prime_box(box *pp) : pointer_box(pp)
 746 {
 747   pb = new special_char_box("fm");
 748 }
 749
 750 prime_box::~prime_box()
 751 {
 752   delete pb;
 753 }
 754
 755 int prime_box::compute_metrics(int style)
 756 {
 757   int res = p->compute_metrics(style);
 758   pb->compute_metrics(style);
 759   printf(".nr " WIDTH_FORMAT " 0\\n[" WIDTH_FORMAT "]"
 760          "+\\n[" WIDTH_FORMAT "]\n",
 761          uid, p->uid, pb->uid);
 762   printf(".nr " HEIGHT_FORMAT " \\n[" HEIGHT_FORMAT "]"
 763          ">?\\n[" HEIGHT_FORMAT "]\n",
 764          uid, p->uid, pb->uid);
 765   printf(".nr " DEPTH_FORMAT " \\n[" DEPTH_FORMAT "]"
 766          ">?\\n[" DEPTH_FORMAT "]\n",
 767          uid, p->uid, pb->uid);
 768   return res;
 769 }
 770
 771 void prime_box::compute_subscript_kern()
 772 {
 773   p->compute_subscript_kern();
 774   printf(".nr " SUB_KERN_FORMAT " 0\\n[" WIDTH_FORMAT "]"
 775          "+\\n[" SUB_KERN_FORMAT "]>?0\n",
 776          uid, pb->uid, p->uid);
 777 }
 778
 779 void prime_box::output()
 780 {
 781   p->output();
 782   pb->output();
 783 }
 784
 785 void prime_box::handle_char_type(int st, int ft)
 786 {
 787   p->handle_char_type(st, ft);
 788   pb->handle_char_type(st, ft);
 789 }
 790
 791 void prime_box::debug_print()
 792 {
 793   p->debug_print();
 794   putc('\'', stderr);
 795 }
 796
 797 box *split_text(char *text)
 798 {
 799   list_box *lb = 0;
 800   box *fb = 0;
 801   char *s = text;
 802   while (*s != '\0') {
 803     char c = *s++;
 804     box *b = 0;
 805     switch (c) {
 806     case '+':
 807       b = new special_char_box("pl");
 808       break;
 809     case '-':
 810       b = new special_char_box("mi");
 811       break;
 812     case '=':
 813       b = new special_char_box("eq");
 814       break;
 815     case '\'':
 816       b = new special_char_box("fm");
 817       break;
 818     case '<':
 819       if (*s == '=') {
 820         b = new special_char_box("<=");
 821         s++;
 822         break;
 823       }
 824       goto normal_char;
 825     case '>':
 826       if (*s == '=') {
 827         b = new special_char_box(">=");
 828         s++;
 829         break;
 830       }
 831       goto normal_char;
 832     case '\\':
 833       if (*s == '\0') {
 834         lex_error("bad escape");
 835         break;
 836       }
 837       c = *s++;
 838       switch (c) {
 839       case '(':
 840         {
 841           char buf[3];
 842           if (*s != '\0') {
 843             buf[0] = *s++;
 844             if (*s != '\0') {
 845               buf[1] = *s++;
 846               buf[2] = '\0';
 847               b = new special_char_box(buf);
 848             }
 849             else {
 850               lex_error("bad escape");
 851             }
 852           }
 853           else {
 854             lex_error("bad escape");
 855           }
 856         }
 857         break;
 858       case '[':
 859         {
 860           char *ch = s;
 861           while (*s != ']' && *s != '\0')
 862             s++;
 863           if (*s == '\0')
 864             lex_error("bad escape");
 865           else {
 866             *s++ = '\0';
 867             b = new special_char_box(ch);
 868           }
 869         }
 870         break;
 871       case 'f':
 872       case 'g':
 873       case 'k':
 874       case 'n':
 875       case '*':
 876         {
 877           char *escape_start = s - 2;
 878           switch (*s) {
 879           case '(':
 880             if (*++s != '\0')
 881               ++s;
 882             break;
 883           case '[':
 884             for (++s; *s != '\0' && *s != ']'; s++)
 885               ;
 886             break;
 887           }
 888           if (*s == '\0')
 889             lex_error("bad escape");
 890           else {
 891             ++s;
 892             char *buf = new char[s - escape_start + 1];
 893             memcpy(buf, escape_start, s - escape_start);
 894             buf[s - escape_start] = '\0';
 895             b = new quoted_text_box(buf);
 896           }
 897         }
 898         break;
 899       case '-':
 900       case '_':
 901         {
 902           char buf[2];
 903           buf[0] = c;
 904           buf[1] = '\0';
 905           b = new special_char_box(buf);
 906         }
 907         break;
 908       case '`':
 909         b = new special_char_box("ga");
 910         break;
 911       case '\'':
 912         b = new special_char_box("aa");
 913         break;
 914       case 'e':
 915       case '\\':
 916         b = new char_box('\\');
 917         break;
 918       case '^':
 919       case '|':
 920       case '0':
 921         {
 922           char buf[3];
 923           buf[0] = '\\';
 924           buf[1] = c;
 925           buf[2] = '\0';
 926           b = new quoted_text_box(strsave(buf));
 927           break;
 928         }
 929       default:
 930         lex_error("unquoted escape");
 931         b = new quoted_text_box(strsave(s - 2));
 932         s = strchr(s, '\0');
 933         break;
 934       }
 935       break;
 936     default:
 937     normal_char:
 938       b = new char_box(c);
 939       break;
 940     }
 941     while (*s == '\'') {
 942       if (b == 0)
 943         b = new quoted_text_box(0);
 944       b = new prime_box(b);
 945       s++;
 946     }
 947     if (b != 0) {
 948       if (lb != 0)
 949         lb->append(b);
 950       else if (fb != 0) {
 951         lb = new list_box(fb);
 952         lb->append(b);
 953       }
 954       else
 955         fb = b;
 956     }
 957   }
 958   a_delete text;
 959   if (lb != 0)
 960     return lb;
 961   else if (fb != 0)
 962     return fb;
 963   else
 964     return new quoted_text_box(0);
 965 }
 966
 967 // s-it2-mode