Bugfix: eqn: fix HTML entity (Brian McGuinness)..
[s-roff.git] / src / pre-eqn / text.cpp
blob3563d025e718430cda95409b95f4161f6340bf16
1 /*@
2 * Copyright (c) 2014 - 2017 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
4 * Copyright (C) 1989 - 1992, 2003, 2007
5 * Free Software Foundation, Inc.
6 * Written by James Clark (jjc@jclark.com)
8 * This is free software; you can redistribute it and/or modify it under
9 * the terms of the GNU General Public License as published by the Free
10 * Software Foundation; either version 2, or (at your option) any later
11 * version.
13 * This is distributed in the hope that it will be useful, but WITHOUT ANY
14 * WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 * for more details.
18 * You should have received a copy of the GNU General Public License along
19 * with groff; see the file COPYING. If not, write to the Free Software
20 * Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA.
23 #include "config.h"
24 #include "eqn-config.h"
26 #include <ctype.h>
28 #include "eqn.h"
29 #include "pbox.h"
30 #include "ptable.h"
// One row of the special-character translation table: `from' is a troff
// special character name, `to' is the text emitted for it in MathML output
// (normally an SGML/HTML entity reference, sometimes a plain character).
struct map {
  const char *from;
  const char *to;
};

// Translation table searched linearly by special_to_entity(); where a name
// occurs more than once, the first occurrence is the one that is found.
// Entity names are case-sensitive and must end in ';'.
struct map entity_table[] = { // FIXME const
  // Classic troff special characters
  {"%", "&shy;"}, // ISOnum
  {"'", "&acute;"}, // ISOdia
  {"!=", "&ne;"}, // ISOtech
  {"**", "&lowast;"}, // ISOtech
  {"*a", "&alpha;"}, // ISOgrk3
  {"*A", "A"},
  {"*b", "&beta;"}, // ISOgrk3
  {"*B", "B"},
  {"*d", "&delta;"}, // ISOgrk3
  {"*D", "&Delta;"}, // ISOgrk3
  {"*e", "&epsilon;"}, // ISOgrk3
  {"*E", "E"},
  {"*f", "&phi;"}, // ISOgrk3
  {"*F", "&Phi;"}, // ISOgrk3
  {"*g", "&gamma;"}, // ISOgrk3
  {"*G", "&Gamma;"}, // ISOgrk3
  {"*h", "&theta;"}, // ISOgrk3
  {"*H", "&Theta;"}, // ISOgrk3
  {"*i", "&iota;"}, // ISOgrk3
  {"*I", "I"},
  {"*k", "&kappa;"}, // ISOgrk3
  {"*K", "K"},
  {"*l", "&lambda;"}, // ISOgrk3
  {"*L", "&Lambda;"}, // ISOgrk3
  {"*m", "&mu;"}, // ISOgrk3
  {"*M", "M"},
  {"*n", "&nu;"}, // ISOgrk3
  {"*N", "N"},
  {"*o", "o"},
  {"*O", "O"},
  {"*p", "&pi;"}, // ISOgrk3
  {"*P", "&Pi;"}, // ISOgrk3
  {"*q", "&psi;"}, // ISOgrk3
  {"*Q", "&Psi;"}, // ISOgrk3
  {"*r", "&rho;"}, // ISOgrk3
  {"*R", "R"},
  {"*s", "&sigma;"}, // ISOgrk3
  {"*S", "&Sigma;"}, // ISOgrk3
  {"*t", "&tau;"}, // ISOgrk3
  {"*T", "&Tau;"}, // ISOgrk3
  {"*u", "&upsilon;"}, // ISOgrk3
  {"*U", "&Upsilon;"}, // ISOgrk3
  {"*w", "&omega;"}, // ISOgrk3
  {"*W", "&Omega;"}, // ISOgrk3
  {"*x", "&chi;"}, // ISOgrk3
  {"*X", "&Chi;"}, // ISOgrk3
  {"*y", "&eta;"}, // ISOgrk3
  {"*Y", "&Eta;"}, // ISOgrk3
  {"*z", "&zeta;"}, // ISOgrk3
  {"*Z", "&Zeta;"}, // ISOgrk3
  {"+-", "&plusmn;"}, // ISOnum
  {"->", "&rarr;"}, // ISOnum
  {"12", "&frac12;"}, // ISOnum
  {"14", "&frac14;"}, // ISOnum
  {"34", "&frac34;"}, // ISOnum
  {"<-", "&larr;"}, // ISOnum
  {"==", "&equiv;"}, // ISOtech
  {"Fi", "&ffilig;"}, // ISOpub
  {"Fl", "&ffllig;"}, // ISOpub
  {"aa", "&acute;"}, // ISOdia
  {"ap", "&sim;"}, // ISOtech
  {"bl", "&phonexb;"}, // ISOpub
  {"br", "&boxv;"}, // ISObox
  {"bs", "&phone;"}, // ISOpub (for the Bell logo)
  {"bu", "&bull;"}, // ISOpub
  {"bv", "&verbar;"}, // ISOnum
  {"ca", "&cap;"}, // ISOtech
  {"ci", "&cir;"}, // ISOpub
  {"co", "&copy;"}, // ISOnum
  {"ct", "&cent;"}, // ISOnum
  {"cu", "&cup;"}, // ISOtech
  {"da", "&darr;"}, // ISOnum
  {"de", "&deg;"}, // ISOnum
  {"dg", "&dagger;"}, // ISOpub
  {"dd", "&Dagger;"}, // ISOpub
  {"di", "&divide;"}, // ISOnum
  {"em", "&mdash;"}, // ISOpub
  {"eq", "&equals;"}, // ISOnum
  {"es", "&empty;"}, // ISOamso
  {"ff", "&fflig;"}, // ISOpub
  {"fi", "&filig;"}, // ISOpub
  {"fl", "&fllig;"}, // ISOpub
  {"fm", "&prime;"}, // ISOtech
  {"ge", "&ge;"}, // ISOtech
  {"gr", "&nabla;"}, // ISOtech
  {"hy", "&hyphen;"}, // ISOnum
  {"ib", "&sube;"}, // ISOtech
  {"if", "&infin;"}, // ISOtech
  {"ip", "&supe;"}, // ISOtech
  {"is", "&int;"}, // ISOtech
  {"le", "&le;"}, // ISOtech
  // Some pile characters go here
  {"mi", "&minus;"}, // ISOtech
  {"mo", "&isin;"}, // ISOtech
  {"mu", "&times;"}, // ISOnum
  {"no", "&not;"}, // ISOnum
  {"or", "&verbar;"}, // ISOnum
  {"pl", "&plus;"}, // ISOnum
  {"pt", "&prop;"}, // ISOtech
  {"rg", "&reg;"}, // ISOnum: registered sign (trademark is \(tm)
  // More pile characters go here
  {"rn", "&macr;"}, // ISOdia
  {"ru", "&lowbar;"}, // ISOnum
  {"sb", "&sub;"}, // ISOtech
  {"sc", "&sect;"}, // ISOnum
  {"sl", "/"},
  {"sp", "&sup;"}, // ISOtech
  {"sq", "&squf;"}, // ISOpub
  {"sr", "&radic;"}, // ISOtech
  {"ts", "&sigmav;"}, // ISOgrk3
  {"ua", "&uarr;"}, // ISOnum
  {"ul", "_"},
  {"~=", "&cong;"}, // ISOtech
  // Extended specials supported by groff; see groff_char(7).
  // These are listed in the order they occur on that man page.
  {"-D", "&ETH;"}, // ISOlat1: Icelandic uppercase eth
  {"Sd", "&eth;"}, // ISOlat1: Icelandic lowercase eth
  {"TP", "&THORN;"}, // ISOlat1: Icelandic uppercase thorn
  {"Tp", "&thorn;"}, // ISOlat1: Icelandic lowercase thorn
  {"ss", "&szlig;"}, // ISOlat1
  // Ligatures
  // ff, fi, fl, ffi, ffl from old troff go here
  {"AE", "&AElig;"}, // ISOlat1
  {"ae", "&aelig;"}, // ISOlat1
  {"OE", "&OElig;"}, // ISOlat2
  {"oe", "&oelig;"}, // ISOlat2
  {"IJ", "&IJlig;"}, // ISOlat2: Dutch IJ ligature
  {"ij", "&ijlig;"}, // ISOlat2: Dutch ij ligature
  {".i", "&inodot;"}, // ISOlat2,ISOamso
  {".j", "&jnodot;"}, // ISOamso (undocumented but in 1.19)
  // Accented characters
  {"'A", "&Aacute;"}, // ISOlat1
  {"'C", "&Cacute;"}, // ISOlat2
  {"'E", "&Eacute;"}, // ISOlat1
  {"'I", "&Iacute;"}, // ISOlat1
  {"'O", "&Oacute;"}, // ISOlat1
  {"'U", "&Uacute;"}, // ISOlat1
  {"'Y", "&Yacute;"}, // ISOlat1
  {"'a", "&aacute;"}, // ISOlat1
  {"'c", "&cacute;"}, // ISOlat2
  {"'e", "&eacute;"}, // ISOlat1
  {"'i", "&iacute;"}, // ISOlat1
  {"'o", "&oacute;"}, // ISOlat1
  {"'u", "&uacute;"}, // ISOlat1
  {"'y", "&yacute;"}, // ISOlat1
  {":A", "&Auml;"}, // ISOlat1
  {":E", "&Euml;"}, // ISOlat1
  {":I", "&Iuml;"}, // ISOlat1
  {":O", "&Ouml;"}, // ISOlat1
  {":U", "&Uuml;"}, // ISOlat1
  {":Y", "&Yuml;"}, // ISOlat2
  {":a", "&auml;"}, // ISOlat1
  {":e", "&euml;"}, // ISOlat1
  {":i", "&iuml;"}, // ISOlat1
  {":o", "&ouml;"}, // ISOlat1
  {":u", "&uuml;"}, // ISOlat1
  {":y", "&yuml;"}, // ISOlat1
  {"^A", "&Acirc;"}, // ISOlat1
  {"^E", "&Ecirc;"}, // ISOlat1
  {"^I", "&Icirc;"}, // ISOlat1
  {"^O", "&Ocirc;"}, // ISOlat1
  {"^U", "&Ucirc;"}, // ISOlat1
  {"^a", "&acirc;"}, // ISOlat1
  {"^e", "&ecirc;"}, // ISOlat1
  {"^i", "&icirc;"}, // ISOlat1
  {"^o", "&ocirc;"}, // ISOlat1
  {"^u", "&ucirc;"}, // ISOlat1
  {"`A", "&Agrave;"}, // ISOlat1
  {"`E", "&Egrave;"}, // ISOlat1
  {"`I", "&Igrave;"}, // ISOlat1
  {"`O", "&Ograve;"}, // ISOlat1
  {"`U", "&Ugrave;"}, // ISOlat1
  {"`a", "&agrave;"}, // ISOlat1
  {"`e", "&egrave;"}, // ISOlat1
  {"`i", "&igrave;"}, // ISOlat1
  {"`o", "&ograve;"}, // ISOlat1
  {"`u", "&ugrave;"}, // ISOlat1
  {"~A", "&Atilde;"}, // ISOlat1
  {"~N", "&Ntilde;"}, // ISOlat1
  {"~O", "&Otilde;"}, // ISOlat1
  {"~a", "&atilde;"}, // ISOlat1
  {"~n", "&ntilde;"}, // ISOlat1
  {"~o", "&otilde;"}, // ISOlat1
  {"vS", "&Scaron;"}, // ISOlat2
  {"vs", "&scaron;"}, // ISOlat2
  {"vZ", "&Zcaron;"}, // ISOlat2
  {"vz", "&zcaron;"}, // ISOlat2
  {",C", "&Ccedil;"}, // ISOlat1
  {",c", "&ccedil;"}, // ISOlat1
  {"/L", "&Lstrok;"}, // ISOlat2: Polish L with a slash
  {"/l", "&lstrok;"}, // ISOlat2: Polish l with a slash
  {"/O", "&Oslash;"}, // ISOlat1
  {"/o", "&oslash;"}, // ISOlat1
  {"oA", "&Aring;"}, // ISOlat1
  {"oa", "&aring;"}, // ISOlat1
  // Accents
  {"a\"","&dblac;"}, // ISOdia: double acute accent (Hungarian umlaut)
  {"a-", "&macr;"}, // ISOdia: macron or bar accent
  {"a.", "&dot;"}, // ISOdia: dot above
  {"a^", "&circ;"}, // ISOdia: circumflex accent
  {"aa", "&acute;"}, // ISOdia: acute accent
  {"ga", "&grave;"}, // ISOdia: grave accent
  {"ab", "&breve;"}, // ISOdia: breve accent
  {"ac", "&cedil;"}, // ISOdia: cedilla accent
  {"ad", "&uml;"}, // ISOdia: umlaut or dieresis
  {"ah", "&caron;"}, // ISOdia: caron (aka hacek accent)
  {"ao", "&ring;"}, // ISOdia: ring or circle accent
  {"a~", "&tilde;"}, // ISOdia: tilde accent
  {"ho", "&ogon;"}, // ISOdia: hook or ogonek accent
  {"ha", "^"}, // ASCII circumflex, hat, caret
  {"ti", "~"}, // ASCII tilde, large tilde
  // Quotes
  {"Bq", "&ldquor;"}, // ISOpub: low double comma quote
  {"bq", "&lsquor;"}, // ISOpub: low single comma quote
  {"lq", "&ldquo;"}, // ISOnum
  {"rq", "&rdquo;"}, // ISOpub
  {"oq", "&lsquo;"}, // ISOnum: single open quote
  {"cq", "&rsquo;"}, // ISOnum: single closing quote (ASCII 39)
  {"aq", "&zerosp;'"}, // apostrophe quote
  {"dq", "\""}, // double quote (ASCII 34)
  {"Fo", "&laquo;"}, // ISOnum
  {"Fc", "&raquo;"}, // ISOnum
  //{"fo", "&fo;"},
  //{"fc", "&fc;"},
  // Punctuation
  {"r!", "&iexcl;"}, // ISOnum
  {"r?", "&iquest;"}, // ISOnum
  // Old troff \(em goes here
  {"en", "&ndash;"}, // ISOpub: en dash
  // Old troff \(hy goes here
  // Brackets
  {"lB", "&lsqb;"}, // ISOnum: left (square) bracket
  {"rB", "&rsqb;"}, // ISOnum: right (square) bracket
  {"lC", "&lcub;"}, // ISOnum: left (curly) brace
  {"rC", "&rcub;"}, // ISOnum: right (curly) brace
  {"la", "&lang;"}, // ISOtech: left angle bracket
  {"ra", "&rang;"}, // ISOtech: right angle bracket
  // Old troff \(bv goes here
  // Bracket-pile characters could go here.
  // Arrows
  // Old troff \(<- and \(-> go here
  {"<>", "&harr;"}, // ISOamsa
  {"da", "&darr;"}, // ISOnum
  {"ua", "&uarr;"}, // ISOnum
  {"lA", "&lArr;"}, // ISOtech
  {"rA", "&rArr;"}, // ISOtech
  {"hA", "&iff;"}, // ISOtech: horizontal double-headed arrow
  {"dA", "&dArr;"}, // ISOamsa
  {"uA", "&uArr;"}, // ISOamsa
  {"vA", "&vArr;"}, // ISOamsa: vertical double-headed double arrow
  //{"an", "&an;"},
  // Lines
  {"-h", "&planck;"}, // ISOamso: h-bar (Planck's constant)
  // Old troff \(or goes here
  {"ba", "&verbar;"}, // ISOnum
  // Old troff \(br, \(ru, \(ul, \(bv go here
  {"bb", "&brvbar;"}, // ISOnum
  {"sl", "/"},
  {"rs", "&bsol;"}, // ISOnum
  // Text markers
  // Old troff \(ci, \(bu, \(dd, \(dg go here
  {"lz", "&loz;"}, // ISOpub
  // Old troff sq goes here
  {"ps", "&para;"}, // ISOnum: paragraph or pilcrow sign
  {"sc", "&sect;"}, // ISOnum (in old troff)
  // Old troff \(lh, \(rh go here
  {"at", "&commat;"}, // ISOnum
  {"sh", "&num;"}, // ISOnum
  //{"CR", "&CR;"},
  {"OK", "&check;"}, // ISOpub
  // Legalize
  // Old troff \(co, \(rg go here
  {"tm", "&trade;"}, // ISOnum
  // Currency symbols
  {"Do", "&dollar;"}, // ISOnum
  {"ct", "&cent;"}, // ISOnum
  {"eu", "&euro;"},
  {"Eu", "&euro;"},
  {"Ye", "&yen;"}, // ISOnum
  {"Po", "&pound;"}, // ISOnum
  {"Cs", "&curren;"}, // ISOnum: currency sign
  {"Fn", "&fnof;"}, // ISOtech
  // Units
  // Old troff de goes here
  {"%0", "&permil;"}, // ISOtech: per thousand, per mille sign
  // Old troff \(fm goes here
  {"sd", "&Prime;"}, // ISOtech
  {"mc", "&micro;"}, // ISOnum
  {"Of", "&ordf;"}, // ISOnum
  {"Om", "&ordm;"}, // ISOnum
  // Logical symbols
  {"AN", "&and;"}, // ISOtech
  {"OR", "&or;"}, // ISOtech
  // Old troff \(no goes here
  {"te", "&exist;"}, // ISOtech: there exists, existential quantifier
  {"fa", "&forall;"}, // ISOtech: for all, universal quantifier
  {"st", "&bepsi;"}, // ISOamsr: such that
  {"3d", "&there4;"}, // ISOtech
  {"tf", "&there4;"}, // ISOtech
  // Mathematical symbols
  // Old troff "12", "14", "34" goes here
  {"S1", "&sup1;"}, // ISOnum
  {"S2", "&sup2;"}, // ISOnum
  {"S3", "&sup3;"}, // ISOnum
  // Old troff \(pl, \-, \(+- go here
  {"t+-", "&plusmn;"}, // ISOnum
  {"-+", "&mnplus;"}, // ISOtech
  {"pc", "&middot;"}, // ISOnum
  {"md", "&middot;"}, // ISOnum
  // Old troff \(mu goes here
  {"tmu", "&times;"}, // ISOnum
  {"c*", "&otimes;"}, // ISOamsb: multiply sign in a circle
  {"c+", "&oplus;"}, // ISOamsb: plus sign in a circle
  // Old troff \(di goes here
  {"tdi", "&divide;"}, // ISOnum
  {"f/", "&horbar;"}, // ISOnum: horizontal bar for fractions
  // Old troff \(** goes here
  {"<=", "&le;"}, // ISOtech
  {">=", "&ge;"}, // ISOtech
  {"<<", "&Lt;"}, // ISOamsr
  {">>", "&Gt;"}, // ISOamsr
  {"!=", "&ne;"}, // ISOtech
  // Old troff \(eq and \(== go here
  {"=~", "&cong;"}, // ISOamsr
  // Old troff \(ap goes here
  {"~~", "&ap;"}, // ISOtech
  // This appears to be an error in the groff table.
  // It clashes with the Bell Labs use of ~= for a congruence sign
  // {"~=", "&ap;"}, // ISOamsr
  // Old troff \(pt, \(es, \(mo go here
  {"nm", "&notin;"}, // ISOtech
  {"nb", "&nsub;"}, // ISOamsr
  {"nc", "&nsup;"}, // ISOamsn
  {"ne", "&nequiv;"}, // ISOamsn
  // Old troff \(sb, \(sp, \(ib, \(ip, \(ca, \(cu go here
  {"/_", "&ang;"}, // ISOamso
  {"pp", "&perp;"}, // ISOtech
  // Old troff \(is goes here
  {"sum", "&sum;"}, // ISOamsb
  {"product", "&prod;"}, // ISOamsb
  {"gr", "&nabla;"}, // ISOtech
  // Old troff \(sr, \(rn, \(if go here
  {"Ah", "&aleph;"}, // ISOtech
  {"Im", "&image;"}, // ISOamso: Fraktur I, imaginary
  {"Re", "&real;"}, // ISOamso: Fraktur R, real
  {"wp", "&weierp;"}, // ISOamso
  {"pd", "&part;"}, // ISOtech: partial differentiation sign
  // Their table duplicates the Greek letters here.
  // We list only the variant forms here, mapping them into
  // the ISO Greek 4 variants (which may or may not be correct :-()
  {"+f", "&b.phiv;"}, // ISOgrk4: variant phi
  {"+h", "&b.thetas;"}, // ISOgrk4: variant theta
  {"+p", "&b.omega;"}, // ISOgrk4: variant pi, looking like omega
  // Card symbols
  {"CL", "&clubs;"}, // ISOpub: club suit
  {"SP", "&spades;"}, // ISOpub: spade suit
  {"HE", "&hearts;"}, // ISOpub: heart suit
  {"DI", "&diams;"}, // ISOpub: diamond suit
};
399 const char *special_to_entity(const char *sp)
401 struct map *mp;
402 for (mp = entity_table; mp < entity_table + NELEM(entity_table); ++mp)
403 if (!strcmp(mp->from, sp))
404 return mp->to;
405 return NULL;
408 class char_box
409 : public simple_box
411 unsigned char c;
412 char next_is_italic;
413 char prev_is_italic;
415 public:
416 char_box(unsigned char);
417 void debug_print();
418 void output();
419 int is_char();
420 int left_is_italic();
421 int right_is_italic();
422 void hint(unsigned);
423 void handle_char_type(int, int);
426 class special_char_box
427 : public simple_box
429 char *s;
431 public:
432 special_char_box(const char *);
433 ~special_char_box();
434 void output();
435 void debug_print();
436 int is_char();
437 void handle_char_type(int, int);
// Spacing classes of a box.  The enumerators are listed in the same order
// as the names in spacing_type_table.
enum spacing_type {
  s_ordinary,
  s_operator,
  s_binary,
  s_relation,
  s_opening,
  s_closing,
  s_punctuation,
  s_inner,
  s_suppress
};
// Names of the spacing types; lookup_spacing_type() returns the index of
// the matching name and stops at the terminating null entry.
const char *spacing_type_table[] = { // FIXME const
  "ordinary",
  "operator",
  "binary",
  "relation",
  "opening",
  "closing",
  "punctuation",
  "inner",
  "suppress",
  0,
};
// Font classes of a character; the values double as indices into
// font_type_table.
const int DIGIT_TYPE = 0;
const int LETTER_TYPE = 1;

// Names of the font types; lookup_font_type() returns the index of the
// matching name and stops at the terminating null entry.
const char *font_type_table[] = { // FIXME const
  "digit",
  "letter",
  0,
};
// Per-character classification record: how the character is spaced and
// which font class it belongs to.
class char_info
{
public:
  int spacing_type;  // a spacing type value (see enum spacing_type)
  int font_type;     // DIGIT_TYPE or LETTER_TYPE
  char_info();
};
482 char_info::char_info()
483 : spacing_type(ORDINARY_TYPE), font_type(DIGIT_TYPE)
487 static char_info char_table[256];
489 declare_ptable(char_info)
490 implement_ptable(char_info)
492 PTABLE(char_info) special_char_table;
494 static int get_special_char_spacing_type(const char *ch)
496 char_info *p = special_char_table.lookup(ch);
497 return p ? p->spacing_type : ORDINARY_TYPE;
500 static int get_special_char_font_type(const char *ch)
502 char_info *p = special_char_table.lookup(ch);
503 return p ? p->font_type : DIGIT_TYPE;
506 static void set_special_char_type(const char *ch, int st, int ft)
508 char_info *p = special_char_table.lookup(ch);
509 if (!p) {
510 p = new char_info[1];
511 special_char_table.define(ch, p);
513 if (st >= 0)
514 p->spacing_type = st;
515 if (ft >= 0)
516 p->font_type = ft;
519 void init_char_table()
521 set_special_char_type("pl", s_binary, -1);
522 set_special_char_type("mi", s_binary, -1);
523 set_special_char_type("eq", s_relation, -1);
524 set_special_char_type("<=", s_relation, -1);
525 set_special_char_type(">=", s_relation, -1);
526 char_table['}'].spacing_type = s_closing;
527 char_table[')'].spacing_type = s_closing;
528 char_table[']'].spacing_type = s_closing;
529 char_table['{'].spacing_type = s_opening;
530 char_table['('].spacing_type = s_opening;
531 char_table['['].spacing_type = s_opening;
532 char_table[','].spacing_type = s_punctuation;
533 char_table[';'].spacing_type = s_punctuation;
534 char_table[':'].spacing_type = s_punctuation;
535 char_table['.'].spacing_type = s_punctuation;
536 char_table['>'].spacing_type = s_relation;
537 char_table['<'].spacing_type = s_relation;
538 char_table['*'].spacing_type = s_binary;
539 for (int i = 0; i < 256; i++)
540 if (csalpha(i))
541 char_table[i].font_type = LETTER_TYPE;
544 static int lookup_spacing_type(const char *type)
546 for (int i = 0; spacing_type_table[i] != 0; i++)
547 if (strcmp(spacing_type_table[i], type) == 0)
548 return i;
549 return -1;
552 static int lookup_font_type(const char *type)
554 for (int i = 0; font_type_table[i] != 0; i++)
555 if (strcmp(font_type_table[i], type) == 0)
556 return i;
557 return -1;
560 void box::set_spacing_type(char *type)
562 int t = lookup_spacing_type(type);
563 if (t < 0)
564 error("unrecognised type `%1'", type);
565 else
566 spacing_type = t;
567 a_delete type;
570 char_box::char_box(unsigned char cc)
571 : c(cc), next_is_italic(0), prev_is_italic(0)
573 spacing_type = char_table[c].spacing_type;
576 void char_box::hint(unsigned flags)
578 if (flags & HINT_PREV_IS_ITALIC)
579 prev_is_italic = 1;
580 if (flags & HINT_NEXT_IS_ITALIC)
581 next_is_italic = 1;
584 void char_box::output()
586 if (output_format == troff) {
587 int font_type = char_table[c].font_type;
588 if (font_type != LETTER_TYPE)
589 printf("\\f[%s]", current_roman_font);
590 if (!prev_is_italic)
591 fputs("\\,", stdout);
592 if (c == '\\')
593 fputs("\\e", stdout);
594 else
595 putchar(c);
596 if (!next_is_italic)
597 fputs("\\/", stdout);
598 else
599 fputs("\\&", stdout); // suppress ligaturing and kerning
600 if (font_type != LETTER_TYPE)
601 fputs("\\fP", stdout);
603 else if (output_format == mathml) {
604 if (isdigit(c))
605 printf("<mn>");
606 else if (char_table[c].spacing_type)
607 printf("<mo>");
608 else
609 printf("<mi>");
610 if (c == '<')
611 printf("&lt;");
612 else if (c == '>')
613 printf("&gt;");
614 else if (c == '&')
615 printf("&amp;");
616 else
617 putchar(c);
618 if (isdigit(c))
619 printf("</mn>");
620 else if (char_table[c].spacing_type)
621 printf("</mo>");
622 else
623 printf("</mi>");
627 int char_box::left_is_italic()
629 int font_type = char_table[c].font_type;
630 return font_type == LETTER_TYPE;
633 int char_box::right_is_italic()
635 int font_type = char_table[c].font_type;
636 return font_type == LETTER_TYPE;
639 int char_box::is_char()
641 return 1;
644 void char_box::debug_print()
646 if (c == '\\') {
647 putc('\\', stderr);
648 putc('\\', stderr);
650 else
651 putc(c, stderr);
654 special_char_box::special_char_box(const char *t)
656 s = strsave(t);
657 spacing_type = get_special_char_spacing_type(s);
660 special_char_box::~special_char_box()
662 a_delete s;
665 void special_char_box::output()
667 if (output_format == troff) {
668 int font_type = get_special_char_font_type(s);
669 if (font_type != LETTER_TYPE)
670 printf("\\f[%s]", current_roman_font);
671 printf("\\,\\[%s]\\/", s);
672 if (font_type != LETTER_TYPE)
673 printf("\\fP");
675 else if (output_format == mathml) {
676 const char *entity = special_to_entity(s);
677 if (entity != NULL)
678 printf("<mo>%s</mo>", entity);
679 else
680 printf("<merror>unknown eqn/troff special char %s</merror>", s);
684 int special_char_box::is_char()
686 return 1;
689 void special_char_box::debug_print()
691 fprintf(stderr, "\\[%s]", s);
694 void char_box::handle_char_type(int st, int ft)
696 if (st >= 0)
697 char_table[c].spacing_type = st;
698 if (ft >= 0)
699 char_table[c].font_type = ft;
702 void special_char_box::handle_char_type(int st, int ft)
704 set_special_char_type(s, st, ft);
707 void set_char_type(const char *type, char *ch)
709 assert(ch != 0);
710 int st = lookup_spacing_type(type);
711 int ft = lookup_font_type(type);
712 if (st < 0 && ft < 0) {
713 error("bad character type `%1'", type);
714 a_delete ch;
715 return;
717 box *b = split_text(ch);
718 b->handle_char_type(st, ft);
719 delete b;
722 /* We give primes special treatment so that in ``x' sub 2'', the ``2''
723 will be tucked under the prime */
725 class prime_box
726 : public pointer_box
728 box *pb;
730 public:
731 prime_box(box *);
732 ~prime_box();
733 int compute_metrics(int style);
734 void output();
735 void compute_subscript_kern();
736 void debug_print();
737 void handle_char_type(int, int);
740 box *make_prime_box(box *pp)
742 return new prime_box(pp);
745 prime_box::prime_box(box *pp) : pointer_box(pp)
747 pb = new special_char_box("fm");
750 prime_box::~prime_box()
752 delete pb;
755 int prime_box::compute_metrics(int style)
757 int res = p->compute_metrics(style);
758 pb->compute_metrics(style);
759 printf(".nr " WIDTH_FORMAT " 0\\n[" WIDTH_FORMAT "]"
760 "+\\n[" WIDTH_FORMAT "]\n",
761 uid, p->uid, pb->uid);
762 printf(".nr " HEIGHT_FORMAT " \\n[" HEIGHT_FORMAT "]"
763 ">?\\n[" HEIGHT_FORMAT "]\n",
764 uid, p->uid, pb->uid);
765 printf(".nr " DEPTH_FORMAT " \\n[" DEPTH_FORMAT "]"
766 ">?\\n[" DEPTH_FORMAT "]\n",
767 uid, p->uid, pb->uid);
768 return res;
771 void prime_box::compute_subscript_kern()
773 p->compute_subscript_kern();
774 printf(".nr " SUB_KERN_FORMAT " 0\\n[" WIDTH_FORMAT "]"
775 "+\\n[" SUB_KERN_FORMAT "]>?0\n",
776 uid, pb->uid, p->uid);
779 void prime_box::output()
781 p->output();
782 pb->output();
785 void prime_box::handle_char_type(int st, int ft)
787 p->handle_char_type(st, ft);
788 pb->handle_char_type(st, ft);
791 void prime_box::debug_print()
793 p->debug_print();
794 putc('\'', stderr);
797 box *split_text(char *text)
799 list_box *lb = 0;
800 box *fb = 0;
801 char *s = text;
802 while (*s != '\0') {
803 char c = *s++;
804 box *b = 0;
805 switch (c) {
806 case '+':
807 b = new special_char_box("pl");
808 break;
809 case '-':
810 b = new special_char_box("mi");
811 break;
812 case '=':
813 b = new special_char_box("eq");
814 break;
815 case '\'':
816 b = new special_char_box("fm");
817 break;
818 case '<':
819 if (*s == '=') {
820 b = new special_char_box("<=");
821 s++;
822 break;
824 goto normal_char;
825 case '>':
826 if (*s == '=') {
827 b = new special_char_box(">=");
828 s++;
829 break;
831 goto normal_char;
832 case '\\':
833 if (*s == '\0') {
834 lex_error("bad escape");
835 break;
837 c = *s++;
838 switch (c) {
839 case '(':
841 char buf[3];
842 if (*s != '\0') {
843 buf[0] = *s++;
844 if (*s != '\0') {
845 buf[1] = *s++;
846 buf[2] = '\0';
847 b = new special_char_box(buf);
849 else {
850 lex_error("bad escape");
853 else {
854 lex_error("bad escape");
857 break;
858 case '[':
860 char *ch = s;
861 while (*s != ']' && *s != '\0')
862 s++;
863 if (*s == '\0')
864 lex_error("bad escape");
865 else {
866 *s++ = '\0';
867 b = new special_char_box(ch);
870 break;
871 case 'f':
872 case 'g':
873 case 'k':
874 case 'n':
875 case '*':
877 char *escape_start = s - 2;
878 switch (*s) {
879 case '(':
880 if (*++s != '\0')
881 ++s;
882 break;
883 case '[':
884 for (++s; *s != '\0' && *s != ']'; s++)
886 break;
888 if (*s == '\0')
889 lex_error("bad escape");
890 else {
891 ++s;
892 char *buf = new char[s - escape_start + 1];
893 memcpy(buf, escape_start, s - escape_start);
894 buf[s - escape_start] = '\0';
895 b = new quoted_text_box(buf);
898 break;
899 case '-':
900 case '_':
902 char buf[2];
903 buf[0] = c;
904 buf[1] = '\0';
905 b = new special_char_box(buf);
907 break;
908 case '`':
909 b = new special_char_box("ga");
910 break;
911 case '\'':
912 b = new special_char_box("aa");
913 break;
914 case 'e':
915 case '\\':
916 b = new char_box('\\');
917 break;
918 case '^':
919 case '|':
920 case '0':
922 char buf[3];
923 buf[0] = '\\';
924 buf[1] = c;
925 buf[2] = '\0';
926 b = new quoted_text_box(strsave(buf));
927 break;
929 default:
930 lex_error("unquoted escape");
931 b = new quoted_text_box(strsave(s - 2));
932 s = strchr(s, '\0');
933 break;
935 break;
936 default:
937 normal_char:
938 b = new char_box(c);
939 break;
941 while (*s == '\'') {
942 if (b == 0)
943 b = new quoted_text_box(0);
944 b = new prime_box(b);
945 s++;
947 if (b != 0) {
948 if (lb != 0)
949 lb->append(b);
950 else if (fb != 0) {
951 lb = new list_box(fb);
952 lb->append(b);
954 else
955 fb = b;
958 a_delete text;
959 if (lb != 0)
960 return lb;
961 else if (fb != 0)
962 return fb;
963 else
964 return new quoted_text_box(0);
967 // s-it2-mode