2 * Copyright (c) 2014 - 2017 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
4 * Copyright (C) 2002, 2003, 2004, 2006
5 * Free Software Foundation, Inc.
6 * Written by Werner Lemberg <wl@gnu.org>
8 * This is free software; you can redistribute it and/or modify it under
9 * the terms of the GNU General Public License as published by the Free
10 * Software Foundation; either version 2, or (at your option) any later
13 * This is distributed in the hope that it will be useful, but WITHOUT ANY
14 * WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 * You should have received a copy of the GNU General Public License along
19 * with groff; see the file COPYING. If not, write to the Free Software
20 * Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA.
27 #include "stringclass.h"
30 struct glyph_to_unicode
{
// Instantiate the table type keyed by glyph name and holding
// glyph_to_unicode entries, then define the single table object.
// declare_ptable/implement_ptable/PTABLE are macros defined outside this
// file (presumably in a ptable header -- confirm).
// glyph_to_unicode_table is filled by the glyph_to_unicode_init
// constructor via define() and read by glyph_name_to_unicode() via
// lookup().
34 declare_ptable(glyph_to_unicode
)
35 implement_ptable(glyph_to_unicode
)
37 PTABLE(glyph_to_unicode
) glyph_to_unicode_table
;
39 // The entries commented out in the table below can't be used in glyph
45 } glyph_to_unicode_list
[] = { // FIXME const?
129 { "ff", "0066_0066" },
130 { "Fi", "0066_0066_0069" },
131 { "Fl", "0066_0066_006C" },
132 { "fi", "0066_0069" },
133 { "fl", "0066_006C" },
177 // The soft hyphen U+00AD is meaningful only in the input file,
178 // not in the output.
330 // the curly phi variant
336 // the stroked phi variant
340 // `-' and `hy' denote a HYPHEN, usually a glyph with a smaller width than
341 // the MINUS sign. Users who are viewing broken man pages that assume
342 // that `-' denotes a U+002D character can either fix the broken man pages
343 // or apply the workaround described in the PROBLEMS file.
400 { "product", "220F" },
401 { "coproduct", "2210" },
403 // `mi' and `\-' represent a MINUS sign. But it is used in many man pages
404 // to denote the U+002D character that introduces a command-line option.
405 // For devices that support copy&paste, such as devhtml and devutf8, the
406 // user can apply the workaround described in the PROBLEMS file.
421 { "integral", "222B" },
450 { "parenlefttp", "239B" },
451 { "parenleftex", "239C" },
452 { "parenleftbt", "239D" },
453 { "parenrighttp", "239E" },
454 { "parenrightex", "239F" },
455 { "parenrightbt", "23A0" },
456 { "bracketlefttp", "23A1" },
457 { "bracketleftex", "23A2" },
458 { "bracketleftbt", "23A3" },
459 { "bracketrighttp", "23A4" },
460 { "bracketrightex", "23A5" },
461 { "bracketrightbt", "23A6" },
463 { "bracelefttp", "23A7" },
465 { "braceleftmid", "23A8" },
467 { "braceleftbt", "23A9" },
469 { "braceex", "23AA" },
470 { "braceleftex", "23AA" },
471 { "bracerightex", "23AA" },
473 { "bracerighttp", "23AB" },
475 { "bracerightmid", "23AC" },
477 { "bracerightbt", "23AD" },
490 // The `left angle bracket' and `right angle bracket' could be mapped to
491 // either U+2329,U+232A or U+3008,U+3009 or U+27E8,U+27E9. But the first
492 // and second possibility are double-width characters (see Unicode's
493 // `DerivedEastAsianWidth.txt' file) and are therefore not suitable for
494 // general use, whereas the third possibility is single-width.
496 // The devhtml device overrides this mapping, because
498 // http://www.w3.org/TR/html401/sgml/entities.html
500 // says that in HTML, `&lang;' and `&rang;' are U+2329,U+232A,
506 // global constructor FIXME static CTOR
// Helper type whose only member is a constructor, together with one
// file-local static instance of it.  Constructing that instance runs
// glyph_to_unicode_init::glyph_to_unicode_init(), which copies every
// entry of glyph_to_unicode_list into glyph_to_unicode_table.
// NOTE(review): as a static-storage object its constructor runs during
// static initialization; code in other translation units that calls
// glyph_name_to_unicode() from its own static constructors would depend
// on initialization order -- confirm no such caller exists.
507 static struct glyph_to_unicode_init
{
508 glyph_to_unicode_init();
509 } _glyph_to_unicode_init
;
// Populate glyph_to_unicode_table from the static glyph_to_unicode_list
// array: one table entry is defined per array element, keyed by the
// element's glyph name.
511 glyph_to_unicode_init::glyph_to_unicode_init()
// The element count is derived from the array itself
// (sizeof(array) / sizeof(array[0])), so adding rows to the list needs no
// change here.
513 for (unsigned int i
= 0;
514 i
< sizeof(glyph_to_unicode_list
)/sizeof(glyph_to_unicode_list
[0]);
// Each entry is allocated with array-new of length 1; no matching
// delete[] is visible in this file section.
516 glyph_to_unicode
*gtu
= new glyph_to_unicode
[1];
// The entry points at the list's string; nothing is copied.  The C-style
// cast to char * suggests the list stores the value with a different
// (presumably const) qualification -- confirm against the list's
// declaration.
517 gtu
->value
= (char *)glyph_to_unicode_list
[i
].value
;
// Register the entry under the glyph name.
518 glyph_to_unicode_table
.define(glyph_to_unicode_list
[i
].key
, gtu
);
// Look up the glyph name s in glyph_to_unicode_table.
//
// Returns the value string stored for that glyph -- in the list above
// these are hexadecimal code points joined by '_' (e.g. "0066_0069" for
// "fi") -- or 0 when the table has no entry for s.
// The returned pointer is the one stored in the table entry; the caller
// does not receive a copy.
522 const char *glyph_name_to_unicode(const char *s
)
524 glyph_to_unicode
*result
= glyph_to_unicode_table
.lookup(s
);
// A null lookup result means "unknown glyph name"; report that as 0.
525 return result
? result
->value
: 0;