Update to groff 1.19.2.
[dragonfly.git] / contrib / groff-1.19 / src / utils / hpftodit / hpftodit.cpp
blob5910fb2f39e650081fe6ab7fe8dfedcd4009c17e
1 // -*- C++ -*-
2 /* Copyright (C) 1994, 2000, 2001, 2003, 2004 Free Software Foundation, Inc.
3 Written by James Clark (jjc@jclark.com)
5 This file is part of groff.
7 groff is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 groff is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License along
18 with groff; see the file COPYING. If not, write to the Free Software
19 Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
22 TODO
23 devise new names for useful characters
24 option to specify symbol sets to look in
25 put filename in error messages (or fix lib)
28 #include "lib.h"
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <ctype.h>
34 #include <math.h>
35 #include <errno.h>
36 #include "assert.h"
37 #include "posix.h"
38 #include "errarg.h"
39 #include "error.h"
40 #include "cset.h"
41 #include "nonposix.h"
42 #include "unicode.h"
// Version text printed by the -v / --version option; defined outside
// this file with C linkage.
extern "C" const char *Version_string;

// Defined outside this file.
// NOTE(review): presumably maps an HP MSL number, passed as a string, to
// a Unicode code string -- confirm against its definition.
extern const char *hp_msl_to_unicode_code(const char *);
// Number of elements in the array v.  v must be a real array, not a
// pointer.
#define SIZEOF(v) (sizeof(v)/sizeof((v)[0]))
// Nonzero if the C strings a and b have identical contents.
#define equal(a, b) (strcmp((a), (b)) == 0)
// only valid if is_uname(c) has returned true
#define is_decomposed(c) (strchr((c), '_'))

#define NO 0
#define YES 1

// font types, as stored in the value of the type tag
#define MSL 0
#define SYMSET 1
#define UNICODE 2

// glyph name for which no kerning pair or warning is emitted
#define UNNAMED "---"

static double multiplier = 3.0;	// make Agfa-based unitwidth an integer
63 inline
64 int scale(int n)
66 return int(n * multiplier + 0.5);
// tags in TFM file
//
// min_tag and max_tag bracket the range of tag numbers that are stored
// in the `tags' table; note that 416 is not assigned a name here.

enum tag_type {
  min_tag = 400,
  type_tag = 400,
  copyright_tag = 401,
  comment_tag = 402,
  charcode_tag = 403,		// MSL for Intellifont, Unicode for TrueType
  symbol_set_tag = 404,
  unique_identifier_tag = 405,
  inches_per_point_tag = 406,
  nominal_point_size_tag = 407,
  design_units_per_em_tag = 408,
  posture_tag = 409,
  type_structure_tag = 410,
  stroke_weight_tag = 411,
  spacing_tag = 412,
  slant_tag = 413,
  appearance_width_tag = 414,
  serif_style_tag = 415,
  font_name_tag = 417,
  typeface_source_tag = 418,
  average_width_tag = 419,
  max_width_tag = 420,
  word_spacing_tag = 421,
  recommended_line_spacing_tag = 422,
  cap_height_tag = 423,
  x_height_tag = 424,
  max_ascent_tag = 425,
  max_descent_tag = 426,
  lower_ascent_tag = 427,
  lower_descent_tag = 428,
  underscore_depth_tag = 429,
  underscore_thickness_tag = 430,
  uppercase_accent_height_tag = 431,
  lowercase_accent_height_tag = 432,
  width_tag = 433,
  vertical_escapement_tag = 434,
  left_extent_tag = 435,
  right_extent_tag = 436,
  ascent_tag = 437,
  descent_tag = 438,
  pair_kern_tag = 439,
  sector_kern_tag = 440,
  track_kern_tag = 441,
  typeface_tag = 442,
  panose_tag = 443,
  max_tag = 443
};
// Descriptive names for two of the tags.
// NOTE(review): no use of this table is visible in this part of the
// file -- confirm whether it is still needed.
const char *tag_name[] = {
  "Symbol Set",
  "Font Type"			// MSL for Intellifont, Unicode for TrueType
};
// types in TFM file (the `type' field of a tag directory entry)
enum {
  BYTE_TYPE = 1,
  ASCII_TYPE = 2,		// NUL-terminated string
  USHORT_TYPE = 3,
  LONG_TYPE = 4,		// unused
  RATIONAL_TYPE = 5,		// numerator followed by denominator; this
				// program reads each as a 32-bit value
  SIGNED_BYTE_TYPE = 16,	// unused
  SIGNED_SHORT_TYPE = 17,
  SIGNED_LONG_TYPE = 18		// unused
};
// Integer aliases used for fields read from the TFM file.
// NOTE(review): the names assume a 16-bit short and a 32-bit int; this
// is not checked anywhere in the visible code.
typedef unsigned char byte;
typedef unsigned short uint16;
typedef short int16;
typedef unsigned int uint32;
141 class File {
142 public:
143 File(const char *);
144 void skip(int n);
145 byte get_byte();
146 uint16 get_uint16();
147 uint32 get_uint32();
148 uint32 get_uint32(char *orig);
149 void seek(uint32 n);
150 private:
151 unsigned char *buf_;
152 const unsigned char *ptr_;
153 const unsigned char *end_;
156 struct entry {
157 char present;
158 uint16 type;
159 uint32 count;
160 uint32 value;
161 char orig_value[4];
162 entry() : present(0) { }
165 struct char_info {
166 uint16 charcode;
167 uint16 width;
168 int16 ascent;
169 int16 descent;
170 int16 left_extent;
171 uint16 right_extent;
172 uint16 symbol_set;
173 unsigned char code;
176 const uint16 NO_GLYPH = 0xffff;
177 const uint16 NO_SYMBOL_SET = 0;
179 struct name_list {
180 char *name;
181 name_list *next;
182 name_list(const char *s, name_list *p) : name(strsave(s)), next(p) { }
183 ~name_list() { a_delete name; }
186 struct symbol_set {
187 uint16 select;
188 uint16 index[256];
191 #define SYMBOL_SET(n, c) ((n) * 32 + ((c) - 64))
193 uint16 text_symbol_sets[] = {
194 SYMBOL_SET(19, 'U'), // Windows Latin 1 ("ANSI", code page 1252)
195 SYMBOL_SET(9, 'E'), // Windows Latin 2, Code Page 1250
196 SYMBOL_SET(5, 'T'), // Code Page 1254
197 SYMBOL_SET(7, 'J'), // Desktop
198 SYMBOL_SET(6, 'J'), // Microsoft Publishing
199 SYMBOL_SET(0, 'N'), // Latin 1 (subset of 19U,
200 // so we should never get here)
201 SYMBOL_SET(2, 'N'), // Latin 2 (subset of 9E,
202 // so we should never get here)
203 SYMBOL_SET(8, 'U'), // HP Roman 8
204 SYMBOL_SET(10, 'J'), // PS Standard
205 SYMBOL_SET(9, 'U'), // Windows 3.0 "ANSI"
206 SYMBOL_SET(1, 'U'), // U.S. Legal
208 SYMBOL_SET(12, 'J'), // MC Text
209 SYMBOL_SET(10, 'U'), // PC Code Page 437
210 SYMBOL_SET(11, 'U'), // PC Code Page 437N
211 SYMBOL_SET(17, 'U'), // PC Code Page 852
212 SYMBOL_SET(12, 'U'), // PC Code Page 850
213 SYMBOL_SET(9, 'T'), // PC Code Page 437T
217 uint16 special_symbol_sets[] = {
218 SYMBOL_SET(8, 'M'), // Math 8
219 SYMBOL_SET(5, 'M'), // PS Math
220 SYMBOL_SET(15, 'U'), // Pi font
221 SYMBOL_SET(13, 'J'), // Ventura International
222 SYMBOL_SET(19, 'M'), // Symbol font
223 SYMBOL_SET(579, 'L'), // Wingdings
227 entry tags[max_tag + 1 - min_tag];
229 char_info *char_table;
230 uint32 nchars = 0;
232 unsigned int charcode_name_table_size = 0;
233 name_list **charcode_name_table = NULL;
235 symbol_set *symbol_set_table;
236 unsigned int n_symbol_sets;
238 static int debug_flag = NO;
239 static int special_flag = NO; // not a special font
240 static int italic_flag = NO; // don't add italic correction
241 static int italic_sep;
242 static int all_flag = NO; // don't include glyphs not in mapfile
243 static int quiet_flag = NO; // don't suppress warnings about symbols not found
245 static char *hp_msl_to_ucode_name(int);
246 static char *unicode_to_ucode_name(int);
247 static int is_uname(char *);
248 static char *show_symset(unsigned int);
249 static void usage(FILE *);
250 static void usage();
251 static const char *xbasename(const char *);
252 static void read_tags(File &);
253 static int check_type();
254 static void check_units(File &, const int, double *, double *);
255 static int read_map(const char *, const int);
256 static void require_tag(tag_type);
257 static void dump_ascii(File &, tag_type);
258 static void dump_tags(File &);
259 static void dump_symbol_sets(File &);
260 static void dump_symbols(int);
261 static void output_font_name(File &);
262 static void output_spacewidth();
263 static void output_pclweight();
264 static void output_pclproportional();
265 static void read_and_output_pcltypeface(File &);
266 static void output_pclstyle();
267 static void output_slant();
268 static void output_ligatures();
269 static void read_symbol_sets(File &);
270 static void read_and_output_kernpairs(File &);
271 static void output_charset(const int);
272 static void read_char_table(File &);
274 inline
275 entry &tag_info(tag_type t)
277 return tags[t - min_tag];
281 main(int argc, char **argv)
283 program_name = argv[0];
285 int opt;
286 int res = 1200; // PCL unit of measure for cursor moves
287 int scalesize = 4; // LaserJet 4 only allows 1/4 point increments
288 int unitwidth = 6350;
289 double ppi; // points per inch
290 double upem; // design units per em
292 static const struct option long_options[] = {
293 { "help", no_argument, 0, CHAR_MAX + 1 },
294 { "version", no_argument, 0, 'v' },
295 { NULL, 0, 0, 0 }
297 while ((opt = getopt_long(argc, argv, "adsqvi:", long_options, NULL)) != EOF) {
298 switch (opt) {
299 case 'a':
300 all_flag = YES;
301 break;
302 case 'd':
303 debug_flag = YES;
304 break;
305 case 's':
306 special_flag = YES;
307 break;
308 case 'i':
309 italic_flag = YES;
310 italic_sep = atoi(optarg); // design units
311 break;
312 case 'q':
313 quiet_flag = YES; // suppress warnings about symbols not found
314 break;
315 case 'v':
316 printf("GNU hpftodit (groff) version %s\n", Version_string);
317 exit(0);
318 break;
319 case CHAR_MAX + 1: // --help
320 usage(stdout);
321 exit(0);
322 break;
323 case '?':
324 usage();
325 break;
326 default:
327 assert(0);
331 if (debug_flag && argc - optind < 1)
332 usage();
333 else if (!debug_flag && argc - optind != 3)
334 usage();
335 File f(argv[optind]);
336 read_tags(f);
337 int tfm_type = check_type();
338 if (debug_flag)
339 dump_tags(f);
340 if (!debug_flag && !read_map(argv[optind + 1], tfm_type))
341 exit(1);
342 else if (debug_flag && argc - optind > 1)
343 read_map(argv[optind + 1], tfm_type);
344 current_filename = NULL;
345 current_lineno = -1; // no line numbers
346 if (!debug_flag && !equal(argv[optind + 2], "-"))
347 if (freopen(argv[optind + 2], "w", stdout) == NULL)
348 fatal("cannot open `%1': %2", argv[optind + 2], strerror(errno));
349 current_filename = argv[optind];
351 check_units(f, tfm_type, &ppi, &upem);
352 if (tfm_type == UNICODE) // don't calculate for Intellifont TFMs
353 multiplier = double(res) / upem / ppi * unitwidth / scalesize;
354 if (italic_flag)
355 // convert from thousandths of an em to design units
356 italic_sep = int(italic_sep * upem / 1000 + 0.5);
358 read_char_table(f);
359 if (nchars == 0)
360 fatal("no characters");
362 if (!debug_flag) {
363 output_font_name(f);
364 printf("name %s\n", xbasename(argv[optind + 2]));
365 if (special_flag)
366 printf("special\n");
367 output_spacewidth();
368 output_slant();
369 read_and_output_pcltypeface(f);
370 output_pclproportional();
371 output_pclweight();
372 output_pclstyle();
374 read_symbol_sets(f);
375 if (debug_flag)
376 dump_symbols(tfm_type);
377 else {
378 output_ligatures();
379 read_and_output_kernpairs(f);
380 output_charset(tfm_type);
382 return 0;
385 static void
386 usage(FILE *stream)
388 fprintf(stream,
389 "usage: %s [-s] [-a] [-q] [-i n] tfm_file map_file output_font\n"
390 " %s -d tfm_file [map_file]\n",
391 program_name, program_name);
394 static void
395 usage()
397 usage(stderr);
398 exit(1);
401 File::File(const char *s)
403 // We need to read the file in binary mode because hpftodit relies
404 // on byte counts.
405 int fd = open(s, O_RDONLY | O_BINARY);
406 if (fd < 0)
407 fatal("cannot open `%1': %2", s, strerror(errno));
408 current_filename = s;
409 struct stat sb;
410 if (fstat(fd, &sb) < 0)
411 fatal("cannot stat: %1", strerror(errno));
412 if (!S_ISREG(sb.st_mode))
413 fatal("not a regular file");
414 buf_ = new unsigned char[sb.st_size];
415 long nread = read(fd, buf_, sb.st_size);
416 if (nread < 0)
417 fatal("read error: %1", strerror(errno));
418 if (nread != sb.st_size)
419 fatal("read unexpected number of bytes");
420 ptr_ = buf_;
421 end_ = buf_ + sb.st_size;
424 void
425 File::skip(int n)
427 if (end_ - ptr_ < n)
428 fatal("unexpected end of file");
429 ptr_ += n;
432 void
433 File::seek(uint32 n)
435 if (uint32(end_ - buf_) < n)
436 fatal("unexpected end of file");
437 ptr_ = buf_ + n;
440 byte
441 File::get_byte()
443 if (ptr_ >= end_)
444 fatal("unexpected end of file");
445 return *ptr_++;
448 uint16
449 File::get_uint16()
451 if (end_ - ptr_ < 2)
452 fatal("unexpected end of file");
453 uint16 n = *ptr_++;
454 return n + (*ptr_++ << 8);
457 uint32
458 File::get_uint32()
460 if (end_ - ptr_ < 4)
461 fatal("unexpected end of file");
462 uint32 n = *ptr_++;
463 for (int i = 0; i < 3; i++)
464 n += *ptr_++ << (i + 1)*8;
465 return n;
468 uint32
469 File::get_uint32(char *orig)
471 if (end_ - ptr_ < 4)
472 fatal("unexpected end of file");
473 unsigned char v = *ptr_++;
474 uint32 n = v;
475 orig[0] = v;
476 for (int i = 1; i < 4; i++) {
477 v = *ptr_++;
478 orig[i] = v;
479 n += v << i*8;
481 return n;
484 static void
485 read_tags(File &f)
487 if (f.get_byte() != 'I' || f.get_byte() != 'I')
488 fatal("not an Intel format TFM file");
489 f.skip(6);
490 uint16 ntags = f.get_uint16();
491 entry dummy;
492 for (uint16 i = 0; i < ntags; i++) {
493 uint16 tag = f.get_uint16();
494 entry *p;
495 if (min_tag <= tag && tag <= max_tag)
496 p = tags + (tag - min_tag);
497 else
498 p = &dummy;
499 p->present = 1;
500 p->type = f.get_uint16();
501 p->count = f.get_uint32();
502 p->value = f.get_uint32(p->orig_value);
506 static int
507 check_type()
509 require_tag(type_tag);
510 int tfm_type = tag_info(type_tag).value;
511 switch (tfm_type) {
512 case MSL:
513 case UNICODE:
514 break;
515 case SYMSET:
516 fatal("cannot handle Symbol Set TFM files");
517 break;
518 default:
519 fatal("unknown type tag %1", tfm_type);
521 return tfm_type;
524 static void
525 check_units(File &f, const int tfm_type, double *ppi, double *upem)
527 require_tag(design_units_per_em_tag);
528 f.seek(tag_info(design_units_per_em_tag).value);
529 uint32 num = f.get_uint32();
530 uint32 den = f.get_uint32();
531 if (tfm_type == MSL && (num != 8782 || den != 1))
532 fatal("design units per em != 8782/1");
533 *upem = double(num) / den;
534 require_tag(inches_per_point_tag);
535 f.seek(tag_info(inches_per_point_tag).value);
536 num = f.get_uint32();
537 den = f.get_uint32();
538 if (tfm_type == MSL && (num != 100 || den != 7231))
539 fatal("inches per point not 100/7231");
540 *ppi = double(den) / num;
543 static void
544 require_tag(tag_type t)
546 if (!tag_info(t).present)
547 fatal("tag %1 missing", int(t));
550 // put a human-readable font name in the file
551 static void
552 output_font_name(File &f)
554 char *p;
556 if (!tag_info(font_name_tag).present)
557 return;
558 int count = tag_info(font_name_tag).count;
559 char *font_name = new char[count];
561 if (count > 4) { // value is a file offset to the string
562 f.seek(tag_info(font_name_tag).value);
563 int n = count;
564 p = font_name;
565 while (--n)
566 *p++ = f.get_byte();
568 else // orig_value contains the string
569 sprintf(font_name, "%.*s",
570 count, tag_info(font_name_tag).orig_value);
572 // remove any trailing space
573 p = font_name + count - 1;
574 while (csspace(*--p))
576 *(p + 1) = '\0';
577 printf("# %s\n", font_name);
578 delete font_name;
581 static void
582 output_spacewidth()
584 require_tag(word_spacing_tag);
585 printf("spacewidth %d\n", scale(tag_info(word_spacing_tag).value));
588 static void
589 read_symbol_sets(File &f)
591 uint32 symbol_set_dir_length = tag_info(symbol_set_tag).count;
592 uint16 *symbol_set_selectors;
593 n_symbol_sets = symbol_set_dir_length/14;
594 symbol_set_table = new symbol_set[n_symbol_sets];
595 unsigned int i;
597 for (i = 0; i < nchars; i++)
598 char_table[i].symbol_set = NO_SYMBOL_SET;
600 for (i = 0; i < n_symbol_sets; i++) {
601 f.seek(tag_info(symbol_set_tag).value + i*14);
602 (void)f.get_uint32(); // offset to symbol set name
603 uint32 off1 = f.get_uint32(); // offset to selection string
604 uint32 off2 = f.get_uint32(); // offset to symbol set index array
606 f.seek(off1);
607 uint16 kind = 0; // HP-GL "Kind 1" symbol set value
608 unsigned int j;
609 for (j = 0; j < off2 - off1; j++) {
610 unsigned char c = f.get_byte();
611 if ('0' <= c && c <= '9') // value
612 kind = kind*10 + (c - '0');
613 else if ('A' <= c && c <= 'Z') // terminator
614 kind = kind*32 + (c - 64);
616 symbol_set_table[i].select = kind;
617 for (j = 0; j < 256; j++)
618 symbol_set_table[i].index[j] = f.get_uint16();
621 symbol_set_selectors = (special_flag ? special_symbol_sets
622 : text_symbol_sets);
623 for (i = 0; symbol_set_selectors[i] != 0; i++) {
624 unsigned int j;
625 for (j = 0; j < n_symbol_sets; j++)
626 if (symbol_set_table[j].select == symbol_set_selectors[i])
627 break;
628 if (j < n_symbol_sets) {
629 for (int k = 0; k < 256; k++) {
630 uint16 idx = symbol_set_table[j].index[k];
631 if (idx != NO_GLYPH
632 && char_table[idx].symbol_set == NO_SYMBOL_SET) {
633 char_table[idx].symbol_set = symbol_set_table[j].select;
634 char_table[idx].code = k;
640 if (all_flag)
641 return;
643 symbol_set_selectors = (special_flag ? text_symbol_sets
644 : special_symbol_sets);
645 for (i = 0; symbol_set_selectors[i] != 0; i++) {
646 unsigned int j;
647 for (j = 0; j < n_symbol_sets; j++)
648 if (symbol_set_table[j].select == symbol_set_selectors[i])
649 break;
650 if (j < n_symbol_sets) {
651 for (int k = 0; k < 256; k++) {
652 uint16 idx = symbol_set_table[j].index[k];
653 if (idx != NO_GLYPH
654 && char_table[idx].symbol_set == NO_SYMBOL_SET) {
655 char_table[idx].symbol_set = symbol_set_table[j].select;
656 char_table[idx].code = k;
661 return;
664 static void
665 read_char_table(File &f)
667 require_tag(charcode_tag);
668 nchars = tag_info(charcode_tag).count;
669 char_table = new char_info[nchars];
671 f.seek(tag_info(charcode_tag).value);
672 uint32 i;
673 for (i = 0; i < nchars; i++)
674 char_table[i].charcode = f.get_uint16();
676 require_tag(width_tag);
677 f.seek(tag_info(width_tag).value);
678 for (i = 0; i < nchars; i++)
679 char_table[i].width = f.get_uint16();
681 require_tag(ascent_tag);
682 f.seek(tag_info(ascent_tag).value);
683 for (i = 0; i < nchars; i++) {
684 char_table[i].ascent = f.get_uint16();
685 if (char_table[i].ascent < 0)
686 char_table[i].ascent = 0;
689 require_tag(descent_tag);
690 f.seek(tag_info(descent_tag).value);
691 for (i = 0; i < nchars; i++) {
692 char_table[i].descent = f.get_uint16();
693 if (char_table[i].descent > 0)
694 char_table[i].descent = 0;
697 require_tag(left_extent_tag);
698 f.seek(tag_info(left_extent_tag).value);
699 for (i = 0; i < nchars; i++)
700 char_table[i].left_extent = int16(f.get_uint16());
702 require_tag(right_extent_tag);
703 f.seek(tag_info(right_extent_tag).value);
704 for (i = 0; i < nchars; i++)
705 char_table[i].right_extent = f.get_uint16();
708 static void
709 output_pclweight()
711 require_tag(stroke_weight_tag);
712 int stroke_weight = tag_info(stroke_weight_tag).value;
713 int pcl_stroke_weight;
714 if (stroke_weight < 128)
715 pcl_stroke_weight = -3;
716 else if (stroke_weight == 128)
717 pcl_stroke_weight = 0;
718 else if (stroke_weight <= 145)
719 pcl_stroke_weight = 1;
720 else if (stroke_weight <= 179)
721 pcl_stroke_weight = 3;
722 else
723 pcl_stroke_weight = 4;
724 printf("pclweight %d\n", pcl_stroke_weight);
727 static void
728 output_pclproportional()
730 require_tag(spacing_tag);
731 printf("pclproportional %d\n", tag_info(spacing_tag).value == 0);
734 static void
735 read_and_output_pcltypeface(File &f)
737 printf("pcltypeface ");
738 require_tag(typeface_tag);
739 if (tag_info(typeface_tag).count > 4) {
740 f.seek(tag_info(typeface_tag).value);
741 for (uint32 i = 0; i < tag_info(typeface_tag).count; i++) {
742 unsigned char c = f.get_byte();
743 if (c == '\0')
744 break;
745 putchar(c);
748 else
749 printf("%.4s", tag_info(typeface_tag).orig_value);
750 printf("\n");
753 static void
754 output_pclstyle()
756 unsigned pcl_style = 0;
757 // older tfms don't have the posture tag
758 if (tag_info(posture_tag).present) {
759 if (tag_info(posture_tag).value)
760 pcl_style |= 1;
762 else {
763 require_tag(slant_tag);
764 if (tag_info(slant_tag).value != 0)
765 pcl_style |= 1;
767 require_tag(appearance_width_tag);
768 if (tag_info(appearance_width_tag).value < 100) // guess
769 pcl_style |= 4;
770 printf("pclstyle %d\n", pcl_style);
773 static void
774 output_slant()
776 require_tag(slant_tag);
777 int slant = int16(tag_info(slant_tag).value);
778 if (slant != 0)
779 printf("slant %f\n", slant/100.0);
782 static void
783 output_ligatures()
785 // don't use ligatures for fixed space font
786 require_tag(spacing_tag);
787 if (tag_info(spacing_tag).value != 0)
788 return;
789 static const char *ligature_names[] = {
790 "fi", "fl", "ff", "ffi", "ffl"
793 static const char *ligature_chars[] = {
794 "fi", "fl", "ff", "Fi", "Fl"
797 unsigned ligature_mask = 0;
798 unsigned int i;
799 for (i = 0; i < nchars; i++) {
800 uint16 charcode = char_table[i].charcode;
801 if (charcode < charcode_name_table_size
802 && char_table[i].symbol_set != NO_SYMBOL_SET) {
803 for (name_list *p = charcode_name_table[charcode]; p; p = p->next)
804 for (unsigned int j = 0; j < SIZEOF(ligature_chars); j++)
805 if (strcmp(p->name, ligature_chars[j]) == 0) {
806 ligature_mask |= 1 << j;
807 break;
811 if (ligature_mask) {
812 printf("ligatures");
813 for (i = 0; i < SIZEOF(ligature_names); i++)
814 if (ligature_mask & (1 << i))
815 printf(" %s", ligature_names[i]);
816 printf(" 0\n");
820 static void
821 read_and_output_kernpairs(File &f)
823 if (tag_info(pair_kern_tag).present) {
824 printf("kernpairs\n");
825 f.seek(tag_info(pair_kern_tag).value);
826 uint16 n_pairs = f.get_uint16();
827 for (int i = 0; i < n_pairs; i++) {
828 uint16 i1 = f.get_uint16();
829 uint16 i2 = f.get_uint16();
830 int16 val = int16(f.get_uint16());
831 if (char_table[i1].symbol_set != NO_SYMBOL_SET
832 && char_table[i2].symbol_set != NO_SYMBOL_SET
833 && char_table[i1].charcode < charcode_name_table_size
834 && char_table[i2].charcode < charcode_name_table_size) {
835 for (name_list *p = charcode_name_table[char_table[i1].charcode];
837 p = p->next)
838 for (name_list *q = charcode_name_table[char_table[i2].charcode];
840 q = q->next)
841 if (!equal(p->name, UNNAMED) && !equal(q->name, UNNAMED))
842 printf("%s %s %d\n", p->name, q->name, scale(val));
848 static void
849 output_charset(const int tfm_type)
851 require_tag(slant_tag);
852 double slant_angle = int16(tag_info(slant_tag).value)*PI/18000.0;
853 double slant = sin(slant_angle)/cos(slant_angle);
855 if (italic_flag)
856 require_tag(x_height_tag);
857 require_tag(lower_ascent_tag);
858 require_tag(lower_descent_tag);
860 printf("charset\n");
861 unsigned int i;
862 for (i = 0; i < nchars; i++) {
863 uint16 charcode = char_table[i].charcode;
865 // the glyph is bound to one of the searched symbol sets
866 if (char_table[i].symbol_set != NO_SYMBOL_SET) {
867 // the character was in the map file
868 if (charcode < charcode_name_table_size && charcode_name_table[charcode])
869 printf("%s", charcode_name_table[charcode]->name);
870 else if (!all_flag)
871 continue;
872 else if (tfm_type == MSL)
873 printf(hp_msl_to_ucode_name(charcode));
874 else
875 printf(unicode_to_ucode_name(charcode));
877 printf("\t%d,%d",
878 scale(char_table[i].width), scale(char_table[i].ascent));
880 int depth = scale(-char_table[i].descent);
881 if (depth < 0)
882 depth = 0;
883 int italic_correction = 0;
884 int left_italic_correction = 0;
885 int subscript_correction = 0;
887 if (italic_flag) {
888 italic_correction = scale(char_table[i].right_extent
889 - char_table[i].width
890 + italic_sep);
891 if (italic_correction < 0)
892 italic_correction = 0;
893 subscript_correction = int((tag_info(x_height_tag).value
894 * slant * .8) + .5);
895 if (subscript_correction > italic_correction)
896 subscript_correction = italic_correction;
897 left_italic_correction = scale(italic_sep
898 - char_table[i].left_extent);
901 if (subscript_correction != 0)
902 printf(",%d,%d,%d,%d",
903 depth, italic_correction, left_italic_correction,
904 subscript_correction);
905 else if (left_italic_correction != 0)
906 printf(",%d,%d,%d", depth, italic_correction, left_italic_correction);
907 else if (italic_correction != 0)
908 printf(",%d,%d", depth, italic_correction);
909 else if (depth != 0)
910 printf(",%d", depth);
911 // This is fairly arbitrary. Fortunately it doesn't much matter.
912 unsigned type = 0;
913 if (char_table[i].ascent > int16(tag_info(lower_ascent_tag).value)*9/10)
914 type |= 2;
915 if (char_table[i].descent < int16(tag_info(lower_descent_tag).value)*9/10)
916 type |= 1;
917 printf("\t%d\t%d", type,
918 char_table[i].symbol_set*256 + char_table[i].code);
920 if (tfm_type == UNICODE) {
921 if (charcode >= 0xE000 && charcode <= 0xF8FF)
922 printf("\t-- HP PUA U+%04X", charcode);
923 else
924 printf("\t-- U+%04X", charcode);
926 else
927 printf("\t-- MSL %4d", charcode);
928 printf(" (%3s %3d)\n",
929 show_symset(char_table[i].symbol_set), char_table[i].code);
931 if (charcode < charcode_name_table_size
932 && charcode_name_table[charcode])
933 for (name_list *p = charcode_name_table[charcode]->next;
934 p; p = p->next)
935 printf("%s\t\"\n", p->name);
937 // warnings about characters in mapfile not found in TFM
938 else if (charcode < charcode_name_table_size
939 && charcode_name_table[charcode]) {
940 char *name = charcode_name_table[charcode]->name;
941 // don't warn about Unicode or unnamed glyphs
942 // that aren't in the the TFM file
943 if (tfm_type == UNICODE && !quiet_flag && !equal(name, UNNAMED)
944 && !is_uname(name)) {
945 fprintf(stderr, "%s: warning: symbol U+%04X (%s",
946 program_name, charcode, name);
947 for (name_list *p = charcode_name_table[charcode]->next;
948 p; p = p->next)
949 fprintf(stderr, ", %s", p->name);
950 fprintf(stderr, ") not in any searched symbol set\n");
952 else if (!quiet_flag && !equal(name, UNNAMED) && !is_uname(name)) {
953 fprintf(stderr, "%s: warning: symbol MSL %d (%s",
954 program_name, charcode, name);
955 for (name_list *p = charcode_name_table[charcode]->next;
956 p; p = p->next)
957 fprintf(stderr, ", %s", p->name);
958 fprintf(stderr, ") not in any searched symbol set\n");
964 #define em_fract(a) (upem >= 0 ? double(a)/upem : 0)
966 static void
967 dump_tags(File &f)
969 double upem = -1.0;
971 printf("TFM tags\n"
972 "\n"
973 "tag# type count value\n"
974 "---------------------\n");
976 for (int i = min_tag; i <= max_tag; i++) {
977 enum tag_type t = tag_type(i);
978 if (tag_info(t).present) {
979 printf("%4d %4d %5d", i, tag_info(t).type, tag_info(t).count);
980 switch (tag_info(t).type) {
981 case BYTE_TYPE:
982 case USHORT_TYPE:
983 printf(" %5u", tag_info(t).value);
984 switch (i) {
985 case type_tag:
986 printf(" Font Type ");
987 switch (tag_info(t).value) {
988 case MSL:
989 case SYMSET:
990 printf("(Intellifont)");
991 break;
992 case UNICODE:
993 printf("(TrueType)");
995 break;
996 case charcode_tag:
997 printf(" Number of Symbols (%u)", tag_info(t).count);
998 break;
999 case symbol_set_tag:
1000 printf(" Symbol Sets (%u): ",
1001 tag_info(symbol_set_tag).count / 14);
1002 dump_symbol_sets(f);
1003 break;
1004 case type_structure_tag:
1005 printf(" Type Structure (%u)", tag_info(t).value);
1006 break;
1007 case stroke_weight_tag:
1008 printf(" Stroke Weight (%u)", tag_info(t).value);
1009 break;
1010 case spacing_tag:
1011 printf(" Spacing ");
1012 switch (tag_info(t).value) {
1013 case 0:
1014 printf("(Proportional)");
1015 break;
1016 case 1:
1017 printf("(Fixed Pitch: %u DU: %.2f em)", tag_info(t).value,
1018 em_fract(tag_info(t).value));
1019 break;
1021 break;
1022 case appearance_width_tag:
1023 printf(" Appearance Width (%u)", tag_info(t).value);
1024 break;
1025 case serif_style_tag:
1026 printf(" Serif Style (%u)", tag_info(t).value);
1027 break;
1028 case posture_tag:
1029 printf(" Posture (%s)", tag_info(t).value == 0
1030 ? "Upright"
1031 : tag_info(t).value == 1
1032 ? "Italic"
1033 : "Alternate Italic");
1034 break;
1035 case max_width_tag:
1036 printf(" Maximum Width (%u DU: %.2f em)", tag_info(t).value,
1037 em_fract(tag_info(t).value));
1038 break;
1039 case word_spacing_tag:
1040 printf(" Interword Spacing (%u DU: %.2f em)", tag_info(t).value,
1041 em_fract(tag_info(t).value));
1042 break;
1043 case recommended_line_spacing_tag:
1044 printf(" Recommended Line Spacing (%u DU: %.2f em)", tag_info(t).value,
1045 em_fract(tag_info(t).value));
1046 break;
1047 case x_height_tag:
1048 printf(" x-Height (%u DU: %.2f em)", tag_info(t).value,
1049 em_fract(tag_info(t).value));
1050 break;
1051 case cap_height_tag:
1052 printf(" Cap Height (%u DU: %.2f em)", tag_info(t).value,
1053 em_fract(tag_info(t).value));
1054 break;
1055 case max_ascent_tag:
1056 printf(" Maximum Ascent (%u DU: %.2f em)", tag_info(t).value,
1057 em_fract(tag_info(t).value));
1058 break;
1059 case lower_ascent_tag:
1060 printf(" Lowercase Ascent (%u DU: %.2f em)", tag_info(t).value,
1061 em_fract(tag_info(t).value));
1062 break;
1063 case underscore_thickness_tag:
1064 printf(" Underscore Thickness (%u DU: %.2f em)", tag_info(t).value,
1065 em_fract(tag_info(t).value));
1066 break;
1067 case uppercase_accent_height_tag:
1068 printf(" Uppercase Accent Height (%u DU: %.2f em)", tag_info(t).value,
1069 em_fract(tag_info(t).value));
1070 break;
1071 case lowercase_accent_height_tag:
1072 printf(" Lowercase Accent Height (%u DU: %.2f em)", tag_info(t).value,
1073 em_fract(tag_info(t).value));
1074 break;
1075 case width_tag:
1076 printf(" Horizontal Escapement array");
1077 break;
1078 case vertical_escapement_tag:
1079 printf(" Vertical Escapement array");
1080 break;
1081 case right_extent_tag:
1082 printf(" Right Extent array");
1083 break;
1084 case ascent_tag:
1085 printf(" Character Ascent array");
1086 break;
1087 case pair_kern_tag:
1088 f.seek(tag_info(t).value);
1089 printf(" Kern Pairs (%u)", f.get_uint16());
1090 break;
1091 case panose_tag:
1092 printf(" PANOSE Classification array");
1093 break;
1095 break;
1096 case SIGNED_SHORT_TYPE:
1097 printf(" %5d", int16(tag_info(t).value));
1098 switch (i) {
1099 case slant_tag:
1100 printf(" Slant (%.2f degrees)", double(tag_info(t).value) / 100);
1101 break;
1102 case max_descent_tag:
1103 printf(" Maximum Descent (%d DU: %.2f em)", int16(tag_info(t).value),
1104 em_fract(int16(tag_info(t).value)));
1105 break;
1106 case lower_descent_tag:
1107 printf(" Lowercase Descent (%d DU: %.2f em)", int16(tag_info(t).value),
1108 em_fract(int16(tag_info(t).value)));
1109 break;
1110 case underscore_depth_tag:
1111 printf(" Underscore Depth (%d DU: %.2f em)", int16(tag_info(t).value),
1112 em_fract(int16(tag_info(t).value)));
1113 break;
1114 case left_extent_tag:
1115 printf(" Left Extent array");
1116 break;
1117 // The type of this tag has changed from SHORT to SIGNED SHORT
1118 // in TFM version 1.3.0.
1119 case ascent_tag:
1120 printf(" Character Ascent array");
1121 break;
1122 case descent_tag:
1123 printf(" Character Descent array");
1124 break;
1126 break;
1127 case RATIONAL_TYPE:
1128 printf(" %5u", tag_info(t).value);
1129 switch (i) {
1130 case inches_per_point_tag:
1131 printf(" Inches per Point");
1132 break;
1133 case nominal_point_size_tag:
1134 printf(" Nominal Point Size");
1135 break;
1136 case design_units_per_em_tag:
1137 printf(" Design Units per Em");
1138 break;
1139 case average_width_tag:
1140 printf(" Average Width");
1141 break;
1143 if (tag_info(t).count == 1) {
1144 f.seek(tag_info(t).value);
1145 uint32 num = f.get_uint32();
1146 uint32 den = f.get_uint32();
1147 if (i == design_units_per_em_tag)
1148 upem = double(num) / den;
1149 printf(" (%u/%u = %g)", num, den, double(num)/den);
1151 break;
1152 case ASCII_TYPE:
1153 printf(" %5u ", tag_info(t).value);
1154 switch (i) {
1155 case comment_tag:
1156 printf("Comment ");
1157 break;
1158 case copyright_tag:
1159 printf("Copyright ");
1160 break;
1161 case unique_identifier_tag:
1162 printf("Unique ID ");
1163 break;
1164 case font_name_tag:
1165 printf("Typeface Name ");
1166 break;
1167 case typeface_source_tag:
1168 printf("Typeface Source ");
1169 break;
1170 case typeface_tag:
1171 printf("PCL Typeface ");
1172 break;
1174 dump_ascii(f, t);
1176 putchar('\n');
1179 putchar('\n');
1181 #undef em_fract
1183 static void
1184 dump_ascii(File &f, tag_type t)
1186 putchar('"');
1187 if (tag_info(t).count > 4) {
1188 int count = tag_info(t).count;
1189 f.seek(tag_info(t).value);
1190 while (--count)
1191 printf("%c", f.get_byte());
1193 else
1194 printf("%.4s", tag_info(t).orig_value);
1195 putchar('"');
1198 static void
1199 dump_symbol_sets(File &f)
1201 uint32 symbol_set_dir_length = tag_info(symbol_set_tag).count;
1202 uint32 num_symbol_sets = symbol_set_dir_length / 14;
1204 for (uint32 i = 0; i < num_symbol_sets; i++) {
1205 f.seek(tag_info(symbol_set_tag).value + i * 14);
1206 (void)f.get_uint32(); // offset to symbol set name
1207 uint32 off1 = f.get_uint32(); // offset to selection string
1208 uint32 off2 = f.get_uint32(); // offset to symbol set index array
1209 f.seek(off1);
1210 for (uint32 j = 0; j < off2 - off1; j++) {
1211 unsigned char c = f.get_byte();
1212 if ('0' <= c && c <= '9')
1213 putchar(c);
1214 else if ('A' <= c && c <= 'Z')
1215 printf(i < num_symbol_sets - 1 ? "%c," : "%c", c);
1220 static void
1221 dump_symbols(int tfm_type)
1223 printf("Symbols:\n"
1224 "\n"
1225 " glyph id# symbol set name(s)\n"
1226 "----------------------------------\n");
1227 for (uint32 i = 0; i < nchars; i++) {
1228 uint16 charcode = char_table[i].charcode;
1229 if (charcode < charcode_name_table_size
1230 && charcode_name_table[charcode]) {
1231 if (char_table[i].symbol_set != NO_SYMBOL_SET) {
1232 printf(tfm_type == UNICODE ? "%4d (U+%04X) (%3s %3d) %s"
1233 : "%4d (MSL %4d) (%3s %3d) %s",
1234 i, charcode,
1235 show_symset(char_table[i].symbol_set),
1236 char_table[i].code,
1237 charcode_name_table[charcode]->name);
1238 for (name_list *p = charcode_name_table[charcode]->next;
1239 p; p = p->next)
1240 printf(", %s", p->name);
1241 putchar('\n');
1244 else {
1245 printf(tfm_type == UNICODE ? "%4d (U+%04X) "
1246 : "%4d (MSL %4d) ",
1247 i, charcode);
1248 if (char_table[i].symbol_set != NO_SYMBOL_SET)
1249 printf("(%3s %3d)",
1250 show_symset(char_table[i].symbol_set), char_table[i].code);
1251 putchar('\n');
1254 putchar('\n');
// Format a numeric symbol set value as a string: the quotient
// `symset / 32' in decimal followed by the character whose code is
// `(symset & 31) + 64' (e.g. 277 gives "8U").
//
// Returns a pointer to a static buffer that is overwritten by the next
// call; the caller must not free it.
static char *
show_symset(unsigned int symset)
{
  // Room for every decimal digit of an unsigned int (at most three per
  // byte), the trailing letter, and the terminating NUL, so that no
  // argument value can overrun the buffer.
  static char symset_str[sizeof(unsigned int) * 3 + 2];

  sprintf(symset_str, "%u%c", symset / 32, (int)((symset & 31) + 64));
  return symset_str;
}
1266 static char *
1267 hp_msl_to_ucode_name(int msl)
1269 char codestr[8];
1271 sprintf(codestr, "%d", msl);
1272 const char *ustr = hp_msl_to_unicode_code(codestr);
1273 if (ustr == NULL)
1274 ustr = UNNAMED;
1275 else {
1276 char *nonum;
1277 int ucode = int(strtol(ustr, &nonum, 16));
1278 // don't allow PUA code points as Unicode names
1279 if (ucode >= 0xE000 && ucode <= 0xF8FF)
1280 ustr = UNNAMED;
1282 if (!equal(ustr, UNNAMED)) {
1283 const char *uname_decomposed = decompose_unicode(ustr);
1284 if (uname_decomposed)
1285 // 1st char is the number of components
1286 ustr = uname_decomposed + 1;
1288 char *value = new char[strlen(ustr) + 1];
1289 sprintf(value, equal(ustr, UNNAMED) ? ustr : "u%s", ustr);
1290 return value;
1293 static char *
1294 unicode_to_ucode_name(int ucode)
1296 const char *ustr;
1297 char codestr[8];
1299 // don't allow PUA code points as Unicode names
1300 if (ucode >= 0xE000 && ucode <= 0xF8FF)
1301 ustr = UNNAMED;
1302 else {
1303 sprintf(codestr, "%04X", ucode);
1304 ustr = codestr;
1306 if (!equal(ustr, UNNAMED)) {
1307 const char *uname_decomposed = decompose_unicode(ustr);
1308 if (uname_decomposed)
1309 // 1st char is the number of components
1310 ustr = uname_decomposed + 1;
1312 char *value = new char[strlen(ustr) + 1];
1313 sprintf(value, equal(ustr, UNNAMED) ? ustr : "u%s", ustr);
1314 return value;
1317 static int
1318 is_uname(char *name)
1320 size_t i;
1321 size_t len = strlen(name);
1322 if (len % 5)
1323 return 0;
1325 if (name[0] != 'u')
1326 return 0;
1327 for (i = 1; i < 4; i++)
1328 if (!csxdigit(name[i]))
1329 return 0;
1330 for (i = 5; i < len; i++)
1331 if (i % 5 ? !csxdigit(name[i]) : name[i] != '_')
1332 return 0;
1334 return 1;
1337 static int
1338 read_map(const char *file, const int tfm_type)
1340 errno = 0;
1341 FILE *fp = fopen(file, "r");
1342 if (!fp) {
1343 error("can't open `%1': %2", file, strerror(errno));
1344 return 0;
1346 current_filename = file;
1347 char buf[512];
1348 current_lineno = 0;
1349 char *nonum;
1350 while (fgets(buf, int(sizeof(buf)), fp)) {
1351 current_lineno++;
1352 char *ptr = buf;
1353 while (csspace(*ptr))
1354 ptr++;
1355 if (*ptr == '\0' || *ptr == '#')
1356 continue;
1357 ptr = strtok(ptr, " \n\t");
1358 if (!ptr)
1359 continue;
1361 int msl_code = int(strtol(ptr, &nonum, 10));
1362 if (*nonum != '\0') {
1363 if (csxdigit(*nonum))
1364 error("bad MSL map: got hex code (%1)", ptr);
1365 else if (ptr == nonum)
1366 error("bad MSL map: bad MSL code (%1)", ptr);
1367 else
1368 error("bad MSL map");
1369 fclose(fp);
1370 return 0;
1373 ptr = strtok(NULL, " \n\t");
1374 if (!ptr)
1375 continue;
1376 int unicode = int(strtol(ptr, &nonum, 16));
1377 if (*nonum != '\0') {
1378 if (ptr == nonum)
1379 error("bad Unicode value (%1)", ptr);
1380 else
1381 error("bad Unicode map");
1382 fclose(fp);
1383 return 0;
1385 if (strlen(ptr) != 4) {
1386 error("bad Unicode value (%1)", ptr);
1387 return 0;
1390 int n = tfm_type == MSL ? msl_code : unicode;
1391 if (tfm_type == UNICODE && n > 0xFFFF) {
1392 // greatest value supported by TFM files
1393 error("bad Unicode value (%1): greatest value is 0xFFFF", ptr);
1394 fclose(fp);
1395 return 0;
1397 else if (n < 0) {
1398 error("negative code value (%1)", ptr);
1399 fclose(fp);
1400 return 0;
1403 ptr = strtok(NULL, " \n\t");
1404 if (!ptr) { // groff name
1405 error("missing name(s)");
1406 fclose(fp);
1407 return 0;
1409 // leave decomposed Unicode values alone
1410 else if (is_uname(ptr) && !is_decomposed(ptr))
1411 ptr = unicode_to_ucode_name(strtol(ptr + 1, &nonum, 16));
1413 if (size_t(n) >= charcode_name_table_size) {
1414 size_t old_size = charcode_name_table_size;
1415 name_list **old_table = charcode_name_table;
1416 charcode_name_table_size = n + 256;
1417 charcode_name_table = new name_list *[charcode_name_table_size];
1418 if (old_table) {
1419 memcpy(charcode_name_table, old_table, old_size*sizeof(name_list *));
1420 a_delete old_table;
1422 for (size_t i = old_size; i < charcode_name_table_size; i++)
1423 charcode_name_table[i] = NULL;
1426 // a '#' that isn't the first groff name begins a comment
1427 for (int names = 1; ptr; ptr = strtok(NULL, " \n\t")) {
1428 if (names++ > 1 && *ptr == '#')
1429 break;
1430 charcode_name_table[n] = new name_list(ptr, charcode_name_table[n]);
1433 fclose(fp);
1434 return 1;
1437 static const char *
1438 xbasename(const char *s)
1440 // DIR_SEPS[] are possible directory separator characters, see
1441 // nonposix.h. We want the rightmost separator of all possible
1442 // ones. Example: d:/foo\\bar.
1443 const char *b = strrchr(s, DIR_SEPS[0]), *b1;
1444 const char *sep = &DIR_SEPS[1];
1446 while (*sep)
1448 b1 = strrchr(s, *sep);
1449 if (b1 && (!b || b1 > b))
1450 b = b1;
1451 sep++;
1453 return b ? b + 1 : s;