* font/devlj4/generate/text.map: Add more MSL numbers.
[s-roff.git] / src / utils / hpftodit / hpftodit.cpp
bloba8dccde041e4e8502c0cae02356aa7c7dab8cfad
1 // -*- C++ -*-
2 /* Copyright (C) 1994, 2000, 2001, 2003 Free Software Foundation, Inc.
3 Written by James Clark (jjc@jclark.com)
5 This file is part of groff.
7 groff is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 groff is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License along
18 with groff; see the file COPYING. If not, write to the Free Software
19 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22 TODO
23 devise new names for useful characters
24 option to specify symbol sets to look in
25 put filename in error messages (or fix lib)
28 #include "lib.h"
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <ctype.h>
34 #include <math.h>
35 #include <errno.h>
36 #include "assert.h"
37 #include "posix.h"
38 #include "errarg.h"
39 #include "error.h"
40 #include "cset.h"
41 #include "nonposix.h"
42 #include "unicode.h"
// Symbols defined in other translation units.
extern "C" const char *Version_string;
extern const char *hp_msl_to_unicode_code(const char *);

// Element count of a real array (not valid for a pointer).
#define SIZEOF(v) (sizeof(v)/sizeof(v[0]))
// String equality test.
#define equal(a, b) (strcmp(a, b) == 0)
// only valid if is_uname(c) has returned true
#define is_decomposed(c) strchr(c, '_')

// Values used for the int flags below.
#define NO 0
#define YES 1

// Values of the type tag, as tested by check_type().
#define MSL 0
#define SYMSET 1
#define UNICODE 2

// Placeholder name compared against map-file glyph names.
#define UNNAMED "---"

// Factor applied by scale(); main() recomputes it for TrueType TFMs.
static double multiplier = 3.0;	// make Agfa-based unitwidth an integer
63 inline
64 int scale(int n)
66 return int(n * multiplier + 0.5);
// Tags in a TFM file.  min_tag and max_tag delimit the range that is
// stored in the global `tags' array.

enum tag_type {
  min_tag                      = 400,
  type_tag                     = 400,
  copyright_tag                = 401,
  comment_tag                  = 402,
  charcode_tag                 = 403,	// MSL for Intellifont, Unicode for TrueType
  symbol_set_tag               = 404,
  unique_identifier_tag        = 405,
  inches_per_point_tag         = 406,
  nominal_point_size_tag       = 407,
  design_units_per_em_tag      = 408,
  posture_tag                  = 409,
  type_structure_tag           = 410,
  stroke_weight_tag            = 411,
  spacing_tag                  = 412,
  slant_tag                    = 413,
  appearance_width_tag         = 414,
  serif_style_tag              = 415,
  font_name_tag                = 417,
  typeface_source_tag          = 418,
  average_width_tag            = 419,
  max_width_tag                = 420,
  word_spacing_tag             = 421,
  recommended_line_spacing_tag = 422,
  cap_height_tag               = 423,
  x_height_tag                 = 424,
  max_ascent_tag               = 425,
  max_descent_tag              = 426,
  lower_ascent_tag             = 427,
  lower_descent_tag            = 428,
  underscore_depth_tag         = 429,
  underscore_thickness_tag     = 430,
  uppercase_accent_height_tag  = 431,
  lowercase_accent_height_tag  = 432,
  width_tag                    = 433,
  vertical_escapement_tag      = 434,
  left_extent_tag              = 435,
  right_extent_tag             = 436,
  ascent_tag                   = 437,
  descent_tag                  = 438,
  pair_kern_tag                = 439,
  sector_kern_tag              = 440,
  track_kern_tag               = 441,
  typeface_tag                 = 442,
  panose_tag                   = 443,
  max_tag                      = 443
};
// Two descriptive strings; not referenced in the visible part of
// this file.
const char *tag_name[] = {
  "Symbol Set",
  "Font Type"			// MSL for Intellifont, Unicode for TrueType
};
// Data types in a TFM file (the `type' field of a tag entry).
enum {
  BYTE_TYPE         = 1,
  ASCII_TYPE        = 2,	// NUL-terminated string
  USHORT_TYPE       = 3,
  LONG_TYPE         = 4,	// unused
  RATIONAL_TYPE     = 5,	// 8-byte numerator + 8-byte denominator
  SIGNED_BYTE_TYPE  = 16,	// unused
  SIGNED_SHORT_TYPE = 17,
  SIGNED_LONG_TYPE  = 18	// unused
};
// Short names for the integer types used when decoding the TFM file.
typedef unsigned char  byte;
typedef unsigned short uint16;
typedef short          int16;
typedef unsigned int   uint32;
141 class File {
142 public:
143 File(const char *);
144 void skip(int n);
145 byte get_byte();
146 uint16 get_uint16();
147 uint32 get_uint32();
148 void seek(uint32 n);
149 private:
150 unsigned char *buf_;
151 const unsigned char *ptr_;
152 const unsigned char *end_;
155 struct entry {
156 char present;
157 uint16 type;
158 uint32 count;
159 uint32 value;
160 entry() : present(0) { }
163 struct char_info {
164 uint16 charcode;
165 uint16 width;
166 int16 ascent;
167 int16 descent;
168 int16 left_extent;
169 uint16 right_extent;
170 uint16 symbol_set;
171 unsigned char code;
174 const uint16 NO_SYMBOL_SET = 0;
176 struct name_list {
177 char *name;
178 name_list *next;
179 name_list(const char *s, name_list *p) : name(strsave(s)), next(p) { }
180 ~name_list() { a_delete name; }
183 struct symbol_set {
184 uint16 select;
185 uint16 index[256];
188 #define SYMBOL_SET(n, c) ((n) * 32 + ((c) - 64))
190 // change this to '1' to compare results with original version
191 #if 0
192 uint16 text_symbol_sets[] = {
193 SYMBOL_SET(0, 'N'), // Latin 1
194 SYMBOL_SET(6, 'J'), // Microsoft Publishing
195 SYMBOL_SET(2, 'N'), // Latin 2
199 uint16 special_symbol_sets[] = {
200 SYMBOL_SET(8, 'M'),
201 SYMBOL_SET(5, 'M'),
202 SYMBOL_SET(15, 'U'),
205 #else
206 uint16 text_symbol_sets[] = {
207 SYMBOL_SET(19, 'U'), // Windows Latin 1 ("ANSI", code page 1252)
208 SYMBOL_SET(7, 'J'), // Desktop
209 SYMBOL_SET(6, 'J'), // Microsoft Publishing
210 SYMBOL_SET(9, 'E'), // Windows Latin 2, Code Page 1250
211 SYMBOL_SET(2, 'N'), // Latin 2 (subset of 9M,
212 // so we should never get here)
213 SYMBOL_SET(0, 'N'), // Latin 1 (subset of 19U,
214 // so we should never get here)
215 SYMBOL_SET(8, 'U'), // HP Roman 8
216 SYMBOL_SET(10, 'J'), // PS Standard
217 SYMBOL_SET(9, 'U'), // Windows 3.0 "ANSI"
219 //SYMBOL_SET(13, 'J'), // Ventura International (deprecated)
220 //SYMBOL_SET(6, 'M'), // Ventura Math (deprecated)
221 //SYMBOL_SET(14, 'J'), // Ventura US (deprecated)
222 SYMBOL_SET(5, 'T'), // Code Page 1254
223 SYMBOL_SET(0, 'D'), // ISO 60, 7-bit Norwegian version 1
224 SYMBOL_SET(5, 'N'), // ISO 8859-9, Latin 5
225 SYMBOL_SET(1, 'F'), // ISO 69, 7-bit French
226 SYMBOL_SET(1, 'G'), // ISO 21, 7-bit German
227 SYMBOL_SET(0, 'I'), // ISO 15, 7-bit Italian
228 SYMBOL_SET(1, 'U'), // Legal
229 SYMBOL_SET(12, 'J'), // MC Text
230 SYMBOL_SET(10, 'U'), // PC Code Page 437
231 SYMBOL_SET(11, 'U'), // PC Code Page 437N
232 SYMBOL_SET(17, 'U'), // PC Code Page 852
233 SYMBOL_SET(12, 'U'), // PC Code Page 850
234 SYMBOL_SET(9, 'T'), // PC Code Page 437T
235 SYMBOL_SET(2, 'S'), // ISO 17, 7-bit Spanish
236 SYMBOL_SET(0, 'S'), // ISO 11, 7-bit Swedish
237 SYMBOL_SET(1, 'E'), // ISO 4, 7-bit UK English
238 SYMBOL_SET(0, 'U'), // ISO 6, 7-bit ASCII English
242 uint16 special_symbol_sets[] = {
243 SYMBOL_SET(8, 'M'), // Math 8
244 SYMBOL_SET(5, 'M'), // PS Math
245 SYMBOL_SET(15, 'U'), // Pi font
246 SYMBOL_SET(19, 'M'), // Symbol font
249 #endif
251 entry tags[max_tag + 1 - min_tag];
253 char_info *char_table;
254 uint32 nchars = 0;
256 unsigned int charcode_name_table_size = 0;
257 name_list **charcode_name_table = NULL;
259 unsigned int n_symbol_sets;
260 symbol_set *symbol_set_table;
262 static int debug_flag = NO;
263 static int special_flag = NO; // not a special font
264 static int italic_flag = NO; // don't add italic correction
265 static int italic_sep;
266 static int all_flag = NO; // don't include glyphs not in mapfile
267 static int quiet_flag = NO; // don't suppress warnings about symbols not found
269 static char *hp_msl_to_ucode_name(int);
270 static char *unicode_to_ucode_name(int);
271 static int is_uname(char *);
272 static int get_printcode(uint32, uint16 *, uint16 *);
273 static char *show_symset(unsigned int);
274 static void usage(FILE *);
275 static void usage();
276 static const char *xbasename(const char *);
277 static void read_tags(File &);
278 static int check_type();
279 static void check_units(File &, const int, double *, double *);
280 static int read_map(const char *, const int);
281 static void require_tag(tag_type);
282 static void dump_ascii(File &, tag_type);
283 static void dump_tags(File &);
284 static void dump_symbol_sets(File &);
285 static void dump_symbols(int);
286 static void output_font_name(File &);
287 static void output_spacewidth();
288 static void output_pclweight();
289 static void output_pclproportional();
290 static void read_and_output_pcltypeface(File &);
291 static void output_pclstyle();
292 static void output_slant();
293 static void output_ligatures();
294 static void read_symbol_sets(File &);
295 static void read_and_output_kernpairs(File &);
296 static void output_charset(const int);
297 static void read_char_table(File &);
299 inline
300 entry &tag_info(tag_type t)
302 return tags[t - min_tag];
305 int main(int argc, char **argv)
307 program_name = argv[0];
309 int opt;
310 int res = 1200; // PCL unit of measure for cursor moves
311 int scalesize = 4; // LaserJet 4 only allows 1/4 point increments
312 int unitwidth = 6350;
313 double ppi; // points per inch
314 double upem; // design units per em
316 static const struct option long_options[] = {
317 { "help", no_argument, 0, CHAR_MAX + 1 },
318 { "version", no_argument, 0, 'v' },
319 { NULL, 0, 0, 0 }
321 while ((opt = getopt_long(argc, argv, "adsqvi:", long_options, NULL)) != EOF) {
322 switch (opt) {
323 case 'a':
324 all_flag = YES;
325 break;
326 case 'd':
327 debug_flag = YES;
328 break;
329 case 's':
330 special_flag = YES;
331 break;
332 case 'i':
333 italic_flag = YES;
334 italic_sep = atoi(optarg); // design units
335 break;
336 case 'q':
337 quiet_flag = YES; // suppress warnings about symbols not found
338 break;
339 case 'v':
340 printf("GNU hpftodit (groff) version %s\n", Version_string);
341 exit(0);
342 break;
343 case CHAR_MAX + 1: // --help
344 usage(stdout);
345 exit(0);
346 break;
347 case '?':
348 usage();
349 break;
350 default:
351 assert(0);
355 if (debug_flag && argc - optind < 1)
356 usage();
357 else if (!debug_flag && argc - optind != 3)
358 usage();
359 File f(argv[optind]);
360 read_tags(f);
361 int tfm_type = check_type();
362 if (debug_flag)
363 dump_tags(f);
364 if (!debug_flag && !read_map(argv[optind + 1], tfm_type))
365 exit(1);
366 else if (debug_flag && argc - optind > 1)
367 read_map(argv[optind + 1], tfm_type);
368 current_filename = NULL;
369 current_lineno = -1; // no line numbers
370 if (!debug_flag && !equal(argv[optind + 2], "-"))
371 if (freopen(argv[optind + 2], "w", stdout) == NULL)
372 fatal("cannot open `%1': %2", argv[optind + 2], strerror(errno));
373 current_filename = argv[optind];
375 check_units(f, tfm_type, &ppi, &upem);
376 if (tfm_type == UNICODE) // don't calculate for Intellifont TFMs
377 multiplier = double(res) / upem / ppi * unitwidth / scalesize;
378 if (italic_flag)
379 // convert from thousandths of an em to design units
380 italic_sep = int(italic_sep * upem / 1000 + 0.5);
382 read_char_table(f);
383 if (nchars == 0)
384 fatal("no characters");
386 if (!debug_flag) {
387 output_font_name(f);
388 printf("name %s\n", xbasename(argv[optind + 2]));
389 if (special_flag)
390 printf("special\n");
391 output_spacewidth();
392 output_slant();
393 read_and_output_pcltypeface(f);
394 output_pclproportional();
395 output_pclweight();
396 output_pclstyle();
398 read_symbol_sets(f);
399 if (debug_flag)
400 dump_symbols(tfm_type);
401 else {
402 output_ligatures();
403 read_and_output_kernpairs(f);
404 output_charset(tfm_type);
406 return 0;
409 static
410 void usage(FILE *stream)
412 fprintf(stream,
413 "usage: %s [-s] [-a] [-q] [-i n] tfm_file map_file output_font\n"
414 " %s -d tfm_file [map_file]\n",
415 program_name, program_name);
418 static
419 void usage()
421 usage(stderr);
422 exit(1);
425 File::File(const char *s)
427 // We need to read the file in binary mode because hpftodit relies
428 // on byte counts.
429 int fd = open(s, O_RDONLY | O_BINARY);
430 if (fd < 0)
431 fatal("cannot open `%1': %2", s, strerror(errno));
432 current_filename = s;
433 struct stat sb;
434 if (fstat(fd, &sb) < 0)
435 fatal("cannot stat: %1", strerror(errno));
436 if (!S_ISREG(sb.st_mode))
437 fatal("not a regular file");
438 buf_ = new unsigned char[sb.st_size];
439 long nread = read(fd, buf_, sb.st_size);
440 if (nread < 0)
441 fatal("read error: %1", strerror(errno));
442 if (nread != sb.st_size)
443 fatal("read unexpected number of bytes");
444 ptr_ = buf_;
445 end_ = buf_ + sb.st_size;
448 void File::skip(int n)
450 if (end_ - ptr_ < n)
451 fatal("unexpected end of file");
452 ptr_ += n;
455 void File::seek(uint32 n)
457 if (uint32(end_ - buf_) < n)
458 fatal("unexpected end of file");
459 ptr_ = buf_ + n;
462 byte File::get_byte()
464 if (ptr_ >= end_)
465 fatal("unexpected end of file");
466 return *ptr_++;
469 uint16 File::get_uint16()
471 if (end_ - ptr_ < 2)
472 fatal("unexpected end of file");
473 uint16 n = *ptr_++;
474 return n + (*ptr_++ << 8);
477 uint32 File::get_uint32()
479 if (end_ - ptr_ < 4)
480 fatal("unexpected end of file");
481 uint32 n = *ptr_++;
482 for (int i = 0; i < 3; i++)
483 n += *ptr_++ << (i + 1)*8;
484 return n;
487 static
488 void read_tags(File &f)
490 if (f.get_byte() != 'I' || f.get_byte() != 'I')
491 fatal("not an Intel format TFM file");
492 f.skip(6);
493 uint16 ntags = f.get_uint16();
494 entry dummy;
495 for (uint16 i = 0; i < ntags; i++) {
496 uint16 tag = f.get_uint16();
497 entry *p;
498 if (min_tag <= tag && tag <= max_tag)
499 p = tags + (tag - min_tag);
500 else
501 p = &dummy;
502 p->present = 1;
503 p->type = f.get_uint16();
504 p->count = f.get_uint32();
505 p->value = f.get_uint32();
509 static
510 int check_type()
512 require_tag(type_tag);
513 int tfm_type = tag_info(type_tag).value;
514 switch (tfm_type) {
515 case MSL:
516 case UNICODE:
517 break;
518 case SYMSET:
519 fatal("cannot handle Symbol Set TFM files");
520 break;
521 default:
522 fatal("unknown type tag %1", tfm_type);
524 return tfm_type;
527 static
528 void check_units(File &f, const int tfm_type, double *ppi, double *upem)
530 require_tag(design_units_per_em_tag);
531 f.seek(tag_info(design_units_per_em_tag).value);
532 uint32 num = f.get_uint32();
533 uint32 den = f.get_uint32();
534 if (tfm_type == MSL && (num != 8782 || den != 1))
535 fatal("design units per em != 8782/1");
536 *upem = double(num) / den;
537 require_tag(inches_per_point_tag);
538 f.seek(tag_info(inches_per_point_tag).value);
539 num = f.get_uint32();
540 den = f.get_uint32();
541 if (tfm_type == MSL && (num != 100 || den != 7231))
542 fatal("inches per point not 100/7231");
543 *ppi = double(den) / num;
546 static
547 void require_tag(tag_type t)
549 if (!tag_info(t).present)
550 fatal("tag %1 missing", int(t));
553 // put a human-readable font name in the file
554 static
555 void output_font_name(File &f)
557 char *p;
559 if (!tag_info(font_name_tag).present)
560 return;
561 int count = tag_info(font_name_tag).count;
562 char *font_name = new char[count];
564 if (count > 4) { // value is a file offset to the string
565 f.seek(tag_info(font_name_tag).value);
566 int n = count;
567 p = font_name;
568 while (--n)
569 *p++ = f.get_byte();
571 else // value contains the string
572 sprintf(font_name, "%.*s", count, (char*)(tag_info(font_name_tag).value));
574 // remove any trailing space
575 p = font_name + count - 1;
576 while (csspace(*--p))
578 *(p + 1) = '\0';
579 printf("# %s\n", font_name);
580 delete font_name;
583 static
584 void output_spacewidth()
586 require_tag(word_spacing_tag);
587 printf("spacewidth %d\n", scale(tag_info(word_spacing_tag).value));
590 static
591 void read_symbol_sets(File &f)
593 uint32 symbol_set_dir_length = tag_info(symbol_set_tag).count;
594 n_symbol_sets = symbol_set_dir_length/14;
595 symbol_set_table = new symbol_set[n_symbol_sets];
596 unsigned int i;
597 for (i = 0; i < n_symbol_sets; i++) {
598 f.seek(tag_info(symbol_set_tag).value + i*14);
599 (void)f.get_uint32(); // offset to symbol set name
600 uint32 off1 = f.get_uint32(); // offset to selection string
601 uint32 off2 = f.get_uint32(); // offset to symbol set index array
602 (void)f.get_uint16(); // index array length
603 // (why is this needed?)
604 f.seek(off1);
605 unsigned int j;
606 uint16 kind = 0;
607 for (j = 0; j < off2 - off1; j++) {
608 unsigned char c = f.get_byte();
609 if ('0' <= c && c <= '9')
610 kind = kind*10 + (c - '0');
611 else if ('A' <= c && c <= 'Z')
612 kind = kind*32 + (c - 64);
614 symbol_set_table[i].select = kind;
615 for (j = 0; j < 256; j++)
616 symbol_set_table[i].index[j] = f.get_uint16();
619 for (i = 0; i < nchars; i++)
620 char_table[i].symbol_set = NO_SYMBOL_SET;
622 uint16 *symbol_set_selectors = special_flag ? special_symbol_sets
623 : text_symbol_sets;
624 for (i = 0; symbol_set_selectors[i] != 0; i++) {
625 unsigned int j;
626 for (j = 0; j < n_symbol_sets; j++)
627 if (symbol_set_table[j].select == symbol_set_selectors[i])
628 break;
629 if (j < n_symbol_sets) {
630 for (int k = 0; k < 256; k++) {
631 uint16 index = symbol_set_table[j].index[k];
632 if (index != 0xffff
633 && char_table[index].symbol_set == NO_SYMBOL_SET) {
634 char_table[index].symbol_set = symbol_set_table[j].select;
635 char_table[index].code = k;
642 static
643 void read_char_table(File &f)
645 require_tag(charcode_tag);
646 nchars = tag_info(charcode_tag).count;
647 char_table = new char_info[nchars];
649 f.seek(tag_info(charcode_tag).value);
650 uint32 i;
651 for (i = 0; i < nchars; i++)
652 char_table[i].charcode = f.get_uint16();
654 require_tag(width_tag);
655 f.seek(tag_info(width_tag).value);
656 for (i = 0; i < nchars; i++)
657 char_table[i].width = f.get_uint16();
659 require_tag(ascent_tag);
660 f.seek(tag_info(ascent_tag).value);
661 for (i = 0; i < nchars; i++) {
662 char_table[i].ascent = f.get_uint16();
663 if (char_table[i].ascent < 0)
664 char_table[i].ascent = 0;
667 require_tag(descent_tag);
668 f.seek(tag_info(descent_tag).value);
669 for (i = 0; i < nchars; i++) {
670 char_table[i].descent = f.get_uint16();
671 if (char_table[i].descent > 0)
672 char_table[i].descent = 0;
675 require_tag(left_extent_tag);
676 f.seek(tag_info(left_extent_tag).value);
677 for (i = 0; i < nchars; i++)
678 char_table[i].left_extent = int16(f.get_uint16());
680 require_tag(right_extent_tag);
681 f.seek(tag_info(right_extent_tag).value);
682 for (i = 0; i < nchars; i++)
683 char_table[i].right_extent = f.get_uint16();
686 static
687 void output_pclweight()
689 require_tag(stroke_weight_tag);
690 int stroke_weight = tag_info(stroke_weight_tag).value;
691 int pcl_stroke_weight;
692 if (stroke_weight < 128)
693 pcl_stroke_weight = -3;
694 else if (stroke_weight == 128)
695 pcl_stroke_weight = 0;
696 else if (stroke_weight <= 145)
697 pcl_stroke_weight = 1;
698 else if (stroke_weight <= 179)
699 pcl_stroke_weight = 3;
700 else
701 pcl_stroke_weight = 4;
702 printf("pclweight %d\n", pcl_stroke_weight);
705 static
706 void output_pclproportional()
708 require_tag(spacing_tag);
709 printf("pclproportional %d\n", tag_info(spacing_tag).value == 0);
712 static
713 void read_and_output_pcltypeface(File &f)
715 printf("pcltypeface ");
716 require_tag(typeface_tag);
717 f.seek(tag_info(typeface_tag).value);
718 for (uint32 i = 0; i < tag_info(typeface_tag).count; i++) {
719 unsigned char c = f.get_byte();
720 if (c == '\0')
721 break;
722 putchar(c);
724 printf("\n");
727 static
728 void output_pclstyle()
730 unsigned pcl_style = 0;
731 // older tfms don't have the posture tag
732 if (tag_info(posture_tag).present) {
733 if (tag_info(posture_tag).value)
734 pcl_style |= 1;
736 else {
737 require_tag(slant_tag);
738 if (tag_info(slant_tag).value != 0)
739 pcl_style |= 1;
741 require_tag(appearance_width_tag);
742 if (tag_info(appearance_width_tag).value < 100) // guess
743 pcl_style |= 4;
744 printf("pclstyle %d\n", pcl_style);
747 static
748 void output_slant()
750 require_tag(slant_tag);
751 int slant = int16(tag_info(slant_tag).value);
752 if (slant != 0)
753 printf("slant %f\n", slant/100.0);
756 static
757 void output_ligatures()
759 // don't use ligatures for fixed space font
760 require_tag(spacing_tag);
761 if (tag_info(spacing_tag).value != 0)
762 return;
763 static const char *ligature_names[] = {
764 "fi", "fl", "ff", "ffi", "ffl"
767 static const char *ligature_chars[] = {
768 "fi", "fl", "ff", "Fi", "Fl"
771 unsigned ligature_mask = 0;
772 unsigned int i;
773 for (i = 0; i < nchars; i++) {
774 uint16 charcode = char_table[i].charcode;
775 if (charcode < charcode_name_table_size
776 && char_table[i].symbol_set != NO_SYMBOL_SET) {
777 for (name_list *p = charcode_name_table[charcode]; p; p = p->next)
778 for (unsigned int j = 0; j < SIZEOF(ligature_chars); j++)
779 if (strcmp(p->name, ligature_chars[j]) == 0) {
780 ligature_mask |= 1 << j;
781 break;
785 if (ligature_mask) {
786 printf("ligatures");
787 for (i = 0; i < SIZEOF(ligature_names); i++)
788 if (ligature_mask & (1 << i))
789 printf(" %s", ligature_names[i]);
790 printf(" 0\n");
794 static
795 void read_and_output_kernpairs(File &f)
797 if (tag_info(pair_kern_tag).present) {
798 printf("kernpairs\n");
799 f.seek(tag_info(pair_kern_tag).value);
800 uint16 n_pairs = f.get_uint16();
801 for (int i = 0; i < n_pairs; i++) {
802 uint16 i1 = f.get_uint16();
803 uint16 i2 = f.get_uint16();
804 int16 val = int16(f.get_uint16());
805 if (char_table[i1].symbol_set != NO_SYMBOL_SET
806 && char_table[i2].symbol_set != NO_SYMBOL_SET
807 && char_table[i1].charcode < charcode_name_table_size
808 && char_table[i2].charcode < charcode_name_table_size) {
809 for (name_list *p = charcode_name_table[char_table[i1].charcode];
811 p = p->next)
812 for (name_list *q = charcode_name_table[char_table[i2].charcode];
814 q = q->next)
815 if (!equal(p->name, UNNAMED) && !equal(q->name, UNNAMED))
816 printf("%s %s %d\n", p->name, q->name, scale(val));
822 static
823 void output_charset(const int tfm_type)
825 require_tag(slant_tag);
826 double slant_angle = int16(tag_info(slant_tag).value)*PI/18000.0;
827 double slant = sin(slant_angle)/cos(slant_angle);
829 require_tag(x_height_tag);
830 require_tag(lower_ascent_tag);
831 require_tag(lower_descent_tag);
833 printf("charset\n");
834 unsigned int i;
835 for (i = 0; i < nchars; i++) {
836 uint16 charcode = char_table[i].charcode;
838 // the glyph is bound to one of the searched symbol sets
839 if (char_table[i].symbol_set != NO_SYMBOL_SET) {
840 // the character was in the map file
841 if (charcode < charcode_name_table_size && charcode_name_table[charcode])
842 printf("%s", charcode_name_table[charcode]->name);
843 else if (!all_flag)
844 continue;
845 else if (tfm_type == MSL)
846 printf(hp_msl_to_ucode_name(charcode));
847 else
848 printf(unicode_to_ucode_name(charcode));
850 printf("\t%d,%d",
851 scale(char_table[i].width), scale(char_table[i].ascent));
853 int depth = scale(-char_table[i].descent);
854 if (depth < 0)
855 depth = 0;
856 int italic_correction = 0;
857 int left_italic_correction = 0;
858 int subscript_correction = 0;
860 if (italic_flag) {
861 italic_correction = scale(char_table[i].right_extent
862 - char_table[i].width
863 + italic_sep);
864 if (italic_correction < 0)
865 italic_correction = 0;
866 subscript_correction = int((tag_info(x_height_tag).value
867 * slant * .8) + .5);
868 if (subscript_correction > italic_correction)
869 subscript_correction = italic_correction;
870 left_italic_correction = scale(italic_sep
871 - char_table[i].left_extent);
874 if (subscript_correction != 0)
875 printf(",%d,%d,%d,%d",
876 depth, italic_correction, left_italic_correction,
877 subscript_correction);
878 else if (left_italic_correction != 0)
879 printf(",%d,%d,%d", depth, italic_correction, left_italic_correction);
880 else if (italic_correction != 0)
881 printf(",%d,%d", depth, italic_correction);
882 else if (depth != 0)
883 printf(",%d", depth);
884 // This is fairly arbitrary. Fortunately it doesn't much matter.
885 unsigned type = 0;
886 if (char_table[i].ascent > int16(tag_info(lower_ascent_tag).value)*9/10)
887 type |= 2;
888 if (char_table[i].descent < int16(tag_info(lower_descent_tag).value)*9/10)
889 type |= 1;
890 printf("\t%d\t%d", type,
891 char_table[i].symbol_set*256 + char_table[i].code);
893 if (tfm_type == UNICODE) {
894 if (charcode >= 0xE000 && charcode <= 0xF8FF)
895 printf("\t\t-- HP PUA U+%04X", charcode);
896 else
897 printf("\t\t-- U+%04X", charcode);
899 else
900 printf("\t\t-- MSL %4d", charcode);
901 printf(" (%3s %3d)\n",
902 show_symset(char_table[i].symbol_set), char_table[i].code);
904 if (charcode < charcode_name_table_size
905 && charcode_name_table[charcode])
906 for (name_list *p = charcode_name_table[charcode]->next;
907 p; p = p->next)
908 printf("%s\t\"\n", p->name);
910 // warnings about characters in mapfile not found in TFM
911 else if (charcode < charcode_name_table_size
912 && charcode_name_table[charcode]) {
913 char *name = charcode_name_table[charcode]->name;
914 // don't warn about Unicode or unnamed glyphs
915 // that aren't in the the TFM file
916 if (tfm_type == UNICODE && !quiet_flag && !equal(name, UNNAMED)
917 && !is_uname(name)) {
918 fprintf(stderr, "%s: warning: symbol U+%04X (%s",
919 program_name, charcode, name);
920 for (name_list *p = charcode_name_table[charcode]->next;
921 p; p = p->next)
922 fprintf(stderr, ", %s", p->name);
923 fprintf(stderr, ") not in any searched symbol set\n");
925 else if (!quiet_flag && !equal(name, UNNAMED) && !is_uname(name)) {
926 fprintf(stderr, "%s: warning: symbol MSL %d (%s",
927 program_name, charcode, name);
928 for (name_list *p = charcode_name_table[charcode]->next;
929 p; p = p->next)
930 fprintf(stderr, ", %s", p->name);
931 fprintf(stderr, ") not in any searched symbol set\n");
937 #define em_fract(a) (upem >= 0 ? double(a)/upem : 0)
939 static
940 void dump_tags(File &f)
942 double upem = -1.0;
944 printf("TFM tags\n"
945 "\n"
946 "tag# type count value\n"
947 "---------------------\n");
949 for (int i = min_tag; i <= max_tag; i++) {
950 enum tag_type t = tag_type(i);
951 if (tag_info(t).present) {
952 printf("%4d %4d %5d", i, tag_info(t).type, tag_info(t).count);
953 switch (tag_info(t).type) {
954 case BYTE_TYPE:
955 case USHORT_TYPE:
956 printf(" %5u", tag_info(t).value);
957 switch (i) {
958 case type_tag:
959 printf(" Font Type ");
960 switch (tag_info(t).value) {
961 case MSL:
962 case SYMSET:
963 printf("(Intellifont)");
964 break;
965 case UNICODE:
966 printf("(TrueType)");
968 break;
969 case charcode_tag:
970 printf(" Number of Symbols (%u)", tag_info(t).count);
971 break;
972 case symbol_set_tag:
973 printf(" Symbol Sets (%u): ",
974 tag_info(symbol_set_tag).count / 14);
975 dump_symbol_sets(f);
976 break;
977 case type_structure_tag:
978 printf(" Type Structure (%u)", tag_info(t).value);
979 break;
980 case stroke_weight_tag:
981 printf(" Stroke Weight (%u)", tag_info(t).value);
982 break;
983 case spacing_tag:
984 printf(" Spacing ");
985 switch (tag_info(t).value) {
986 case 0:
987 printf("(Proportional)");
988 break;
989 case 1:
990 printf("(Fixed Pitch: %u DU: %.2f em)", tag_info(t).value,
991 em_fract(tag_info(t).value));
992 break;
994 break;
995 case appearance_width_tag:
996 printf(" Appearance Width (%u)", tag_info(t).value);
997 break;
998 case serif_style_tag:
999 printf(" Serif Style (%u)", tag_info(t).value);
1000 break;
1001 case posture_tag:
1002 printf(" Posture (%s)", tag_info(t).value == 0
1003 ? "Upright"
1004 : tag_info(t).value == 1
1005 ? "Italic"
1006 : "Alternate Italic");
1007 break;
1008 case max_width_tag:
1009 printf(" Maximum Width (%u DU: %.2f em)", tag_info(t).value,
1010 em_fract(tag_info(t).value));
1011 break;
1012 case word_spacing_tag:
1013 printf(" Interword Spacing (%u DU: %.2f em)", tag_info(t).value,
1014 em_fract(tag_info(t).value));
1015 break;
1016 case recommended_line_spacing_tag:
1017 printf(" Recommended Line Spacing (%u DU: %.2f em)", tag_info(t).value,
1018 em_fract(tag_info(t).value));
1019 break;
1020 case x_height_tag:
1021 printf(" x-Height (%u DU: %.2f em)", tag_info(t).value,
1022 em_fract(tag_info(t).value));
1023 break;
1024 case cap_height_tag:
1025 printf(" Cap Height (%u DU: %.2f em)", tag_info(t).value,
1026 em_fract(tag_info(t).value));
1027 break;
1028 case max_ascent_tag:
1029 printf(" Maximum Ascent (%u DU: %.2f em)", tag_info(t).value,
1030 em_fract(tag_info(t).value));
1031 break;
1032 case lower_ascent_tag:
1033 printf(" Lowercase Ascent (%u DU: %.2f em)", tag_info(t).value,
1034 em_fract(tag_info(t).value));
1035 break;
1036 case underscore_thickness_tag:
1037 printf(" Underscore Thickness (%u DU: %.2f em)", tag_info(t).value,
1038 em_fract(tag_info(t).value));
1039 break;
1040 case uppercase_accent_height_tag:
1041 printf(" Uppercase Accent Height (%u DU: %.2f em)", tag_info(t).value,
1042 em_fract(tag_info(t).value));
1043 break;
1044 case lowercase_accent_height_tag:
1045 printf(" Lowercase Accent Height (%u DU: %.2f em)", tag_info(t).value,
1046 em_fract(tag_info(t).value));
1047 break;
1048 case width_tag:
1049 printf(" Horizontal Escapement array");
1050 break;
1051 case vertical_escapement_tag:
1052 printf(" Vertical Escapement array");
1053 break;
1054 case right_extent_tag:
1055 printf(" Right Extent array");
1056 break;
1057 case ascent_tag:
1058 printf(" Character Ascent array");
1059 break;
1060 case pair_kern_tag:
1061 f.seek(tag_info(t).value);
1062 printf(" Kern Pairs (%u)", f.get_uint16());
1063 break;
1064 case panose_tag:
1065 printf(" PANOSE Classification array");
1066 break;
1068 break;
1069 case SIGNED_SHORT_TYPE:
1070 printf(" %5d", int16(tag_info(t).value));
1071 switch (i) {
1072 case slant_tag:
1073 printf(" Slant (%.2f degrees)", double(tag_info(t).value) / 100);
1074 break;
1075 case max_descent_tag:
1076 printf(" Maximum Descent (%d DU: %.2f em)", int16(tag_info(t).value),
1077 em_fract(int16(tag_info(t).value)));
1078 break;
1079 case lower_descent_tag:
1080 printf(" Lowercase Descent (%d DU: %.2f em)", int16(tag_info(t).value),
1081 em_fract(int16(tag_info(t).value)));
1082 break;
1083 case underscore_depth_tag:
1084 printf(" Underscore Depth (%d DU: %.2f em)", int16(tag_info(t).value),
1085 em_fract(int16(tag_info(t).value)));
1086 break;
1087 case left_extent_tag:
1088 printf(" Left Extent array");
1089 break;
1090 // The type of this tag has changed from SHORT to SIGNED SHORT
1091 // in TFM version 1.3.0.
1092 case ascent_tag:
1093 printf(" Character Ascent array");
1094 break;
1095 case descent_tag:
1096 printf(" Character Descent array");
1097 break;
1099 break;
1100 case RATIONAL_TYPE:
1101 printf(" %5u", tag_info(t).value);
1102 switch (i) {
1103 case inches_per_point_tag:
1104 printf(" Inches per Point");
1105 break;
1106 case nominal_point_size_tag:
1107 printf(" Nominal Point Size");
1108 break;
1109 case design_units_per_em_tag:
1110 printf(" Design Units per Em");
1111 break;
1112 case average_width_tag:
1113 printf(" Average Width");
1114 break;
1116 if (tag_info(t).count == 1) {
1117 f.seek(tag_info(t).value);
1118 uint32 num = f.get_uint32();
1119 uint32 den = f.get_uint32();
1120 if (i == design_units_per_em_tag)
1121 upem = double(num) / den;
1122 printf(" (%u/%u = %g)", num, den, double(num)/den);
1124 break;
1125 case ASCII_TYPE:
1126 printf(" %5u ", tag_info(t).value);
1127 switch (i) {
1128 case comment_tag:
1129 printf("Comment ");
1130 break;
1131 case copyright_tag:
1132 printf("Copyright ");
1133 break;
1134 case unique_identifier_tag:
1135 printf("Unique ID ");
1136 break;
1137 case font_name_tag:
1138 printf("Typeface Name ");
1139 break;
1140 case typeface_source_tag:
1141 printf("Typeface Source ");
1142 break;
1143 case typeface_tag:
1144 printf("PCL Typeface ");
1145 break;
1147 dump_ascii(f, t);
1149 putchar('\n');
1152 putchar('\n');
1154 #undef em_fract
1156 static
1157 void dump_ascii(File &f, tag_type t)
1159 putchar('"');
1160 if (tag_info(t).count > 4) {
1161 int count = tag_info(t).count;
1162 f.seek(tag_info(t).value);
1163 while (--count)
1164 printf("%c", f.get_byte());
1166 else
1167 printf("%.4s", (char*)(tag_info(t).value));
1168 putchar('"');
1171 static
1172 void dump_symbol_sets(File &f)
1174 uint32 symbol_set_dir_length = tag_info(symbol_set_tag).count;
1175 uint32 n_symbol_sets = symbol_set_dir_length / 14;
1177 for (uint32 i = 0; i < n_symbol_sets; i++) {
1178 f.seek(tag_info(symbol_set_tag).value + i * 14);
1179 (void)f.get_uint32(); // offset to symbol set name
1180 uint32 off1 = f.get_uint32(); // offset to selection string
1181 uint32 off2 = f.get_uint32(); // offset to symbol set index array
1182 f.seek(off1);
1183 for (uint32 j = 0; j < off2 - off1; j++) {
1184 unsigned char c = f.get_byte();
1185 if ('0' <= c && c <= '9')
1186 putchar(c);
1187 else if ('A' <= c && c <= 'Z')
1188 printf(i < n_symbol_sets - 1 ? "%c," : "%c", c);
1193 static
1194 void dump_symbols(int tfm_type)
1196 printf("Symbols:\n"
1197 "\n"
1198 " glyph id# symbol set name(s)\n"
1199 "----------------------------------\n");
1200 for (uint32 i = 0; i < nchars; i++) {
1201 uint16 charcode = char_table[i].charcode;
1202 if (charcode < charcode_name_table_size
1203 && charcode_name_table[charcode]) {
1204 if (char_table[i].symbol_set != NO_SYMBOL_SET) {
1205 printf(tfm_type == UNICODE ? "%4d (U+%04X) (%3s %3d) %s"
1206 : "%4d (MSL %4d) (%3s %3d) %s",
1207 i, charcode,
1208 show_symset(char_table[i].symbol_set),
1209 char_table[i].code,
1210 charcode_name_table[charcode]->name);
1211 for (name_list *p = charcode_name_table[charcode]->next;
1212 p; p = p->next)
1213 printf(", %s", p->name);
1214 putchar('\n');
1217 else {
1218 printf(tfm_type == UNICODE ? "%4d (U+%04X) "
1219 : "%4d (MSL %4d) ",
1220 i, charcode);
1221 uint16 symset, code;
1222 if (char_table[i].symbol_set != NO_SYMBOL_SET)
1223 printf("(%3s %3d)",
1224 show_symset(char_table[i].symbol_set), char_table[i].code);
1225 else if (get_printcode(i, &symset, &code))
1226 printf("[%3s %3d] (set not searched)", show_symset(symset), code);
1227 putchar('\n');
// Format a symbol set selector as a number followed by a letter:
// the number is symset / 32 and the letter has the character code
// (symset & 31) + 64 (e.g., 277 yields "8U").
//
// Returns a pointer to a static buffer which is overwritten by the
// next call.
static char *
show_symset(unsigned int symset)
{
  // Three decimal digits per byte is an upper bound for the number;
  // add one byte for the letter and one for the terminating NUL, so
  // that no argument value can overflow the buffer.
  static char symset_str[3 * sizeof(unsigned int) + 2];

  sprintf(symset_str, "%u%c", symset / 32, (int)((symset & 31) + 64));
  return symset_str;
}
1241 // search symbol sets opposite the state of the 's' option
1242 static int
1243 get_printcode(uint32 index, uint16 *symset, uint16 *code)
1245 uint16 *symbol_set_selectors = special_flag ? text_symbol_sets
1246 : special_symbol_sets;
1247 for (unsigned int i = 0; symbol_set_selectors[i] != 0; i++) {
1248 unsigned int j;
1249 for (j = 0; j < n_symbol_sets; j++)
1250 if (symbol_set_table[j].select == symbol_set_selectors[i])
1251 break;
1252 if (j < n_symbol_sets) {
1253 for (unsigned int k = 0; k < 256; k++) {
1254 uint16 ndx = symbol_set_table[j].index[k];
1255 if (ndx == index) {
1256 *symset = symbol_set_table[j].select;
1257 *code = k;
1258 return 1;
1264 // not found in text or special symbol sets
1265 return 0;
1268 static char *
1269 hp_msl_to_ucode_name(int msl)
1271 char codestr[8];
1273 sprintf(codestr, "%d", msl);
1274 const char *ustr = hp_msl_to_unicode_code(codestr);
1275 if (ustr == NULL)
1276 ustr = UNNAMED;
1277 else {
1278 char *nonum;
1279 int ucode = int(strtol(ustr, &nonum, 16));
1280 // don't allow PUA code points as Unicode names
1281 if (ucode >= 0xE000 && ucode <= 0xF8FF)
1282 ustr = UNNAMED;
1284 if (!equal(ustr, UNNAMED)) {
1285 const char *uname_decomposed = decompose_unicode(ustr);
1286 if (uname_decomposed)
1287 // 1st char is the number of components
1288 ustr = uname_decomposed + 1;
1290 char *value = new char[strlen(ustr) + 1];
1291 sprintf(value, equal(ustr, UNNAMED) ? ustr : "u%s", ustr);
1292 return value;
1295 static char *
1296 unicode_to_ucode_name(int ucode)
1298 const char *ustr;
1299 char codestr[8];
1301 // don't allow PUA code points as Unicode names
1302 if (ucode >= 0xE000 && ucode <= 0xF8FF)
1303 ustr = UNNAMED;
1304 else {
1305 sprintf(codestr, "%04X", ucode);
1306 ustr = codestr;
1308 if (!equal(ustr, UNNAMED)) {
1309 const char *uname_decomposed = decompose_unicode(ustr);
1310 if (uname_decomposed)
1311 // 1st char is the number of components
1312 ustr = uname_decomposed + 1;
1314 char *value = new char[strlen(ustr) + 1];
1315 sprintf(value, equal(ustr, UNNAMED) ? ustr : "u%s", ustr);
1316 return value;
1319 static int
1320 is_uname(char *name)
1322 size_t i;
1323 size_t len = strlen(name);
1324 if (len % 5)
1325 return 0;
1327 if (name[0] != 'u')
1328 return 0;
1329 for (i = 1; i < 4; i++)
1330 if (!csxdigit(name[i]))
1331 return 0;
1332 for (i = 5; i < len; i++)
1333 if (i % 5 ? !csxdigit(name[i]) : name[i] != '_')
1334 return 0;
1336 return 1;
1339 static
1340 int read_map(const char *file, const int tfm_type)
1342 errno = 0;
1343 FILE *fp = fopen(file, "r");
1344 if (!fp) {
1345 error("can't open `%1': %2", file, strerror(errno));
1346 return 0;
1348 current_filename = file;
1349 char buf[512];
1350 current_lineno = 0;
1351 char *nonum;
1352 while (fgets(buf, int(sizeof(buf)), fp)) {
1353 current_lineno++;
1354 char *ptr = buf;
1355 while (csspace(*ptr))
1356 ptr++;
1357 if (*ptr == '\0' || *ptr == '#')
1358 continue;
1359 ptr = strtok(ptr, " \n\t");
1360 if (!ptr)
1361 continue;
1363 int msl_code = int(strtol(ptr, &nonum, 10));
1364 if (*nonum != '\0') {
1365 if (csxdigit(*nonum))
1366 error("bad MSL map: got hex code (%1)", ptr);
1367 else if (ptr == nonum)
1368 error("bad MSL map: bad MSL code (%1)", ptr);
1369 else
1370 error("bad MSL map");
1371 fclose(fp);
1372 return 0;
1375 ptr = strtok(NULL, " \n\t");
1376 if (!ptr)
1377 continue;
1378 int unicode = int(strtol(ptr, &nonum, 16));
1379 if (*nonum != '\0') {
1380 if (ptr == nonum)
1381 error("bad Unicode value (%1)", ptr);
1382 else
1383 error("bad Unicode map");
1384 fclose(fp);
1385 return 0;
1387 if (strlen(ptr) != 4) {
1388 error("bad Unicode value (%1)", ptr);
1389 return 0;
1392 int n = tfm_type == MSL ? msl_code : unicode;
1393 if (tfm_type == UNICODE && n > 0xFFFF) {
1394 // greatest value supported by TFM files
1395 error("bad Unicode value (%1): greatest value is 0xFFFF", ptr);
1396 fclose(fp);
1397 return 0;
1399 else if (n < 0) {
1400 error("negative code value (%1)", ptr);
1401 fclose(fp);
1402 return 0;
1405 ptr = strtok(NULL, " \n\t");
1406 if (!ptr) { // groff name
1407 error("missing name(s)");
1408 fclose(fp);
1409 return 0;
1411 // leave decomposed Unicode values alone
1412 else if (is_uname(ptr) && !is_decomposed(ptr))
1413 ptr = unicode_to_ucode_name(strtol(ptr + 1, &nonum, 16));
1415 if (size_t(n) >= charcode_name_table_size) {
1416 size_t old_size = charcode_name_table_size;
1417 name_list **old_table = charcode_name_table;
1418 charcode_name_table_size = n + 256;
1419 charcode_name_table = new name_list *[charcode_name_table_size];
1420 if (old_table) {
1421 memcpy(charcode_name_table, old_table, old_size*sizeof(name_list *));
1422 a_delete old_table;
1424 for (size_t i = old_size; i < charcode_name_table_size; i++)
1425 charcode_name_table[i] = NULL;
1428 for (; ptr; ptr = strtok(NULL, " \n\t"))
1429 charcode_name_table[n] = new name_list(ptr, charcode_name_table[n]);
1431 fclose(fp);
1432 return 1;
1435 static
1436 const char *xbasename(const char *s)
1438 // DIR_SEPS[] are possible directory separator characters, see
1439 // nonposix.h. We want the rightmost separator of all possible
1440 // ones. Example: d:/foo\\bar.
1441 const char *b = strrchr(s, DIR_SEPS[0]), *b1;
1442 const char *sep = &DIR_SEPS[1];
1444 while (*sep)
1446 b1 = strrchr(s, *sep);
1447 if (b1 && (!b || b1 > b))
1448 b = b1;
1449 sep++;
1451 return b ? b + 1 : s;