[doc] Improve formatting of tables in HTML format.
[ttfautohint.git] / lib / afblue.pl
blob774438f84a79e11a5c20cb8235fdef85093abd7c
1 #! /usr/bin/perl -w
2 # -*- Perl -*-
4 # afblue.pl
6 # Process a blue zone character data file.
8 # Copyright 2013 by
9 # David Turner, Robert Wilhelm, and Werner Lemberg.
11 # This file is part of the FreeType project, and may only be used,
12 # modified, and distributed under the terms of the FreeType project
13 # license, LICENSE.TXT. By continuing to use, modify, or distribute
14 # this file you indicate that you have read the license and
15 # understand and accept it fully.
17 use strict;
18 use warnings;
19 use English '-no_match_vars';
20 use open ':std', ':locale';
23 my $prog = $PROGRAM_NAME;
24 $prog =~ s| .* / ||x; # Remove path.
26 die "usage: $prog datafile < infile > outfile\n" if $#ARGV != 0;
29 my $datafile = $ARGV[0];
31 my %diversions; # The extracted and massaged data from `datafile'.
32 my @else_stack; # Booleans to track else-clauses.
33 my @name_stack; # Stack of integers used for names of aux. variables.
35 my $curr_enum; # Name of the current enumeration.
36 my $curr_array; # Name of the current array.
37 my $curr_max; # Name of the current maximum value.
39 my $curr_enum_element; # Name of the current enumeration element.
40 my $curr_offset; # The offset relative to current aux. variable.
41 my $curr_elem_size; # The size of the current string or block.
43 my $have_sections = 0; # Boolean; set if start of a section has been seen.
44 my $have_strings; # Boolean; set if current section contains strings.
45 my $have_blocks; # Boolean; set if current section contains blocks.
47 my $have_enum_element; # Boolean; set if we have an enumeration element.
48 my $in_string; # Boolean; set if a string has been parsed.
50 my $num_sections = 0; # Number of sections seen so far.
52 my $last_aux; # Name of last auxiliary variable.
55 # Regular expressions.
57 # [<ws>] <enum_name> <ws> <array_name> <ws> <max_name> [<ws>] ':' [<ws>] '\n'
58 my $section_re = qr/ ^ \s* (\S+) \s+ (\S+) \s+ (\S+) \s* : \s* $ /x;
60 # [<ws>] <enum_element_name> [<ws>] '\n'
61 my $enum_element_re = qr/ ^ \s* ( [A-Za-z0-9_]+ ) \s* $ /x;
63 # '#' <preprocessor directive> '\n'
64 my $preprocessor_re = qr/ ^ \# /x;
66 # '/' '/' <comment> '\n'
67 my $comment_re = qr| ^ // |x;
69 # empty line
70 my $whitespace_only_re = qr/ ^ \s* $ /x;
72 # [<ws>] '"' <string> '"' [<ws>] '\n' (<string> doesn't contain newlines)
73 my $string_re = qr/ ^ \s*
74 " ( (?: [^"\\]++ | \\. )*+ ) "
75 \s* $ /x;
77 # [<ws>] '{' <block> '}' [<ws>] '\n' (<block> can contain newlines)
78 my $block_start_re = qr/ ^ \s* \{ /x;
80 # We need the capturing group for `split' to make it return the separator
81 # tokens (i.e., the opening and closing brace) also.
82 my $brace_re = qr/ ( [{}] ) /x;
85 sub Warn
87 my $message = shift;
88 warn "$datafile:$INPUT_LINE_NUMBER: warning: $message\n";
92 sub Die
94 my $message = shift;
95 die "$datafile:$INPUT_LINE_NUMBER: error: $message\n";
99 my $warned_before = 0;
101 sub warn_before
103 Warn("data before first section gets ignored") unless $warned_before;
104 $warned_before = 1;
108 sub strip_newline
110 chomp;
111 s/ \x0D $ //x;
115 sub end_curr_string
117 # Append final null byte to string.
118 if ($have_strings)
120 push @{$diversions{$curr_array}}, " '\\0',\n" if $in_string;
122 $curr_offset++;
123 $in_string = 0;
128 sub update_max_elem_size
130 if ($curr_elem_size)
132 my $max = pop @{$diversions{$curr_max}};
133 $max = $curr_elem_size if $curr_elem_size > $max;
134 push @{$diversions{$curr_max}}, $max;
139 sub convert_non_ascii_char
141 # A UTF-8 character outside of the printable ASCII range, with possibly a
142 # leading backslash character.
143 my $s = shift;
145 # Here we count characters, not bytes.
146 $curr_elem_size += length $s;
148 utf8::encode($s);
149 $s = uc unpack 'H*', $s;
151 $curr_offset += $s =~ s/\G(..)/'\\x$1', /sg;
153 return $s;
157 sub convert_ascii_chars
159 # A series of ASCII characters in the printable range.
160 my $s = shift;
162 my $count = $s =~ s/\G(.)/'$1', /g;
163 $curr_offset += $count;
164 $curr_elem_size += $count;
166 return $s;
170 sub convert_literal
172 my $s = shift;
173 my $orig = $s;
175 # ASCII printables and space
176 my $safe_re = '\x20-\x7E';
177 # ASCII printables and space, no backslash
178 my $safe_no_backslash_re = '\x20-\x5B\x5D-\x7E';
180 $s =~ s{
181 (?: \\? ( [^$safe_re] )
182 | ( (?: [$safe_no_backslash_re]
183 | \\ [$safe_re] )+ ) )
186 defined($1) ? convert_non_ascii_char($1)
187 : convert_ascii_chars($2)
188 }egx;
190 # We assume that `$orig' doesn't contain `*/'
191 return $s . " /* $orig */";
195 sub aux_name
197 return "af_blue_" . $num_sections. "_" . join('_', reverse @name_stack);
201 sub aux_name_next
203 $name_stack[$#name_stack]++;
204 my $name = aux_name();
205 $name_stack[$#name_stack]--;
207 return $name;
211 sub enum_val_string
213 # Build string which holds code to save the current offset in an
214 # enumeration element.
215 my $aux = shift;
217 my $add = ($last_aux eq "af_blue_" . $num_sections . "_0" )
218 ? ""
219 : "$last_aux + ";
221 return " $aux = $add$curr_offset,\n";
226 # Process data file.
228 open(DATA, $datafile) || die "$prog: can't open \`$datafile': $OS_ERROR\n";
230 while (<DATA>)
232 strip_newline();
234 next if /$comment_re/;
235 next if /$whitespace_only_re/;
237 if (/$section_re/)
239 Warn("previous section is empty") if ($have_sections
240 && !$have_strings
241 && !$have_blocks);
243 end_curr_string();
244 update_max_elem_size();
246 # Save captured groups from `section_re'.
247 $curr_enum = $1;
248 $curr_array = $2;
249 $curr_max = $3;
251 $curr_enum_element = "";
252 $curr_offset = 0;
254 Warn("overwriting already defined enumeration \`$curr_enum'")
255 if exists($diversions{$curr_enum});
256 Warn("overwriting already defined array \`$curr_array'")
257 if exists($diversions{$curr_array});
258 Warn("overwriting already defined maximum value \`$curr_max'")
259 if exists($diversions{$curr_max});
261 $diversions{$curr_enum} = [];
262 $diversions{$curr_array} = [];
263 $diversions{$curr_max} = [];
265 push @{$diversions{$curr_max}}, 0;
267 @name_stack = ();
268 push @name_stack, 0;
270 $have_sections = 1;
271 $have_strings = 0;
272 $have_blocks = 0;
274 $have_enum_element = 0;
275 $in_string = 0;
277 $num_sections++;
278 $curr_elem_size = 0;
280 $last_aux = aux_name();
282 next;
285 if (/$preprocessor_re/)
287 if ($have_sections)
289 # Having preprocessor conditionals complicates the computation of
290 # correct offset values. We have to introduce auxiliary enumeration
291 # elements with the name `af_blue_<s>_<n1>_<n2>_...' which store
292 # offsets to be used in conditional clauses. `<s>' is the number of
293 # sections seen so far, `<n1>' is the number of `#if' and `#endif'
294 # conditionals seen so far in the topmost level, `<n2>' the number of
295 # `#if' and `#endif' conditionals seen so far one level deeper, etc.
296 # As a consequence, uneven values are used within a clause, and even
297 # values after a clause, since the C standard doesn't allow the
298 # redefinition of an enumeration value. For example, the name
299 # `af_blue_5_1_6' is used to construct enumeration values in the fifth
300 # section after the third (second-level) if-clause within the first
301 # (top-level) if-clause. After the first top-level clause has
302 # finished, `af_blue_5_2' is used. The current offset is then
303 # relative to the value stored in the current auxiliary element.
305 if (/ ^ \# \s* if /x)
307 push @else_stack, 0;
309 $name_stack[$#name_stack]++;
311 push @{$diversions{$curr_enum}}, enum_val_string(aux_name());
312 $last_aux = aux_name();
314 push @name_stack, 0;
316 $curr_offset = 0;
318 elsif (/ ^ \# \s* elif /x)
320 Die("unbalanced #elif") unless @else_stack;
322 pop @name_stack;
324 push @{$diversions{$curr_enum}}, enum_val_string(aux_name_next());
325 $last_aux = aux_name();
327 push @name_stack, 0;
329 $curr_offset = 0;
331 elsif (/ ^ \# \s* else /x)
333 my $prev_else = pop @else_stack;
334 Die("unbalanced #else") unless defined($prev_else);
335 Die("#else already seen") if $prev_else;
336 push @else_stack, 1;
338 pop @name_stack;
340 push @{$diversions{$curr_enum}}, enum_val_string(aux_name_next());
341 $last_aux = aux_name();
343 push @name_stack, 0;
345 $curr_offset = 0;
347 elsif (/ ^ \# \s* endif /x)
349 my $prev_else = pop @else_stack;
350 Die("unbalanced #endif") unless defined($prev_else);
352 pop @name_stack;
353 $name_stack[$#name_stack]++;
355 # If there is no else-clause for an if-clause, we add one. This is
356 # necessary to have correct offsets.
357 if (!$prev_else)
359 push @{$diversions{$curr_enum}}, enum_val_string(aux_name())
360 . "#else\n";
362 $curr_offset = 0;
365 push @{$diversions{$curr_enum}}, enum_val_string(aux_name());
366 $last_aux = aux_name();
368 $curr_offset = 0;
371 # Handle (probably continued) preprocessor lines.
372 CONTINUED_LOOP:
376 strip_newline();
378 push @{$diversions{$curr_enum}}, $ARG . "\n";
379 push @{$diversions{$curr_array}}, $ARG . "\n";
381 last CONTINUED_LOOP unless / \\ $ /x;
383 } while (<DATA>);
386 else
388 warn_before();
391 next;
394 if (/$enum_element_re/)
396 end_curr_string();
397 update_max_elem_size();
399 $curr_enum_element = $1;
400 $have_enum_element = 1;
401 $curr_elem_size = 0;
403 next;
406 if (/$string_re/)
408 if ($have_sections)
410 Die("strings and blocks can't be mixed in a section") if $have_blocks;
412 # Save captured group from `string_re'.
413 my $string = $1;
415 if ($have_enum_element)
417 push @{$diversions{$curr_enum}}, enum_val_string($curr_enum_element);
418 $have_enum_element = 0;
421 $string = convert_literal($string);
423 push @{$diversions{$curr_array}}, " $string\n";
425 $have_strings = 1;
426 $in_string = 1;
428 else
430 warn_before();
433 next;
436 if (/$block_start_re/)
438 if ($have_sections)
440 Die("strings and blocks can't be mixed in a section") if $have_strings;
442 my $depth = 0;
443 my $block = "";
444 my $block_end = 0;
446 # Count braces while getting the block.
447 BRACE_LOOP:
451 strip_newline();
453 foreach my $substring (split(/$brace_re/))
455 if ($block_end)
457 Die("invalid data after last matching closing brace")
458 if $substring !~ /$whitespace_only_re/;
461 $block .= $substring;
463 if ($substring eq '{')
465 $depth++;
467 elsif ($substring eq '}')
469 $depth--;
471 $block_end = 1 if $depth == 0;
475 # If we are here, we have run out of substrings, so get next line
476 # or exit.
477 last BRACE_LOOP if $block_end;
479 $block .= "\n";
481 } while (<DATA>);
484 if ($have_enum_element)
486 push @{$diversions{$curr_enum}}, enum_val_string($curr_enum_element);
487 $have_enum_element = 0;
490 push @{$diversions{$curr_array}}, $block . ",\n";
492 $curr_offset++;
493 $curr_elem_size++;
495 $have_blocks = 1;
497 else
499 warn_before();
502 next;
505 # Garbage. We weren't able to parse the data.
506 Die("syntax error");
509 # Finalize data.
510 end_curr_string();
511 update_max_elem_size();
514 # Filter stdin to stdout, replacing `@...@' templates.
516 sub emit_diversion
518 my $diversion_name = shift;
519 return (exists($diversions{$1})) ? "@{$diversions{$1}}"
520 : "@" . $diversion_name . "@";
524 $LIST_SEPARATOR = '';
526 my $s1 = "This file has been generated by the Perl script \`$prog',";
527 my $s1len = length $s1;
528 my $s2 = "using data from file \`$datafile'.";
529 my $s2len = length $s2;
530 my $slen = ($s1len > $s2len) ? $s1len : $s2len;
532 print "/* " . $s1 . " " x ($slen - $s1len) . " */\n"
533 . "/* " . $s2 . " " x ($slen - $s2len) . " */\n"
534 . "\n";
536 while (<STDIN>)
538 s/ @ ( [A-Za-z0-9_]+? ) @ / emit_diversion($1) /egx;
539 print;
542 # EOF