Update copyright for 2022
[pgsql.git] / src / common / unicode / generate-unicode_east_asian_fw_table.pl
blob9d03684c4dd027095ca1cac536785b7418d6521a
#!/usr/bin/perl
#
# Generate a sorted list of non-overlapping intervals of East Asian Wide (W)
# and East Asian Fullwidth (F) characters, using Unicode data files as input.
# Pass EastAsianWidth.txt as argument.  The output is on stdout.
#
# The input is expected to list code points in ascending order; adjacent
# W/F entries are merged into a single interval.
#
# Copyright (c) 2019-2022, PostgreSQL Global Development Group

use strict;
use warnings;

# First code point of the wide/fullwidth interval currently being
# accumulated, or undef when no interval is open.
my $range_start = undef;

# Last code point covered by the most recently parsed data line.
my $prev_last;

# Parse one line of EastAsianWidth.txt.
#
# Returns ($first, $last, $width): the first and last code point of the
# entry as numbers (equal for a single code point) and the East Asian Width
# property value.  Returns an empty list for blank lines, comment-only
# lines, and lines that have no width field.
sub parse_east_asian_width_line
{
	my ($line) = @_;

	chomp $line;
	$line =~ s/\s*#.*$//;    # strip trailing comment
	$line =~ s/^\s+//;
	$line =~ s/\s+$//;       # also removes a stray CR
	return if $line eq '';

	# Older data files write "3000;F", newer ones (Unicode 15.1 and later)
	# pad the fields as "3000          ; F".  Accept both.
	my ($codepoint, $width) = split /\s*;\s*/, $line;
	return unless defined $width;

	# "XXXX..YYYY" is a range; a lone "XXXX" is a range of one.
	my ($first, $last) = split /\.\./, $codepoint;
	$last = $first unless defined $last;

	return (hex($first), hex($last), $width);
}

# Print one table entry covering the code points $start through $end.
sub print_interval
{
	my ($start, $end) = @_;

	printf "\t{0x%04X, 0x%04X},\n", $start, $end;
	return;
}

print
  "/* generated by src/common/unicode/generate-unicode_east_asian_fw_table.pl, do not edit */\n\n";

print "static const struct mbinterval east_asian_fw[] = {\n";

# Read line by line rather than slurping the whole file into a list.
while (my $line = <ARGV>)
{
	my ($first, $last, $width) = parse_east_asian_width_line($line);
	next unless defined $width;

	if ($width eq 'F' || $width eq 'W')
	{
		# fullwidth/wide characters
		if (!defined($range_start))
		{
			# save for start of range if one hasn't been started yet
			$range_start = $first;
		}
		elsif ($first != $prev_last + 1)
		{
			# ranges aren't contiguous; emit the last and start a new one
			print_interval($range_start, $prev_last);
			$range_start = $first;
		}
	}
	else
	{
		# not wide characters, print out previous range if any
		if (defined($range_start))
		{
			print_interval($range_start, $prev_last);
			$range_start = undef;
		}
	}

	# Remember where this entry ended so the next one can be tested for
	# contiguity.  Skipped lines never get here, which is fine because they
	# do not describe any code points.
	$prev_last = $last;
}

# don't forget any ranges at the very end of the database (though there are none
# as of Unicode 13.0)
if (defined($range_start))
{
	print_interval($range_start, $prev_last);
}

print "};\n";