Merge commit 'ea01a15a654b9e1c7b37d958f4d1911882ed7781'
[unleashed.git] / contrib / tzdata / zishrink.awk
blob23d623e99d8237fc1a10f02379ebbf61c56ea189
# Convert tzdata source into a smaller version of itself.

# Contributed by Paul Eggert.  This file is in the public domain.

# This is not a general-purpose converter; it is designed for current tzdata.
# 'zic' should treat this script's output as if it were identical to
# this script's input.
# Return a fresh, short rule name on every call.
#
# The global counter N_RULE_NAMES records how many names have been handed
# out so far; it is read and then incremented here.  All parameters are
# really local variables: callers invoke gen_rule_name() with no arguments.
#
# Names are spelled with letters and punctuation only (no digits, '-', '"'
# or '#').  Single-character names are produced first; once the alphabet
# is exhausted the names grow to two characters, starting with the
# alphabet's first symbol repeated.

function gen_rule_name(alphabet, base, rule_name, n, digit)
{
  alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  alphabet = alphabet "abcdefghijklmnopqrstuvwxyz"
  alphabet = alphabet "!$%&'()*+,./:;<=>?@[\\]^_`{|}~"
  base = length(alphabet)

  n = n_rule_names++
  rule_name = ""

  # Peel off one character per iteration, least significant first, and
  # prepend it to the name built so far.
  for (;;) {
    # From the second character onward, shift N down by one when it is
    # still within a single "digit" range, so that multi-character names
    # can begin with the alphabet's first symbol.
    if (rule_name != "" && n <= base)
      n--
    digit = n % base
    rule_name = substr(alphabet, digit + 1, 1) rule_name
    n = (n - digit) / base
    if (n == 0)
      break
  }

  return rule_name
}
# Process an input line and save it for later output.
#
# LINE is one raw line of zic input.  The remaining parameters (FIELD, END,
# I, N, STARTDEF) are local variables; callers pass only LINE.
#
# Globals used:
#   rule[]        maps an original rule name to its generated short name
#   zonename      name of the Zone or Link most recently started ("" after
#                 a Rule line)
#   zonedef[]     maps a Zone/Link name to 1 + the output_line index of the
#                 first line of its most recent definition
#   output_line[] saved output lines, indexed from 0
#   nout          number of saved output lines
#
# Lines that are dropped entirely (SystemV rules, FooAsia rules) return
# early without saving anything.

function process_input_line(line, field, end, i, n, startdef)
{
  # Remove comments, normalize spaces, and append a space to each line.
  # The trailing space lets the " word " patterns below match a final field.
  sub(/#.*/, "", line)
  line = line " "
  gsub(/[\f\r\t\v ]+/, " ", line)

  # Abbreviate keywords.  Do not abbreviate "Link" to just "L",
  # as pre-2017c zic erroneously diagnoses "Li" as ambiguous.
  sub(/^Link /, "Li ", line)
  sub(/^Rule /, "R ", line)
  sub(/^Zone /, "Z ", line)

  # SystemV rules are not needed.
  if (line ~ /^R SystemV /) return

  # Replace FooAsia rules with the same rules without "Asia", as they
  # are duplicates.  The Rule lines themselves are dropped; references
  # elsewhere simply lose the "Asia" suffix.
  if (match(line, /[^ ]Asia /)) {
    if (line ~ /^R /) return
    line = substr(line, 1, RSTART) substr(line, RSTART + 5)
  }

  # Abbreviate times: first strip leading zeros from each numeric
  # component, then drop ":0" components that are not followed by
  # a further ":" component.
  while (match(line, /[: ]0+[0-9]/))
    line = substr(line, 1, RSTART) substr(line, RSTART + RLENGTH - 1)
  while (match(line, /:0[^:]/))
    line = substr(line, 1, RSTART - 1) substr(line, RSTART + 2)

  # Abbreviate weekday names.  Do not abbreviate "Sun" and "Sat", as
  # pre-2017c zic erroneously diagnoses "Su" and "Sa" as ambiguous.
  # Mon/Wed/Fri shrink to one letter; Tue/Thu need two to stay distinct.
  while (match(line, / (last)?(Mon|Wed|Fri)[ <>]/)) {
    end = RSTART + RLENGTH
    line = substr(line, 1, end - 4) substr(line, end - 1)
  }
  while (match(line, / (last)?(Tue|Thu)[ <>]/)) {
    end = RSTART + RLENGTH
    line = substr(line, 1, end - 3) substr(line, end - 1)
  }

  # Abbreviate "max", "only" and month names.
  # Do not abbreviate "min", as pre-2017c zic erroneously diagnoses "mi"
  # as ambiguous.
  gsub(/ max /, " ma ", line)
  gsub(/ only /, " o ", line)
  gsub(/ Jan /, " Ja ", line)
  gsub(/ Feb /, " F ", line)
  gsub(/ Apr /, " Ap ", line)
  gsub(/ Aug /, " Au ", line)
  gsub(/ Sep /, " S ", line)
  gsub(/ Oct /, " O ", line)
  gsub(/ Nov /, " N ", line)
  gsub(/ Dec /, " D ", line)

  # Strip leading and trailing space.
  sub(/^ /, "", line)
  sub(/ $/, "", line)

  # Remove unnecessary trailing zero fields.
  sub(/ 0+$/, "", line)

  # Remove unnecessary trailing days-of-month "1".
  if (match(line, /[A-Za-z] 1$/))
    line = substr(line, 1, RSTART)

  # Remove unnecessary trailing " Ja" (for January).
  sub(/ Ja$/, "", line)

  n = split(line, field)

  # Abbreviate rule names.  The rule name is field 4 of a Zone line and
  # field 2 of a Rule or continuation line; Link lines have none.  Fields
  # starting with '-', '+' or a digit are offsets, not rule names.
  i = field[1] == "Z" ? 4 : field[1] == "Li" ? 0 : 2
  if (i && field[i] ~ /^[^-+0-9]/) {
    if (!rule[field[i]])
      rule[field[i]] = gen_rule_name()
    field[i] = rule[field[i]]
  }

  # If this zone supersedes an earlier one, delete the earlier one
  # from the saved output lines.
  startdef = ""
  if (field[1] == "Z")
    zonename = startdef = field[2]
  else if (field[1] == "Li")
    zonename = startdef = field[3]
  else if (field[1] == "R")
    zonename = ""
  if (startdef) {
    i = zonedef[startdef]
    if (i) {
      # Blank the earlier definition's first line, then every
      # continuation line (one starting with a sign or digit) after it.
      do {
        output_line[i - 1] = ""
      } while (output_line[i++] ~ /^[-+0-9]/);
    }
    # Record where this definition starts.  This is done only for the
    # line that begins a definition, never for continuation lines, so
    # that a later superseding definition deletes the whole earlier one
    # rather than just its last line.
    zonedef[startdef] = nout + 1
  }

  # Save the line for later output.
  line = field[1]
  for (i = 2; i <= n; i++)
    line = line " " field[i]
  output_line[nout++] = line
}
# Print every saved output line, in order, skipping entries that were
# blanked out after being saved.  I is a local variable; call with no
# arguments.

function output_saved_lines(i)
{
  i = 0
  while (i < nout) {
    if (output_line[i])
      print output_line[i]
    i++
  }
}
# Emit the two-line header before any input is read.  VERSION is expected
# to be supplied by the caller (e.g. with awk -v).
BEGIN {
  print "# version", version
  print "# This zic input file is in the public domain."
}

# Process every line whose first non-blank character is not '#', i.e.
# skip blank lines and comment-only lines.
$0 ~ /^[\f\r\t\v ]*[^#\f\r\t\v ]/ {
  process_input_line($0)
}

# All input has been read and superseded definitions removed; write
# the result.
END {
  output_saved_lines()
}