tzdata: update to 2018g
[unleashed.git] / contrib / tzdata / zishrink.awk
blob8876b68a095cf87c6c4d36e991fcf38769921e70
1 # Convert tzdata source into a smaller version of itself.
3 # Contributed by Paul Eggert. This file is in the public domain.
5 # This is not a general-purpose converter; it is designed for current tzdata.
6 # 'zic' should treat this script's output as if it were identical to
7 # this script's input.
# Remember that the short name N now stands for the full name NAME.
# If N was already recorded for some other name, print a
# "# ! collision:" line naming both full names and exit with status 1.
# The bookkeeping lives in the global array used_hashes, indexed by N.

function record_hash(n, name)
{
  if (!used_hashes[n]) {
    # First use of this short name: claim it.
    used_hashes[n] = name
    return
  }
  printf "# ! collision: %s %s\n", used_hashes[n], name
  exit 1
}
# Generate a short rule name for NAME and return it.
# The short name is registered via record_hash, which exits the
# script if that short name is already taken.
# N is a local variable; callers pass only NAME.

function gen_rule_name(name, n)
{
  # A simple mnemonic: the first two characters of the full name.
  n = substr(name, 1, 2)

  record_hash(n, name)

  # Uncomment to trace each generated abbreviation:
  # printf "# %s = %s\n", n, name
  return n
}
# Fill the global array 'rule' with hand-picked abbreviations for
# rule names, then register every abbreviation via record_hash so
# that a later clash with a generated name is detected.
# NAME is a local loop variable; callers pass no arguments.

function prehash_rule_names(name)
{
  # Rule names are not part of the tzdb API, so shorter ones are
  # substituted.  Shortening them the same way from one release to
  # the next makes the output easier to compare.  Even so, the
  # 1-letter names below are not standardized in any way and may
  # change arbitrarily between releases: the main goal here is
  # compression, not comparison.

  # Abbreviating these rule names to one letter saved the most space
  # circa 2018e.
  rule["Arg"] = "A"
  rule["Brazil"] = "B"
  rule["Canada"] = "C"
  rule["Denmark"] = "D"
  rule["EU"] = "E"
  rule["France"] = "F"
  rule["GB-Eire"] = "G"
  rule["Halifax"] = "H"
  rule["Italy"] = "I"
  rule["Jordan"] = "J"
  rule["Egypt"] = "K" # "Kemet" in ancient Egyptian
  rule["Libya"] = "L"
  rule["Morocco"] = "M"
  rule["Neth"] = "N"
  rule["Poland"] = "O" # arbitrary
  rule["Palestine"] = "P"
  rule["Cuba"] = "Q" # Its start sounds like "Q".
  rule["Russia"] = "R"
  rule["Syria"] = "S"
  rule["Turkey"] = "T"
  rule["Uruguay"] = "U"
  rule["Vincennes"] = "V"
  rule["Winn"] = "W"
  rule["Mongol"] = "X" # arbitrary
  rule["NT_YK"] = "Y"
  rule["Zion"] = "Z"
  rule["Austria"] = "a"
  rule["Belgium"] = "b"
  rule["C-Eur"] = "c"
  rule["Algeria"] = "d" # country code DZ
  rule["E-Eur"] = "e"
  rule["Taiwan"] = "f" # Formosa
  rule["Greece"] = "g"
  rule["Hungary"] = "h"
  rule["Iran"] = "i"
  rule["StJohns"] = "j"
  rule["Chatham"] = "k" # arbitrary
  rule["Lebanon"] = "l"
  rule["Mexico"] = "m"
  rule["Tunisia"] = "n" # country code TN
  rule["Moncton"] = "o" # arbitrary
  rule["Port"] = "p"
  rule["Albania"] = "q" # arbitrary
  rule["Regina"] = "r"
  rule["Spain"] = "s"
  rule["Toronto"] = "t"
  rule["US"] = "u"
  rule["Louisville"] = "v" # ville
  rule["Iceland"] = "w" # arbitrary
  rule["Chile"] = "x" # arbitrary
  rule["Para"] = "y" # country code PY
  rule["Romania"] = "z" # arbitrary
  rule["Macau"] = "_" # arbitrary

  # Remaining names that are countries get their ISO 3166 alpha-2
  # country code.  This is more systematic, and avoids collisions
  # (e.g., Malta and Moldova).
  rule["Armenia"] = "AM"
  rule["Aus"] = "AU"
  rule["Azer"] = "AZ"
  rule["Barb"] = "BB"
  rule["Dhaka"] = "BD"
  rule["Bulg"] = "BG"
  rule["Bahamas"] = "BS"
  rule["Belize"] = "BZ"
  rule["Swiss"] = "CH"
  rule["Cook"] = "CK"
  rule["PRC"] = "CN"
  rule["Cyprus"] = "CY"
  rule["Czech"] = "CZ"
  rule["Germany"] = "DE"
  rule["DR"] = "DO"
  rule["Ecuador"] = "EC"
  rule["Finland"] = "FI"
  rule["Fiji"] = "FJ"
  rule["Falk"] = "FK"
  rule["Ghana"] = "GH"
  rule["Guat"] = "GT"
  rule["Hond"] = "HN"
  rule["Haiti"] = "HT"
  rule["Eire"] = "IE"
  rule["Iraq"] = "IQ"
  rule["Japan"] = "JP"
  rule["Kyrgyz"] = "KG"
  rule["ROK"] = "KR"
  rule["Latvia"] = "LV"
  # NOTE(review): the ISO 3166 alpha-2 code for Luxembourg is LU, not LX.
  # Left unchanged because altering it would alter the generated output.
  rule["Lux"] = "LX"
  rule["Moldova"] = "MD"
  rule["Malta"] = "MT"
  rule["Mauritius"] = "MU"
  rule["Namibia"] = "NA"
  rule["Nic"] = "NI"
  rule["Norway"] = "NO"
  rule["Peru"] = "PE"
  rule["Phil"] = "PH"
  rule["Pakistan"] = "PK"
  rule["Sudan"] = "SD"
  rule["Salv"] = "SV"
  rule["Tonga"] = "TO"
  rule["Vanuatu"] = "VU"

  # Avoid collisions.
  rule["Detroit"] = "Dt" # De = Denver

  # Register every abbreviation chosen above.
  for (name in rule)
    record_hash(rule[name], name)
}
# Process the input line LINE and save the shrunken result for later
# output.  The line's comment is stripped; keywords, times, weekday
# names, month names and rule names are abbreviated; redundant trailing
# fields are dropped.  If the line starts a Zone or Link whose name was
# already defined, the earlier definition (its first line and all of
# its continuation lines) is blanked in the saved output.
#
# Callers pass only LINE; FIELD, END, I, N and STARTDEF are locals.
# Reads and updates the globals rule, zonename, zonedef, output_line
# and nout.

function process_input_line(line, field, end, i, n, startdef)
{
  # Remove comments, normalize spaces, and append a space to each line.
  sub(/#.*/, "", line)
  line = line " "
  gsub(/[\t ]+/, " ", line)

  # Abbreviate keywords.  Do not abbreviate "Link" to just "L",
  # as pre-2017c zic erroneously diagnoses "Li" as ambiguous.
  sub(/^Link /, "Li ", line)
  sub(/^Rule /, "R ", line)
  sub(/^Zone /, "Z ", line)

  # SystemV rules are not needed.
  if (line ~ /^R SystemV /) return

  # Replace FooAsia rules with the same rules without "Asia", as they
  # are duplicates.  The Rule lines themselves are dropped; in other
  # lines the four characters "Asia" are cut out of the name.
  if (match(line, /[^ ]Asia /)) {
    if (line ~ /^R /) return
    line = substr(line, 1, RSTART) substr(line, RSTART + 5)
  }

  # Abbreviate times: strip leading zeros after a space or colon,
  # then drop a ":0" component that is not followed by another colon.
  while (match(line, /[: ]0+[0-9]/))
    line = substr(line, 1, RSTART) substr(line, RSTART + RLENGTH - 1)
  while (match(line, /:0[^:]/))
    line = substr(line, 1, RSTART - 1) substr(line, RSTART + 2)

  # Abbreviate weekday names.  Do not abbreviate "Sun" and "Sat", as
  # pre-2017c zic erroneously diagnoses "Su" and "Sa" as ambiguous.
  # END is the index just past the match, so END - 1 is the delimiter
  # that follows the three-letter weekday name.
  while (match(line, / (last)?(Mon|Wed|Fri)[ <>]/)) {
    # Keep only the first letter of the name.
    end = RSTART + RLENGTH
    line = substr(line, 1, end - 4) substr(line, end - 1)
  }
  while (match(line, / (last)?(Tue|Thu)[ <>]/)) {
    # Keep the first two letters of the name.
    end = RSTART + RLENGTH
    line = substr(line, 1, end - 3) substr(line, end - 1)
  }

  # Abbreviate "max", "only" and month names.
  # Do not abbreviate "min", as pre-2017c zic erroneously diagnoses "mi"
  # as ambiguous.
  gsub(/ max /, " ma ", line)
  gsub(/ only /, " o ", line)
  gsub(/ Jan /, " Ja ", line)
  gsub(/ Feb /, " F ", line)
  gsub(/ Apr /, " Ap ", line)
  gsub(/ Aug /, " Au ", line)
  gsub(/ Sep /, " S ", line)
  gsub(/ Oct /, " O ", line)
  gsub(/ Nov /, " N ", line)
  gsub(/ Dec /, " D ", line)

  # Strip leading and trailing space.
  sub(/^ /, "", line)
  sub(/ $/, "", line)

  # Remove unnecessary trailing zero fields.
  sub(/ 0+$/, "", line)

  # Remove unnecessary trailing days-of-month "1".
  if (match(line, /[A-Za-z] 1$/))
    line = substr(line, 1, RSTART)

  # Remove unnecessary trailing " Ja" (for January).
  sub(/ Ja$/, "", line)

  n = split(line, field)

  # Abbreviate rule names.  The rule name is field 4 of a Zone line and
  # field 2 of any other line except a Link line, which has none.
  # Fields starting with "-", "+" or a digit are not rule names.
  i = field[1] == "Z" ? 4 : field[1] == "Li" ? 0 : 2
  if (i && field[i] ~ /^[^-+0-9]/) {
    if (!rule[field[i]])
      rule[field[i]] = gen_rule_name(field[i])
    field[i] = rule[field[i]]
  }

  # If this zone supersedes an earlier one, delete the earlier one
  # from the saved output lines.
  startdef = ""
  if (field[1] == "Z")
    zonename = startdef = field[2]
  else if (field[1] == "Li")
    zonename = startdef = field[3]
  else if (field[1] == "R")
    zonename = ""
  if (startdef) {
    # zonedef[NAME] holds 1 + the output_line index of the first line
    # of NAME's most recent definition, or is unset if there is none.
    i = zonedef[startdef]
    if (i) {
      # Blank the first line, then every continuation line after it
      # (continuation lines start with "-", "+" or a digit).
      do {
        output_line[i - 1] = ""
      } while (output_line[i++] ~ /^[-+0-9]/)
    }
    # Record the start only on the line that begins a definition.
    # Updating it on continuation lines too would leave it pointing at
    # the definition's last line, so a later superseding definition
    # would blank just that one line and leave the rest behind.
    zonedef[startdef] = nout + 1
  }

  # Save the line for later output.
  line = field[1]
  for (i = 2; i <= n; i++)
    line = line " " field[i]
  output_line[nout++] = line
}
# Print every saved output line, in the order saved, skipping entries
# that are empty (for example, lines blanked because a later
# definition superseded them).  I is a local variable; callers pass
# no arguments.

function output_saved_lines(i)
{
  i = 0
  while (i < nout) {
    if (output_line[i])
      print output_line[i]
    i++
  }
}
BEGIN {
  # Files that the output normally depends on.  Each starts with a
  # count of 1, which is incremented when the file appears in 'deps'.
  default_dep["africa"] = 1
  default_dep["antarctica"] = 1
  default_dep["asia"] = 1
  default_dep["australasia"] = 1
  default_dep["backward"] = 1
  default_dep["etcetera"] = 1
  default_dep["europe"] = 1
  default_dep["factory"] = 1
  default_dep["northamerica"] = 1
  default_dep["southamerica"] = 1
  default_dep["systemv"] = 1
  default_dep["ziguard.awk"] = 1
  default_dep["zishrink.awk"] = 1

  # Output a version string from 'version' and related configuration variables
  # supported by tzdb's Makefile.  If you change the makefile or any other files
  # that affect the output of this script, you should append '-SOMETHING'
  # to the contents of 'version', where SOMETHING identifies what was changed.

  # Build 'ddeps': each dependency that is not a default one is listed
  # by name, and each default one missing from 'deps' is listed with
  # a leading "!".
  ddeps = ""
  ndeps = split(deps, dep)
  i = 1
  while (i <= ndeps) {
    if (!default_dep[dep[i]])
      ddeps = ddeps " " dep[i]
    else
      default_dep[dep[i]]++
    i++
  }
  for (d in default_dep)
    if (default_dep[d] == 1)
      ddeps = ddeps " !" d

  # Emit the header; settings that have their usual values are omitted.
  print "# version", version
  if (dataform != "main")
    print "# dataform", dataform
  if (redo != "posix_right")
    print "# redo " redo
  if (ddeps)
    print "# ddeps" ddeps
  print "# This zic input file is in the public domain."

  prehash_rule_names()
}
# Handle every input line whose first non-blank character is not "#",
# i.e. skip blank lines and comment-only lines.
$0 ~ /^[\t ]*[^#\t ]/ {
  process_input_line($0)
}
# Once all input has been read, write out the lines saved so far.
END {
  output_saved_lines()
}