1 # Convert tzdata source into a smaller version of itself.
3 # Contributed by Paul Eggert. This file is in the public domain.
5 # This is not a general-purpose converter; it is designed for current tzdata.
6 # 'zic' should treat this script's output as if it were identical to
# this script's input.
9 # Record a hash N for the new name NAME, checking for collisions.
# Parameters:
#   n    - the hash (shortened name) being claimed
#   name - the full name that N stands for
11 function record_hash
(n
, name
)
# Collision report: prints the entry already held in used_hashes[n] next to
# NAME, as a "#"-prefixed line.
# NOTE(review): the condition guarding this report is not visible in this
# copy; presumably it tests whether used_hashes[n] is already set -- confirm.
14 printf "# ! collision: %s %s\n", used_hashes
[n
], name
20 # Return a shortened rule name representing NAME,
21 # and record this relationship to the hash table.
# Parameters:
#   name - the full rule name to shorten
#   n    - scratch variable holding the shortened name; it is assigned below
#          before being read, so it acts as a local rather than an input
23 function gen_rule_name
(name
, n
)
25 # Use a simple mnemonic: the first two letters.
26 n =
substr(name
, 1, 2)
# Debugging aid, left disabled: would print each "short = full" pairing.
28 # printf "# %s = %s\n", n, name
# Preload the rule[] table, which maps a full rule name to its hand-picked
# abbreviation, and pass each pairing to record_hash.
# Parameters:
#   name - scratch variable used as the rule[] index in the record_hash call
#          at the end of this function
32 function prehash_rule_names
(name
)
34 # Rule names are not part of the tzdb API, so substitute shorter
35 # ones. Shortening them consistently from one release to the next
36 # simplifies comparison of the output. That being said, the
37 # 1-letter names below are not standardized in any way, and can
38 # change arbitrarily from one release to the next, as the main goal
39 # here is compression not comparison.
41 # Abbreviating these rule names to one letter saved the most space
53 rule
["Egypt"] =
"K" # "Kemet" in ancient Egyptian
57 rule
["Poland"] =
"O" # arbitrary
58 rule
["Palestine"] =
"P"
59 rule
["Cuba"] =
"Q" # Its start sounds like "Q".
64 rule
["Vincennes"] =
"V"
66 rule
["Mongol"] =
"X" # arbitrary
72 rule
["Algeria"] =
"d" # country code DZ
74 rule
["Taiwan"] =
"f" # Formosa
79 rule
["Chatham"] =
"k" # arbitrary
82 rule
["Tunisia"] =
"n" # country code TN
83 rule
["Moncton"] =
"o" # arbitrary
85 rule
["Albania"] =
"q" # arbitrary
90 rule
["Louisville"] =
"v" # ville
91 rule
["Iceland"] =
"w" # arbitrary
92 rule
["Chile"] =
"x" # arbitrary
93 rule
["Para"] =
"y" # country code PY
94 rule
["Romania"] =
"z" # arbitrary
95 rule
["Macau"] =
"_" # arbitrary
97 # Use ISO 3166 alpha-2 country codes for remaining names that are countries.
98 # This is more systematic, and avoids collisions (e.g., Malta and Moldova).
99 rule
["Armenia"] =
"AM"
105 rule
["Bahamas"] =
"BS"
106 rule
["Belize"] =
"BZ"
110 rule
["Cyprus"] =
"CY"
112 rule
["Germany"] =
"DE"
114 rule
["Ecuador"] =
"EC"
115 rule
["Finland"] =
"FI"
125 rule
["Kyrgyz"] =
"KG"
127 rule
["Latvia"] =
"LV"
129 rule
["Moldova"] =
"MD"
131 rule
["Mauritius"] =
"MU"
132 rule
["Namibia"] =
"NA"
134 rule
["Norway"] =
"NO"
137 rule
["Pakistan"] =
"PK"
141 rule
["Vanuatu"] =
"VU"
144 rule
["Detroit"] =
"Dt" # De = Denver
# Register the abbreviation stored for NAME so that collisions get reported.
# NOTE(review): the loop that supplies NAME is not visible in this copy;
# presumably this runs once for every index of rule[] -- confirm.
147 record_hash
(rule
[name
], name
)
151 # Process an input line and save it for later output.
# Parameters:
#   line - the text of the input line to shrink
# The remaining parameters are scratch variables; each is assigned in the
# body before it is read:
#   field    - array of the whitespace-separated fields of the shrunken line
#   end      - index just past the current weekday match
#   i        - field index, later reused as an index into output_line[]
#   n        - number of fields produced by split
#   startdef - name whose earlier definition is looked up in zonedef[]
# Variables not in the parameter list that this function assigns: rule[],
# zonename, zonedef[], output_line[] and nout.
153 function process_input_line
(line
, field
, end, i
, n
, startdef
)
155 # Remove comments, normalize spaces, and append a space to each line.
# Collapse every run of tabs and spaces into a single space.
158 gsub(/[\t ]+/, " ", line
)
160 # Abbreviate keywords. Do not abbreviate "Link" to just "L",
161 # as pre-2017c zic erroneously diagnoses "Li" as ambiguous.
162 sub(/^Link
/, "Li ", line
)
163 sub(/^Rule
/, "R ", line
)
164 sub(/^Zone
/, "Z ", line
)
166 # SystemV rules are not needed.
167 if (line ~
/^R SystemV
/) return
169 # Replace FooAsia rules with the same rules without "Asia", as they
# The match starts on the character just before "Asia", so RSTART indexes
# that character.
171 if (match(line
, /[^
]Asia
/)) {
# A rule line ("R ...") that matches is dropped outright.
172 if (line ~
/^R
/) return
# Otherwise keep everything up to and including the character before "Asia"
# and resume after the four letters, cutting "Asia" out of the line.
173 line =
substr(line
, 1, RSTART) substr(line
, RSTART + 5)
# Strip leading zeros from numbers: keep the ':' or ' ' that starts the match
# and resume at the match's final digit, discarding the zeros in between.
177 while (match(line
, /[: ]0+[0-9]/))
178 line =
substr(line
, 1, RSTART) substr(line
, RSTART + RLENGTH - 1)
# Delete each ":0" that is followed by a character other than ':' by
# dropping the two characters at RSTART.
179 while (match(line
, /:0[^
:]/))
180 line =
substr(line
, 1, RSTART - 1) substr(line
, RSTART + 2)
182 # Abbreviate weekday names. Do not abbreviate "Sun" and "Sat", as
183 # pre-2017c zic erroneously diagnoses "Su" and "Sa" as ambiguous.
# Mon/Wed/Fri shrink to their first letter. 'end' is the index just past the
# match, so end - 1 is the delimiter after the weekday and end - 4 is the
# weekday's first letter; the two letters in between are dropped.
184 while (match(line
, / (last
)?
(Mon
|Wed
|Fri
)[ <>]/)) {
185 end =
RSTART + RLENGTH
186 line =
substr(line
, 1, end - 4) substr(line
, end - 1)
# Tue/Thu keep two letters (end - 3 is the weekday's second letter); only
# the third letter is dropped.
188 while (match(line
, / (last
)?
(Tue
|Thu
)[ <>]/)) {
189 end =
RSTART + RLENGTH
190 line =
substr(line
, 1, end - 3) substr(line
, end - 1)
193 # Abbreviate "max", "only" and month names.
194 # Do not abbreviate "min", as pre-2017c zic erroneously diagnoses "mi"
196 gsub(/ max
/, " ma ", line
)
197 gsub(/ only
/, " o ", line
)
198 gsub(/ Jan
/, " Ja ", line
)
199 gsub(/ Feb
/, " F ", line
)
200 gsub(/ Apr
/, " Ap ", line
)
201 gsub(/ Aug
/, " Au ", line
)
202 gsub(/ Sep
/, " S ", line
)
203 gsub(/ Oct
/, " O ", line
)
204 gsub(/ Nov
/, " N ", line
)
205 gsub(/ Dec
/, " D ", line
)
207 # Strip leading and trailing space.
211 # Remove unnecessary trailing zero fields.
212 sub(/ 0+$
/, "", line
)
214 # Remove unnecessary trailing days-of-month "1".
# RSTART indexes the letter before the final " 1"; keeping the line up to
# that letter drops the " 1".
215 if (match(line
, /[A
-Za
-z
] 1$
/))
216 line =
substr(line
, 1, RSTART)
218 # Remove unnecessary trailing " Ja" (for January).
219 sub(/ Ja$
/, "", line
)
# Split the shrunken line into fields; n receives the field count.
221 n =
split(line
, field
)
223 # Abbreviate rule names.
# Choose which field to inspect: field 4 on a "Z" line, no field (0) on an
# "Li" line, and field 2 on any other line.
224 i = field
[1] ==
"Z" ?
4 : field
[1] ==
"Li" ?
0 : 2
# Only touch the field when it does not begin with '-', '+' or a digit.
225 if (i
&& field
[i
] ~
/^
[^
-+0-9]/) {
# NOTE(review): any guard around this assignment is not visible in this
# copy; presumably a name is generated only when rule[] has no entry for the
# field yet -- confirm.
227 rule
[field
[i
]] = gen_rule_name
(field
[i
])
# Replace the full name in the field with its abbreviation from rule[].
228 field
[i
] = rule
[field
[i
]]
231 # If this zone supersedes an earlier one, delete the earlier one
232 # from the saved output lines.
# NOTE(review): the opening condition of this if/else chain is not visible
# in this copy; presumably it tests for a "Z" line -- confirm.
235 zonename = startdef = field
[2]
236 else if (field
[1] ==
"Li")
237 zonename = startdef = field
[3]
238 else if (field
[1] ==
"R")
# Look up the position recorded for an earlier definition of this name.
241 i = zonedef
[startdef
]
# Blank the saved line just before that position.
244 output_line
[i
- 1] =
""
# Advance i past the consecutive saved lines that begin with '-', '+' or a
# digit. The loop body is empty; all the work happens in the i++.
245 while (output_line
[i
++] ~
/^
[-+0-9]/);
# Record one more than the index at which the current line is about to be
# stored (see the nout++ at the end of this function).
248 zonedef
[zonename
] = nout
+ 1
250 # Save the line for later output.
# Append fields 2 through n, each preceded by a single space.
# NOTE(review): the statement that sets line's starting value for this loop
# is not visible in this copy.
252 for (i =
2; i
<= n
; i
++)
253 line = line
" " field
[i
]
# Store the line at index nout, then increment nout.
254 output_line
[nout
++] = line
# Walk the saved output lines in the order they were stored.
# Parameters:
#   i - scratch loop index, initialized below before use
257 function output_saved_lines
(i
)
# Visit indices 0 through nout - 1.
# NOTE(review): the loop body is not visible in this copy; presumably it
# prints output_line[i] -- confirm.
259 for (i =
0; i
< nout
; i
++)
# Setup statements: build the table of default dependencies, compare it
# against the 'deps' variable, and print "#"-prefixed header lines.
# The variables deps, version, dataform and redo are read here but are not
# assigned anywhere in the visible code.
265 # Files that the output normally depends on.
# Each default dependency starts with the value 1.
266 default_dep
["africa"] =
1
267 default_dep
["antarctica"] =
1
268 default_dep
["asia"] =
1
269 default_dep
["australasia"] =
1
270 default_dep
["backward"] =
1
271 default_dep
["etcetera"] =
1
272 default_dep
["europe"] =
1
273 default_dep
["factory"] =
1
274 default_dep
["northamerica"] =
1
275 default_dep
["southamerica"] =
1
276 default_dep
["systemv"] =
1
277 default_dep
["ziguard.awk"] =
1
278 default_dep
["zishrink.awk"] =
1
280 # Output a version string from 'version' and related configuration variables
281 # supported by tzdb's Makefile. If you change the makefile or any other files
282 # that affect the output of this script, you should append '-SOMETHING'
283 # to the contents of 'version', where SOMETHING identifies what was changed.
# Split 'deps' on whitespace into dep[1..ndeps].
285 ndeps =
split(deps
, dep
)
287 for (i =
1; i
<= ndeps
; i
++) {
# A listed dependency that is one of the defaults has its counter bumped,
# so its value becomes greater than 1.
288 if (default_dep
[dep
[i
]]) {
289 default_dep
[dep
[i
]]++
# Append dep[i] to the space-separated ddeps string.
# NOTE(review): presumably this is the else branch, taken for dependencies
# that are not defaults; the branch structure is not visible in this copy.
291 ddeps = ddeps
" " dep
[i
]
# Find defaults whose counter is still 1, i.e. was never bumped above.
# NOTE(review): the action taken for them is not visible in this copy.
294 for (d in default_dep
) {
295 if (default_dep
[d
] ==
1) {
299 print "# version", version
# The dataform line is printed only when dataform differs from "main".
300 if (dataform
!= "main") {
301 print "# dataform", dataform
# NOTE(review): the body of this redo test is not visible in this copy.
303 if (redo
!= "posix_right") {
# ddeps entries each carry their own leading space, so none is added here.
# NOTE(review): any guard around this print is not visible in this copy.
307 print "# ddeps" ddeps
309 print "# This zic input file is in the public domain."
# Pass the whole current input record ($0) to process_input_line.
315 process_input_line
($
0)