contrib/tzdata/zishrink.awk

   1 # Convert tzdata source into a smaller version of itself.
   2
   3 # Contributed by Paul Eggert.  This file is in the public domain.
   4
   5 # This is not a general-purpose converter; it is designed for current tzdata.
   6 # 'zic' should treat this script's output as if it were identical to
   7 # this script's input.
   8
   9
  # Return a fresh, short rule name that has not been handed out before.
  # The global N_RULE_NAMES counts the names generated so far and is
  # incremented on every call.  Names are built from letters and
  # punctuation only: "A", "B", ..., "~", then "AA", "AB", and so on.
  # All parameters are local variables; call this with no arguments.

  function gen_rule_name(alphabet, base, rule_name, n, digit)
  {
    alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \
      "abcdefghijklmnopqrstuvwxyz" \
      "!$%&'()*+,./:;<=>?@[\\]^_`{|}~"
    base = length(alphabet)
    rule_name = ""
    n = n_rule_names++

    # Peel off one character per pass, least significant first, and
    # prepend it to the name built so far.
    for (;;) {
      # After the first character, a remaining count of at most BASE is
      # reduced by one so the leading character can also be the first
      # symbol of the alphabet.
      if (rule_name != "" && n <= base)
        n--
      digit = n % base
      rule_name = substr(alphabet, digit + 1, 1) rule_name
      n = (n - digit) / base
      if (!n)
        break
    }

    return rule_name
  }
  32
  # Process an input line and save its shrunken form for later output.
  #
  # LINE is the raw text of one input line.  The remaining parameters
  # (FIELD, END, I, N, STARTDEF) are local variables.
  #
  # Comments are removed; keywords, times, weekday names and month names
  # are abbreviated; redundant trailing fields are dropped; and rule names
  # are replaced by short generated ones.  The result is appended to the
  # global array OUTPUT_LINE, whose length is the global NOUT.  Lines for
  # SystemV rules and for duplicate "...Asia" rules are discarded.
  #
  # Other globals updated here:
  #   RULE      maps an original rule name to its generated short name.
  #   ZONEDEF   maps a zone or link name to 1 + the OUTPUT_LINE index of
  #             the line that starts its most recent definition.
  #   ZONENAME  is the zone or link name currently being defined, or ""
  #             after a Rule line.

  function process_input_line(line, field, end, i, n, startdef)
  {
    # Remove comments, normalize spaces, and append a space to each line.
    # The trailing space lets the patterns below treat the last field
    # like any other field.
    sub(/#.*/, "", line)
    line = line " "
    gsub(/[\f\r\t\v ]+/, " ", line)

    # Abbreviate keywords.  Do not abbreviate "Link" to just "L",
    # as pre-2017c zic erroneously diagnoses "Li" as ambiguous.
    sub(/^Link /, "Li ", line)
    sub(/^Rule /, "R ", line)
    sub(/^Zone /, "Z ", line)

    # SystemV rules are not needed.
    if (line ~ /^R SystemV /) return

    # Replace FooAsia rules with the same rules without "Asia", as they
    # are duplicates: drop the FooAsia rule lines themselves, and strip
    # "Asia" from any other line that refers to such a rule.
    if (match(line, /[^ ]Asia /)) {
      if (line ~ /^R /) return
      line = substr(line, 1, RSTART) substr(line, RSTART + 5)
    }

    # Abbreviate times: first strip zeros that precede another digit,
    # then drop each ":0" component that is not followed by a colon.
    while (match(line, /[: ]0+[0-9]/))
      line = substr(line, 1, RSTART) substr(line, RSTART + RLENGTH - 1)
    while (match(line, /:0[^:]/))
      line = substr(line, 1, RSTART - 1) substr(line, RSTART + 2)

    # Abbreviate weekday names.  Do not abbreviate "Sun" and "Sat", as
    # pre-2017c zic erroneously diagnoses "Su" and "Sa" as ambiguous.
    # Mon/Wed/Fri shrink to one letter, Tue/Thu to two.
    while (match(line, / (last)?(Mon|Wed|Fri)[ <>]/)) {
      end = RSTART + RLENGTH
      line = substr(line, 1, end - 4) substr(line, end - 1)
    }
    while (match(line, / (last)?(Tue|Thu)[ <>]/)) {
      end = RSTART + RLENGTH
      line = substr(line, 1, end - 3) substr(line, end - 1)
    }

    # Abbreviate "max", "only" and month names.
    # Do not abbreviate "min", as pre-2017c zic erroneously diagnoses "mi"
    # as ambiguous.
    gsub(/ max /, " ma ", line)
    gsub(/ only /, " o ", line)
    gsub(/ Jan /, " Ja ", line)
    gsub(/ Feb /, " F ", line)
    gsub(/ Apr /, " Ap ", line)
    gsub(/ Aug /, " Au ", line)
    gsub(/ Sep /, " S ", line)
    gsub(/ Oct /, " O ", line)
    gsub(/ Nov /, " N ", line)
    gsub(/ Dec /, " D ", line)

    # Strip leading and trailing space.
    sub(/^ /, "", line)
    sub(/ $/, "", line)

    # Remove unnecessary trailing zero fields.
    sub(/ 0+$/, "", line)

    # Remove unnecessary trailing days-of-month "1".
    if (match(line, /[A-Za-z] 1$/))
      line = substr(line, 1, RSTART)

    # Remove unnecessary trailing " Ja" (for January).
    sub(/ Ja$/, "", line)

    n = split(line, field)

    # Abbreviate rule names.  The name is field 4 of a Zone line and
    # field 2 of every other line except a Link line, which has none.
    # A field starting with "-", "+" or a digit is not a rule name.
    i = field[1] == "Z" ? 4 : field[1] == "Li" ? 0 : 2
    if (i && field[i] ~ /^[^-+0-9]/) {
      if (!rule[field[i]])
        rule[field[i]] = gen_rule_name()
      field[i] = rule[field[i]]
    }

    # If this zone supersedes an earlier one, delete the earlier one
    # from the saved output lines.
    startdef = ""
    if (field[1] == "Z")
      zonename = startdef = field[2]
    else if (field[1] == "Li")
      zonename = startdef = field[3]
    else if (field[1] == "R")
      zonename = ""
    if (startdef) {
      i = zonedef[startdef]
      if (i) {
        # Blank the earlier definition's first line and every
        # continuation line (one starting with "-", "+" or a digit)
        # that follows it.
        do {
          output_line[i - 1] = ""
        } while (output_line[i++] ~ /^[-+0-9]/)
      }
      # Remember where this definition starts.  This is done only on
      # the line that begins a definition: if continuation lines moved
      # the marker, a later superseding definition would erase only the
      # tail of this one and leave its first lines in the output.
      zonedef[startdef] = nout + 1
    }

    # Save the line for later output.
    line = field[1]
    for (i = 2; i <= n; i++)
      line = line " " field[i]
    output_line[nout++] = line
  }
 138
 # Print the saved output lines in the order they were saved.
 # Entries of OUTPUT_LINE that test false (for example, lines that were
 # blanked out) are skipped.  NOUT is the number of saved entries.
 # The parameter I is a local variable; call this with no arguments.
 function output_saved_lines(i)
 {
   i = 0
   while (i < nout) {
     if (output_line[i])
       print output_line[i]
     i++
   }
 }
 145
 # Start the output with a two-line header.  VERSION is not assigned
 # anywhere in this script; presumably the caller supplies it (for
 # example with -v) -- TODO confirm.
 BEGIN {
   print "# version", version
   print "# This zic input file is in the public domain."
 }

 # Hand over every line whose first non-blank character is not "#";
 # blank lines and comment-only lines are ignored.
 $0 ~ /^[\f\r\t\v ]*[^#\f\r\t\v ]/ {
   process_input_line($0)
 }

 # Once all input has been read, write out the saved lines.
 END {
   output_saved_lines()
 }
 156   output_saved_lines()
 157 }