Backed out changeset cfe0bbc666b8 (bug 1784757) in order to wait some more for a...
[gecko.git] / intl / update-icu4x.sh
bloba0f5dac37aeef4f5704fc23a5410122d346a704d
1 #!/bin/sh
2 # This Source Code Form is subject to the terms of the Mozilla Public
3 # License, v. 2.0. If a copy of the MPL was not distributed with this
4 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
# Abort as soon as any command fails.
set -e

# Update the ICU4X binary data for a given release:
#   Usage: update-icu4x.sh <URL of ICU4X GIT> <ICU4X release tag name> <CLDR version> <ICU release tag name>
#   update-icu4x.sh https://github.com/unicode-org/icu4x.git icu@1.2.0 43.0.0 release-73-1
#
# Update to the main branch:
#   Usage: update-icu4x.sh <URL of ICU4X GIT> <branch> <CLDR version> <ICU release tag name>
#   update-icu4x.sh https://github.com/unicode-org/icu4x.git main 43.0.0 release-73-1

# Optional arguments: fall back to these versions when the third / fourth
# argument is missing or empty.
cldr=${3:-43.0.0}
icuexport=${4:-release-73-1}

# The repository URL ($1) and the tag or branch name ($2) are mandatory.
if [ $# -lt 2 ]; then
  # A usage error is a diagnostic, so it is written to stderr.
  echo "Usage: update-icu4x.sh <URL of ICU4X GIT> <ICU4X release tag name> <CLDR version> <ICU release tag name>" >&2
  echo "Example: update-icu4x.sh https://github.com/unicode-org/icu4x.git icu@1.2.0 43.0.0 release-73-1" >&2
  exit 1
fi
# Make a log function so the output is easy to read.
#
# Prints one line to stdout: a cyan "[update-icu4x]" tag followed by the
# message.
# Arguments: the words of the message; they are joined with single spaces
#            and printed verbatim.
# Globals:   CYAN and CLEAR are (re)assigned on every call.
log() {
  CYAN='\033[0;36m'
  CLEAR='\033[0m'
  # The message is passed as a %s argument rather than being spliced into the
  # format string, so '%' and '\' inside it (e.g. in a path) are printed
  # literally instead of being interpreted by printf. %b expands the escape
  # sequences stored in the two colour variables.
  printf '%b[update-icu4x]%b %s\n' "${CYAN}" "${CLEAR}" "$*"
}
# Pin the time zone and the locale so that every tool started by this script
# produces consistent, reproducible output.
TZ=UTC
LANG=en_US.UTF-8
LANGUAGE=en_US
LC_ALL=en_US.UTF-8
export TZ LANG LANGUAGE LC_ALL
# Define all of the paths.

# Directory the script was started from.
original_pwd=$(pwd)

# Physical (symlink-free) path of the parent of the directory that contains
# this script. `&&` is used instead of `;` so that a failed `cd` makes the
# substitution fail instead of silently yielding the current directory, which
# would make the paths derived below point at the wrong tree.
top_src_dir=$(cd -- "$(dirname "$0")/.." >/dev/null 2>&1 && pwd -P)

# Output directory for the generated data.
data_dir=${top_src_dir}/intl/icu_testdata/data/baked

# File that receives the `git log -1` output of the cloned repository.
git_info_file=${data_dir}/ICU4X-GIT-INFO
log "Remove the old data"
# Quoted so a path containing spaces or glob characters is removed as one
# operand; `:?` aborts instead of running rm without a real path should the
# variable ever be empty.
rm -rf -- "${data_dir:?}"

log "Clone ICU4X"
tmpclonedir=$(mktemp -d)
# Remove the temporary clone on every exit path, including an early abort
# caused by `set -e`.
trap 'rm -rf -- "${tmpclonedir}"' EXIT
# $1 is the repository URL, $2 the tag or branch to check out.
git clone --depth 1 --branch "$2" -- "$1" "${tmpclonedir}"

log "Change the directory to the cloned repo"
log "${tmpclonedir}"
cd "${tmpclonedir}"

log "Patching line segmenter data to fix https://github.com/unicode-org/icu4x/issues/3811."
# This manual patch can be removed once we upgrade to ICU4X 1.3 in bug 1851323.
wget --unlink -q -O "${tmpclonedir}/provider/datagen/data/segmenter/line.toml" https://raw.githubusercontent.com/unicode-org/icu4x/fd62de5e232ea1f0cb81e88dc6eb0cf9c86f85ca/provider/datagen/src/transform/segmenter/rules/line.toml
log "Run the icu4x-datagen tool to regenerate the data."
log "Saving the data into: ${data_dir}"

# TODO(Bug 1741262) - Should locales be filtered as well? It doesn't appear that the existing ICU
# data builder is using any locale filtering.
#
# Relevant icu4x-datagen options:
#   --keys <KEYS>...
#       Include this resource key in the output. Accepts multiple arguments.
#   --key-file <KEY_FILE>
#       Path to text file with resource keys to include, one per line. Empty lines and
#       lines starting with '#' are ignored.
#
# Everything after the bare `--` is passed to icu4x-datagen rather than to
# cargo. The variable arguments are quoted so that each one stays a single
# word, and the final option carries no trailing backslash so the command ends
# here instead of absorbing the following line.
cargo run --bin icu4x-datagen \
  --features=bin \
  -- \
  --cldr-tag "${cldr}" \
  --icuexport-tag "${icuexport}" \
  --keys segmenter/dictionary/w_auto@1 \
  --keys segmenter/grapheme@1 \
  --keys segmenter/line@1 \
  --keys segmenter/lstm/wl_auto@1 \
  --keys segmenter/sentence@1 \
  --keys segmenter/word@1 \
  --all-locales \
  --use-separate-crates \
  --format mod \
  --out "${data_dir}"
log "Record the current cloned git information to:"
log "${git_info_file}"
# (This ensures that if ICU modifications are performed properly, it's always
# possible to run the command at the top of this script and make no changes to
# the tree.)
# Quoting keeps both the repository path and the redirection target intact
# when they contain spaces or glob characters.
git -C "${tmpclonedir}" log -1 > "${git_info_file}"

log "Clean up the tmp directory"
# Leave the clone before deleting it.
cd "${original_pwd}"
# `:?` aborts instead of running rm without a real path should the variable
# ever be empty.
rm -rf -- "${tmpclonedir:?}"