# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# Update the icu4x binary data for a given release:
# Usage: update-icu4x.sh <URL of ICU4X GIT> <ICU4X release tag name> <CLDR version> <ICU release tag name>
# update-icu4x.sh https://github.com/unicode-org/icu4x.git icu@1.2.0 43.0.0 release-73-1
# Update to the main branch:
# Usage: update-icu4x.sh <URL of ICU4X GIT> <branch> <CLDR version> <ICU release tag name>
# update-icu4x.sh https://github.com/unicode-org/icu4x.git main 43.0.0 release-73-1
# ICU export-data release tag: the 4th positional argument, falling back to
# release-73-1 when it is unset or empty.
icuexport=${4:-release-73-1}
# Print the expected invocation followed by a concrete example.
echo "Usage: update-icu4x.sh <URL of ICU4X GIT> <ICU4X release tag name> <CLDR version> <ICU release tag name>"
echo "Example: update-icu4x.sh https://github.com/unicode-org/icu4x.git icu@1.2.0 43.0.0 release-73-1"
# Make a log function so the output is easy to read.
# The message is passed as a printf argument instead of being spliced into the
# format string, so a '%' or '\' in the text (for example inside a path) is
# printed verbatim rather than being interpreted as a format directive.
# CYAN and CLEAR are not defined in the visible code; they stay in the format
# string so that any escape sequences they hold are still interpreted.
printf "${CYAN}[update-icu4x]${CLEAR} %s\n" "$*"
# Specify locale and time zone information for consistent output and reproducibility.
export LANG=en_US.UTF-8
export LC_ALL=en_US.UTF-8
# Define all of the paths.
# top_src_dir is the parent of the directory that contains this script, with
# symlinks resolved by `pwd -P`. The `&&` (instead of `;`) matters: if the `cd`
# fails, the substitution now fails too, rather than silently yielding the
# caller's current working directory.
top_src_dir=$(cd -- "$(dirname "$0")/.." >/dev/null 2>&1 && pwd -P)
# Directory the regenerated data is saved into.
data_dir="${top_src_dir}/intl/icu_testdata/data/baked"
# File that receives the `git log -1` output of the cloned repository.
git_info_file="${data_dir}/ICU4X-GIT-INFO"
log "Remove the old data"
# Shallow-clone the requested tag or branch ($2) of the repository ($1) into a
# fresh temporary directory. Every expansion is quoted so each value reaches
# git as exactly one argument, even if it contains whitespace or glob
# characters.
tmpclonedir=$(mktemp -d)
git clone --depth 1 --branch "$2" "$1" "${tmpclonedir}"
log "Change the directory to the cloned repo"
log "Patching line segmenter data to fix https://github.com/unicode-org/icu4x/issues/3811."
# This manual patch can be removed once we upgrade to ICU4X 1.3 in bug 1851323.
# Overwrites the clone's line.toml with the copy from the pinned upstream
# commit. The destination is quoted so the temporary directory path is passed
# to wget as a single argument.
wget --unlink -q \
  -O "${tmpclonedir}/provider/datagen/data/segmenter/line.toml" \
  https://raw.githubusercontent.com/unicode-org/icu4x/fd62de5e232ea1f0cb81e88dc6eb0cf9c86f85ca/provider/datagen/src/transform/segmenter/rules/line.toml
log "Run the icu4x-datagen tool to regenerate the data."
log "Saving the data into: ${data_dir}"
# TODO(Bug 1741262) - Should locales be filtered as well? It doesn't appear that the existing ICU
# data builder is using any locale filtering.
# Include this resource key in the output. Accepts multiple arguments.
# --key-file <KEY_FILE>
# Path to text file with resource keys to include, one per line. Empty lines and
# lines starting with '#' are ignored.
71 cargo run
--bin icu4x-datagen \
75 --icuexport-tag ${icuexport} \
76 --keys segmenter
/dictionary
/w_auto@
1 \
77 --keys segmenter
/grapheme@
1 \
78 --keys segmenter
/line@
1 \
79 --keys segmenter
/lstm
/wl_auto@
1 \
80 --keys segmenter
/sentence@
1 \
81 --keys segmenter
/word@
1 \
83 --use-separate-crates \
log "Record the current cloned git information to:"
# (This ensures that if ICU modifications are performed properly, it's always
# possible to run the command at the top of this script and make no changes to
# Write the most recent commit of the clone into the git info file. Both paths
# are quoted so they survive whitespace or glob characters.
git -C "${tmpclonedir}" log -1 > "${git_info_file}"
log "Clean up the tmp directory"