2 # This Source Code Form is subject to the terms of the Mozilla Public
3 # License, v. 2.0. If a copy of the MPL was not distributed with this
4 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 # Update the icu4x binary data for a given release:
# Usage: update-icu4x.sh <URL of ICU4X GIT> <release tag name> <CLDR version> <ICU release tag name> <ICU4X version of icu_capi>
10 # update-icu4x.sh https://github.com/unicode-org/icu4x.git icu@1.4.0 44.0.0 release-74-1 1.4.0
12 # Update to the main branch:
# Usage: update-icu4x.sh <URL of ICU4X GIT> <branch> <CLDR version> <ICU release tag name> <ICU4X version of icu_capi>
14 # update-icu4x.sh https://github.com/unicode-org/icu4x.git main 44.0.0 release-74-1 1.4.0
# Optional positional arguments and the values used when they are omitted:
#   $4 - ICU release tag whose icuexportdata archive is downloaded
#   $5 - icu_capi crate version fetched from crates.io
icuexport="${4:-release-74-1}"
icu4x_version="${5:-1.4.0}"
# Print the expected invocation followed by a concrete example.
printf '%s\n' \
  "Usage: update-icu4x.sh <URL of ICU4X GIT> <ICU4X release tag name> <CLDR version> <ICU release tag name> <ICU4X version for icu_capi>" \
  "Example: update-icu4x.sh https://github.com/unicode-org/icu4x.git icu@1.4.0 44.0.0 release-74-1 1.4.0"
27 # Make a log function so the output is easy to read.
# Emit one tagged log line. The message is passed as a '%s' argument rather
# than spliced into the format string, so a '%' in the message is printed
# literally instead of being parsed as a conversion specifier. As a
# consequence, backslash sequences inside the message are no longer expanded.
# CYAN and CLEAR are set outside the lines shown here; they stay in the format
# string so any backslash escapes they contain are still interpreted.
printf "${CYAN}[update-icu4x]${CLEAR} %s\n" "$*"
# Specify locale and time zone information for consistent output and reproducibility.
# Pin the locale variables and export them to every child process.
LANG=en_US.UTF-8
LC_ALL=en_US.UTF-8
export LANG LC_ALL
40 # Define all of the paths.
# Resolve the top source directory as the physical parent of the directory
# that holds this script. '&&' (rather than ';') makes a failed cd produce an
# empty result instead of the caller's working directory, so the rm -rf steps
# that build on these paths can never target an unrelated location.
top_src_dir=$(cd -- "$(dirname -- "$0")/.." >/dev/null 2>&1 && pwd -P)
if [ -z "${top_src_dir}" ]; then
  echo "update-icu4x.sh: unable to resolve the top source directory" >&2
  exit 1
fi
# Output directory for the regenerated segmenter data.
segmenter_data_dir="${top_src_dir}/intl/icu_segmenter_data/data"
# File that receives the latest git log entry of the cloned repository.
git_info_file="${segmenter_data_dir}/ICU4X-GIT-INFO"
log "Remove the old data"
# Quoted so a path containing whitespace or glob characters reaches rm as a
# single operand; ':?' aborts the script instead of running rm on an empty
# or unset path.
rm -rf -- "${segmenter_data_dir:?}"
log "Download icuexportdata"
# Scratch directory that is later handed to icu4x-datagen as --icuexport-root.
tmpicuexportdir="$(mktemp -d)"
# Archive name for the selected ICU release; every '/' in the tag becomes '-'.
icuexport_filename="$(echo "icuexportdata_${icuexport}.zip" | tr '/' '-')"
# Fetch the icuexportdata archive attached to the selected ICU release. The
# URL and the archive name are quoted so each reaches its tool as exactly one
# argument, even if the tag contains whitespace or glob characters.
wget "https://github.com/unicode-org/icu/releases/download/${icuexport}/${icuexport_filename}"

log "Patching icuexportdata to reduce data size"
unzip "${icuexport_filename}"
# Overwrite the dictionary file named by $toml with the empty placeholder.
# $toml is set outside the lines shown here. Both operands are quoted so that
# paths containing whitespace are not split into several arguments.
cp -- "${top_src_dir}/intl/icu4x-patches/empty.toml" \
  "${tmpicuexportdir}/segmenter/dictionary/${toml}"
# Shallow-clone the requested branch or tag ($2) of the repository at $1 into
# a fresh temporary directory. The arguments are quoted so they are passed to
# git unmodified.
tmpclonedir=$(mktemp -d)
git clone --depth 1 --branch "$2" "$1" "${tmpclonedir}"

log "Change the directory to the cloned repo"
log "Patching line segmenter data to fix https://github.com/unicode-org/icu4x/pull/4389"
# This manual patch can be removed once we upgrade to ICU4X 1.5.
# Replace line.toml in the clone with the copy from the pinned upstream commit.
# The output path is quoted so it is always a single -O argument.
wget --unlink -q \
  -O "${tmpclonedir}/provider/datagen/data/segmenter/line.toml" \
  "https://raw.githubusercontent.com/unicode-org/icu4x/e080ecd12e38d6aecc99cd0cfe8c21595c4ce6ff/provider/datagen/data/segmenter/line.toml"
log "Copy icu_capi crate to local since we need a patched version"
# Drop the previous copy; ':?' aborts rather than resolving to /intl/icu_capi
# when top_src_dir is empty or unset.
rm -rf -- "${top_src_dir:?}/intl/icu_capi"
# Download the requested crate version and unpack it under intl/.
wget -O icu_capi.tar.gz "https://crates.io/api/v1/crates/icu_capi/${icu4x_version}/download"
tar xf icu_capi.tar.gz -C "${top_src_dir}/intl"
# The archive unpacks to icu_capi-<version>; move it to the unversioned name.
mv -- "${top_src_dir}/intl/icu_capi-${icu4x_version}" "${top_src_dir}/intl/icu_capi"
# Delete the downloaded tarball. The name has to match the wget -O target
# above; the previous 'icu_capi_tar.gz' never matched, so the archive was
# left behind.
rm -f -- icu_capi.tar.gz

log "Patching icu_capi"
87 001-Cargo.toml.
patch \
# Apply the patch file named by $patch from intl/icu4x-patches, relative to
# the top source directory. $patch is set outside the lines shown here.
# Paths are quoted so whitespace cannot split them.
patch -d "${top_src_dir}" -p1 --no-backup-if-mismatch \
  < "${top_src_dir}/intl/icu4x-patches/${patch}"
95 # ICU4X 1.3 or later with icu_capi uses each compiled_data crate.
# Announce the regeneration step and the directory that receives its output.
log "Run the icu4x-datagen tool to regenerate the segmenter data."
log "Saving the data into: ${segmenter_data_dir}"
100 # TODO(Bug 1741262) - Should locales be filtered as well? It doesn't appear that the existing ICU
101 # data builder is using any locale filtering.
104 # Include this resource key in the output. Accepts multiple arguments.
105 # --key-file <KEY_FILE>
106 # Path to text file with resource keys to include, one per line. Empty lines and
107 # lines starting with '#' are ignored.
108 cargo run
--bin icu4x-datagen \
112 --icuexport-root ${tmpicuexportdir} \
113 --keys segmenter
/dictionary
/w_auto@
1 \
114 --keys segmenter
/dictionary
/wl_ext@
1 \
115 --keys segmenter
/grapheme@
1 \
116 --keys segmenter
/line@
1 \
117 --keys segmenter
/lstm
/wl_auto@
1 \
118 --keys segmenter
/sentence@
1 \
119 --keys segmenter
/word@
1 \
122 --out ${segmenter_data_dir} \
log "Record the current cloned git information to:"
# (This ensures that if ICU modifications are performed properly, it's always
# possible to run the command at the top of this script and make no changes.)
# Write the most recent commit of the clone to the info file. Both paths are
# quoted so whitespace in them cannot split the arguments or the redirect.
git -C "${tmpclonedir}" log -1 > "${git_info_file}"

log "Clean up the tmp directory"
rm -rf -- "${tmpclonedir}"
rm -rf -- "${tmpicuexportdir}"