Bug 1869647 - Mark hasStorageAccess.sub.https.window.html as intermittent after wpt...
[gecko.git] / intl / update-icu4x.sh
blob6175072ea0b44a458ed625700cf0306a6b4564d9
1 #!/bin/sh
2 # This Source Code Form is subject to the terms of the Mozilla Public
3 # License, v. 2.0. If a copy of the MPL was not distributed with this
4 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
set -e

# Regenerate the icu4x binary data.
#
# For a given release:
#   update-icu4x.sh <URL of ICU4X GIT> <release tag name> <CLDR version> <ICU release tag name> <ICU4X version of icu_capi>
#   update-icu4x.sh https://github.com/unicode-org/icu4x.git icu@1.4.0 44.0.0 release-74-1 1.4.0
#
# For the main branch:
#   update-icu4x.sh <URL of ICU4X GIT> <branch> <CLDR version> <ICU release tag name> <ICU4X version of icu_capi>
#   update-icu4x.sh https://github.com/unicode-org/icu4x.git main 44.0.0 release-74-1 1.4.0

# Arguments 3, 4 and 5 are optional: start from the default value and override
# it only when the corresponding argument is present and non-empty.
icu4x_version=1.4.0
icuexport=release-74-1
cldr=44.0.0
if [ -n "${3:-}" ]; then
  cldr=$3
fi
if [ -n "${4:-}" ]; then
  icuexport=$4
fi
if [ -n "${5:-}" ]; then
  icu4x_version=$5
fi
# The repository URL ($1) and the tag or branch name ($2) are mandatory.
# Without them, print the usage text on stderr and stop with status 1.
if [ $# -lt 2 ]; then
  echo "Usage: update-icu4x.sh <URL of ICU4X GIT> <ICU4X release tag name> <CLDR version> <ICU release tag name> <ICU4X version for icu_capi>" >&2
  echo "Example: update-icu4x.sh https://github.com/unicode-org/icu4x.git icu@1.4.0 44.0.0 release-74-1 1.4.0" >&2
  exit 1
fi
# Make a log function so the output is easy to read.
#
# Arguments: the message; multiple arguments are joined with spaces.
# Outputs:   "[update-icu4x] <message>" on stdout, with the tag in cyan.
log() {
  CYAN='\033[0;36m'
  CLEAR='\033[0m'
  # The message is passed as a %s argument, never as part of the format
  # string, so a "%" or "\" in it (for example in a path) is printed verbatim.
  # %b expands the escape sequences stored in the two colour variables.
  printf '%b[update-icu4x]%b %s\n' "${CYAN}" "${CLEAR}" "$*"
}
# Pin the time zone and locale so that the output is consistent and
# reproducible.
TZ=UTC
LANG=en_US.UTF-8
LANGUAGE=en_US
LC_ALL=en_US.UTF-8
export TZ LANG LANGUAGE LC_ALL
# Define all of the paths.
# Directory the script was started from.
original_pwd=$(pwd)
# Physical path of the parent of the directory that contains this script.
# Joining with `&&` makes a failed `cd` fail the whole substitution instead of
# silently reporting the caller's working directory, which the paths derived
# below would otherwise be built on.
top_src_dir=$(cd -- "$(dirname "$0")/.." >/dev/null 2>&1 && pwd -P)
segmenter_data_dir="${top_src_dir}/intl/icu_segmenter_data/data"
git_info_file="${segmenter_data_dir}/ICU4X-GIT-INFO"
log "Remove the old data"
# Quoted so a path containing spaces stays one operand; ${var:?} aborts
# instead of running `rm -rf` with an empty path.
rm -rf -- "${segmenter_data_dir:?}"
log "Download icuexportdata"
tmpicuexportdir=$(mktemp -d)
# The archive name is built from the ICU release tag with every "/" replaced
# by "-".
icuexport_filename=$(printf '%s\n' "icuexportdata_${icuexport}.zip" | sed 's|/|-|g')
cd "${tmpicuexportdir}"
wget "https://github.com/unicode-org/icu/releases/download/${icuexport}/${icuexport_filename}"

log "Patching icuexportdata to reduce data size"
unzip "${icuexport_filename}"
# Overwrite each dictionary listed below with the empty.toml placeholder from
# intl/icu4x-patches.
for toml in \
  burmesedict.toml \
  khmerdict.toml \
  laodict.toml \
  thaidict.toml \
; do
  cp "${top_src_dir}/intl/icu4x-patches/empty.toml" "${tmpicuexportdir}/segmenter/dictionary/${toml}"
done
log "Clone ICU4X"
tmpclonedir=$(mktemp -d)
# $1 is the repository URL and $2 the tag or branch to check out; only the
# most recent commit is fetched (--depth 1).
git clone --depth 1 --branch "$2" "$1" "${tmpclonedir}"

log "Change the directory to the cloned repo"
log "${tmpclonedir}"
cd "${tmpclonedir}"
log "Copy icu_capi crate to local since we need a patched version"
# ${top_src_dir:?} aborts rather than deleting /intl/icu_capi if the variable
# is ever empty.
rm -rf -- "${top_src_dir:?}/intl/icu_capi"
wget -O icu_capi.tar.gz "https://crates.io/api/v1/crates/icu_capi/${icu4x_version}/download"
# The archive unpacks into icu_capi-<version>; rename that to the unversioned
# directory name.
tar xf icu_capi.tar.gz -C "${top_src_dir}/intl"
mv "${top_src_dir}/intl/icu_capi-${icu4x_version}" "${top_src_dir}/intl/icu_capi"
# Delete the archive downloaded above (same file name as passed to wget -O).
rm -f icu_capi.tar.gz
log "Patching icu_capi"
# Apply the patches in the listed order, relative to the top source directory
# (-d) with the first path component stripped (-p1).
for patch in \
  001-Cargo.toml.patch \
  002-GH4109.patch \
  003-explicit.patch \
; do
  patch -d "${top_src_dir}" -p1 --no-backup-if-mismatch < "${top_src_dir}/intl/icu4x-patches/${patch}"
done
# ICU4X 1.3 or later with icu_capi uses each compiled_data crate.

log "Run the icu4x-datagen tool to regenerate the segmenter data."
log "Saving the data into: ${segmenter_data_dir}"

# TODO(Bug 1741262) - Should locales be filtered as well? It doesn't appear that the existing ICU
# data builder is using any locale filtering.

# --keys <KEYS>...
#     Include this resource key in the output. Accepts multiple arguments.
# --key-file <KEY_FILE>
#     Path to text file with resource keys to include, one per line. Empty lines and
#     lines starting with '#' are ignored.
#
# The final argument deliberately has no trailing backslash, so the command
# ends here and cannot absorb the statement that follows it.
cargo run --bin icu4x-datagen \
  --features=bin \
  -- \
  --cldr-tag "${cldr}" \
  --icuexport-root "${tmpicuexportdir}" \
  --keys segmenter/dictionary/w_auto@1 \
  --keys segmenter/dictionary/wl_ext@1 \
  --keys segmenter/grapheme@1 \
  --keys segmenter/line@1 \
  --keys segmenter/lstm/wl_auto@1 \
  --keys segmenter/sentence@1 \
  --keys segmenter/word@1 \
  --all-locales \
  --format mod \
  --out "${segmenter_data_dir}"
log "Record the current cloned git information to:"
log "${git_info_file}"
# (This ensures that if ICU modifications are performed properly, it's always
# possible to run the command at the top of this script and make no changes to
# the tree.)
# The redirect target is quoted so a path containing spaces is still a single
# file name.
git -C "${tmpclonedir}" log -1 > "${git_info_file}"
log "Clean up the tmp directory"
# Step out of the temporary clone before deleting it.
cd "${original_pwd}"
# ${var:?} aborts instead of running `rm -rf` with an empty operand.
rm -rf -- "${tmpclonedir:?}"
rm -rf -- "${tmpicuexportdir:?}"