From 0e5eefbb06eafe7d79856742caafc3099bf00d85 Mon Sep 17 00:00:00 2001 From: "Kyle J. McKay" Date: Sat, 26 Jan 2019 17:02:13 -0700 Subject: [PATCH] awk: run using LC_ALL=C Where Girocco does use awk, it's only to do simple processing where only the US-ASCII portion of the codepoint space is relevant. However, if the system character set is set to a multi-byte charset (such as UTF-8) and the input contains characters with the high bit set (such as ISO-8859-1) then the character reading mechanism can barf unnecessarily for a reason that's simply irrelevant to Girocco. Avoid this by making sure that awk runs under an "LC_ALL=C" environment so that the input stream is treated as single octet units where codepoints in the range 0-0x7F mean US-ASCII. Not only is this faster, but it's the correct processing mode for Girocco. For the most part this had already been taken care of, but this just handles some instances where it's been previously overlooked. Signed-off-by: Kyle J. McKay --- bin/update-pwd-db | 2 +- chrootsetup_dragonfly.sh | 6 +++--- chrootsetup_freebsd.sh | 6 +++--- chrootsetup_linux.sh | 2 +- jobd/gc.sh | 6 +++--- taskd/clone.sh | 2 +- toolbox/reports/project-disk-use.sh | 2 +- 7 files changed, 13 insertions(+), 13 deletions(-) diff --git a/bin/update-pwd-db b/bin/update-pwd-db index 91e3f2a..05a0d6c 100755 --- a/bin/update-pwd-db +++ b/bin/update-pwd-db @@ -30,6 +30,6 @@ cleanup() { } >/dev/null 2>&1 trap cleanup EXIT rm -f etc/master.passwd -awk -F ':' '{ print $1 ":" $2 ":" $3 ":" $4 "::0:0:" $5 ":" $6 ":" $7 }' <"$1" >etc/master.passwd +LC_ALL=C awk -F ':' '{ print $1 ":" $2 ":" $3 ":" $4 "::0:0:" $5 ":" $6 ":" $7 }' <"$1" >etc/master.passwd PW_SCAN_BIG_IDS=1 pwd_mkdb -d etc $user_only etc/master.passwd 2>/dev/null exit 0 diff --git a/chrootsetup_dragonfly.sh b/chrootsetup_dragonfly.sh index 58f9120..152fbe8 100644 --- a/chrootsetup_dragonfly.sh +++ b/chrootsetup_dragonfly.sh @@ -71,7 +71,7 @@ pull_in_lib() { dst="${2%/}/$(basename "$1")" if [ ! -e "$dst" ] || [ "$1" -nt "$dst" ]; then cp_p "$1" "$dst" - for llib in $(ldd "$1" | grep '=>' | awk '{print $3}'); do + for llib in $(ldd "$1" | grep '=>' | LC_ALL=C awk '{print $3}'); do (pull_in_lib "$llib" lib) test $? -eq 0 done @@ -107,7 +107,7 @@ pull_in_bin() { fi cp_p "$bin" var/tmp/ # ...and all the dependencies. - for lib in $(ldd "$bin" | grep '=>' | awk '{print $3}'); do + for lib in $(ldd "$bin" | grep '=>' | LC_ALL=C awk '{print $3}'); do pull_in_lib "$lib" lib done mv -f "var/tmp/$(basename "$bin")" "$bdst/$bnam" @@ -122,7 +122,7 @@ chroot_update_permissions() { chown -R 0:0 bin lib sbin var libexec # bootstrap the master.passwd database rm -f etc/master.passwd etc/pwd.db etc/spwd.db - awk -F ':' '{ print $1 ":" $2 ":" $3 ":" $4 "::0:0:" $5 ":" $6 ":" $7 }' etc/master.passwd + LC_ALL=C awk -F ':' '{ print $1 ":" $2 ":" $3 ":" $4 "::0:0:" $5 ":" $6 ":" $7 }' etc/master.passwd PW_SCAN_BIG_IDS=1 pwd_mkdb -d etc etc/master.passwd 2>/dev/null chown $cfg_mirror_user:$cfg_owning_group etc/master.passwd etc/pwd.db etc/spwd.db chmod 0664 etc/master.passwd etc/pwd.db etc/spwd.db diff --git a/chrootsetup_freebsd.sh b/chrootsetup_freebsd.sh index d4d62fc..4d9c7cd 100644 --- a/chrootsetup_freebsd.sh +++ b/chrootsetup_freebsd.sh @@ -65,7 +65,7 @@ pull_in_lib() { dst="${2%/}/$(basename "$1")" if [ ! -e "$dst" ] || [ "$1" -nt "$dst" ]; then cp_p "$1" "$dst" - for llib in $(ldd "$1" | grep '=>' | awk '{print $3}'); do + for llib in $(ldd "$1" | grep '=>' | LC_ALL=C awk '{print $3}'); do (pull_in_lib "$llib" lib) test $? -eq 0 done @@ -101,7 +101,7 @@ pull_in_bin() { fi cp_p "$bin" var/tmp/ # ...and all the dependencies. - for lib in $(ldd "$bin" | grep '=>' | awk '{print $3}'); do + for lib in $(ldd "$bin" | grep '=>' | LC_ALL=C awk '{print $3}'); do pull_in_lib "$lib" lib done mv -f "var/tmp/$(basename "$bin")" "$bdst/$bnam" @@ -116,7 +116,7 @@ chroot_update_permissions() { chown -R 0:0 bin lib sbin var libexec # bootstrap the master.passwd database rm -f etc/master.passwd etc/pwd.db etc/spwd.db - awk -F ':' '{ print $1 ":" $2 ":" $3 ":" $4 "::0:0:" $5 ":" $6 ":" $7 }' etc/master.passwd + LC_ALL=C awk -F ':' '{ print $1 ":" $2 ":" $3 ":" $4 "::0:0:" $5 ":" $6 ":" $7 }' etc/master.passwd PW_SCAN_BIG_IDS=1 pwd_mkdb -d etc etc/master.passwd 2>/dev/null chown $cfg_mirror_user:$cfg_owning_group etc/master.passwd etc/pwd.db etc/spwd.db chmod 0664 etc/master.passwd etc/pwd.db etc/spwd.db diff --git a/chrootsetup_linux.sh b/chrootsetup_linux.sh index 083ea6e..01f598a 100644 --- a/chrootsetup_linux.sh +++ b/chrootsetup_linux.sh @@ -60,7 +60,7 @@ has_files lib/ld-linux*.so* || { # Besides '=>' libs, attempt to pick up absolute path libs and create a symlink for upto one level deep extract_libs() { - ldd "$1" | grep -v -e linux-gate -e linux-vdso -e ld-linux | awk '{print $1 " " $2 " " $3}' | + ldd "$1" | grep -v -e linux-gate -e linux-vdso -e ld-linux | LC_ALL=C awk '{print $1 " " $2 " " $3}' | while read -r _f1 _f2 _f3; do case "$_f2" in "=>") diff --git a/jobd/gc.sh b/jobd/gc.sh index eb1e44b..d2e57fe 100755 --- a/jobd/gc.sh +++ b/jobd/gc.sh @@ -545,7 +545,7 @@ make_repack_dir() { fi if [ "${cfg_fetch_stash_refs:-0}" = "0" ]; then # migrate any refs/stash or refs/tgstash lines to repack/packed-refs.extra - >xtra; next; } @@ -1206,7 +1206,7 @@ compute_extra_reachables() { fi digits8='[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]' find -L reflogs -mindepth 1 -maxdepth 1 -type f -name "$digits8*" -exec gzip -c -d -f '{}' + | - awk '{print $2; print $3}' + LC_ALL=C awk '{print $2; print $3}' ! [ -f index ] || get_index_tree index if [ -d worktrees ]; then find -L worktrees -mindepth 2 -maxdepth 2 -name HEAD -type f -print | @@ -1217,7 +1217,7 @@ compute_extra_reachables() { fi } | LC_ALL=C sort -u | git cat-file ${var_have_git_260:+--buffer} --batch-check"${var_have_git_185:+=%(objectname)}" | - awk '!/missing/ {num++; print $1 " " "refs/" substr("zzzzzzzzzzzz", 1, length(num)) "/" num}' + LC_ALL=C awk '!/missing/ {num++; print $1 " " "refs/" substr("zzzzzzzzzzzz", 1, length(num)) "/" num}' } # diff --git a/taskd/clone.sh b/taskd/clone.sh index 297b304..e27ba35 100755 --- a/taskd/clone.sh +++ b/taskd/clone.sh @@ -196,7 +196,7 @@ echo "Mirroring from URL \"$url\"" echo "" if [ "$cfg_project_owners" = "source" ]; then - config set owner "$(ls -ldH "${url#file://}" 2>/dev/null | awk '{print $3}')" + config set owner "$(ls -ldH "${url#file://}" 2>/dev/null | LC_ALL=C awk '{print $3}')" fi mailaddrs="$(config_get owner)" || : diff --git a/toolbox/reports/project-disk-use.sh b/toolbox/reports/project-disk-use.sh index 40c2340..009c38b 100755 --- a/toolbox/reports/project-disk-use.sh +++ b/toolbox/reports/project-disk-use.sh @@ -161,7 +161,7 @@ while IFS='' read -r proj; do [ -L "$proj.git/objects" ] || ! [ -d "$proj.git/objects" ] || eval "$(git --git-dir="$cfg_reporoot/$proj.git" config --get-regexp \ '^girocco\.((bang\.(count|firstfail|messagesent))|reposizek)$' | - awk '{gsub(/[.]/,"_",$1); $0 ~ / / || sub(/$/," "); sub(/ /,"=\042"); print $0 "\042"}')" || : + LC_ALL=C awk '{gsub(/[.]/,"_",$1); $0 ~ / / || sub(/$/," "); sub(/ /,"=\042"); print $0 "\042"}')" || : b= if [ -n "$girocco_bang_count" ] && [ "${girocco_bang_count#*[!0-9]}" = "$girocco_bang_count" ] && [ "$girocco_bang_count" -gt 0 ]; then banged=$(( $banged + 1 )) -- 2.11.4.GIT