From 3cb47d8b4dc0242665219e9b86825ebdaa43fb0c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 13 Mar 2023 12:57:49 +0100 Subject: [PATCH] [mono] Use `unsigned char` when computing UTF8 string hashes (#21633) Backport of https://github.com/dotnet/runtime/pull/83273 to mono/mono `2020-02` The C standard does not specify whether `char` is signed or unsigned; it is implementation-defined. Apparently Android aarch64 makes a different choice than other platforms (at least macOS arm64 and Windows x64 give different results). Mono uses `mono_metadata_str_hash` in the AOT compiler and AOT runtime to optimize class name lookup. As a result, classes whose names include UTF-8 continuation bytes (with the high bit = 1) will hash differently in the AOT compiler and on the device. Contributes to https://github.com/dotnet/runtime/issues/82187 Contributes to https://github.com/dotnet/runtime/issues/78638 --- mono/eglib/ghashtable.c | 2 +- mono/metadata/metadata.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/mono/eglib/ghashtable.c b/mono/eglib/ghashtable.c index 5329fab0cab..8799ed46f8e 100644 --- a/mono/eglib/ghashtable.c +++ b/mono/eglib/ghashtable.c @@ -673,7 +673,7 @@ guint g_str_hash (gconstpointer v1) { guint hash = 0; - char *p = (char *) v1; + unsigned char *p = (unsigned char *) v1; while (*p++) hash = (hash << 5) - (hash + *p); diff --git a/mono/metadata/metadata.c b/mono/metadata/metadata.c index 81217077470..47d3273548e 100644 --- a/mono/metadata/metadata.c +++ b/mono/metadata/metadata.c @@ -5532,7 +5532,8 @@ guint mono_metadata_str_hash (gconstpointer v1) { /* Same as g_str_hash () in glib */ - char *p = (char *) v1; + /* note: signed/unsigned char matters - we feed UTF-8 to this function, so the high bit will give different results if we don't match. */ + unsigned char *p = (unsigned char *) v1; guint hash = *p; while (*p++) { -- 2.11.4.GIT