From ae5fe801005cb561afdff2bfe2f76e0c9c756ed5 Mon Sep 17 00:00:00 2001
From: Kalle Olavi Niemitalo <kon@iki.fi>
Date: Mon, 29 Jan 2007 20:57:37 +0200
Subject: [PATCH] Document that NBSP_CHAR is not used in UTF-8 strings.

---
 src/intl/charsets.c | 3 ++-
 src/intl/charsets.h | 6 ++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/intl/charsets.c b/src/intl/charsets.c
index b51a6189..c8702927 100644
--- a/src/intl/charsets.c
+++ b/src/intl/charsets.c
@@ -183,7 +183,8 @@ u2cp_(unicode_val_T u, int to, enum nbsp_mode nbsp_mode)
 		return encode_utf8(u);
 #endif /* CONFIG_UTF8 */
 
-	/* To mark non breaking spaces, we use a special char NBSP_CHAR. */
+	/* To mark non-breaking spaces in non-UTF-8 strings, we use the
+	 * special character NBSP_CHAR. */
 	if (u == 0xa0) {
 		if (nbsp_mode == NBSP_MODE_HACK) return NBSP_CHAR_STRING;
 		else /* NBSP_MODE_ASCII */ return " ";
diff --git a/src/intl/charsets.h b/src/intl/charsets.h
index 69fb3cb9..66768b7e 100644
--- a/src/intl/charsets.h
+++ b/src/intl/charsets.h
@@ -31,7 +31,8 @@ typedef uint32_t unicode_val_T;
  * We should fix that if we ever change the value.  */
 #define UCS_ORPHAN_CELL ((unicode_val_T) 0x20)
 
-/* &nbsp; replacement character. See u2cp(). */
+/* &nbsp; replacement character. See u2cp().
+ * UTF-8 strings should use the encoding of U+00A0 instead. */
 #define NBSP_CHAR ((unsigned char) 1)
 #define NBSP_CHAR_STRING "\001"
 
@@ -64,7 +65,8 @@ enum convert_string_mode {
 	CSM_NONE, /* Convert nothing. */
 };
 
-/* How to translate non-breaking spaces.  */
+/* How to translate U+00A0 NO-BREAK SPACE.  If u2cp_() is converting to
+ * UTF-8, it ignores this choice and just encodes U+00A0 as UTF-8.  */
 enum nbsp_mode {
 	/* Convert to NBSP_CHAR.  This lets the HTML renderer
 	 * recognize nbsp even if the codepage doesn't support
-- 
2.11.4.GIT