1 /* Localization of proper names.
2 Copyright (C) 2006-2024 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2006.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
18 /* Without this pragma, gcc 4.7.0 20111124 mistakenly suggests that
19 the proper_name function might be a candidate for attribute 'const'. */
20 #if (__GNUC__ == 4 && 6 <= __GNUC_MINOR__) || 4 < __GNUC__
21 # pragma GCC diagnostic ignored "-Wsuggest-attribute=const"
27 #include "propername.h"
38 #if GNULIB_MCEL_PREFER
44 #include "localcharset.h"
45 #include "c-strcase.h"
46 #include "xstriconv.h"
51 /* Tests whether STRING contains trim (SUB), starting and ending at word boundaries.
53 Here, instead of implementing Unicode Standard Annex #29 for determining
54 word boundaries, we assume that trim (SUB) starts and ends with words and
55 only test whether the part before it ends with a non-word and the part
56 after it starts with a non-word. */
/* Searches STRING for the result of trim (SUB) and examines the characters
   on either side of each occurrence with an alphanumeric test.
   NOTE(review): the embedded line numbers in this listing are not
   contiguous, so some statements of this function (loop exits, the return
   value, branch keywords) are not visible here -- confirm against the
   complete file before relying on these notes.  */
58 mbsstr_trimmed_wordbounded (const char *string
, const char *sub
)
/* TSUB is the result of trim (SUB); every search below uses TSUB.  */
60 char *tsub
= trim (sub
);
/* The locale counts as multibyte when MB_CUR_MAX exceeds 1.  */
62 bool multibyte_locale
= MB_CUR_MAX
> 1;
/* The byte length of TSUB is computed only when the locale is not
   multibyte; it is used by the byte-oriented test further down.  */
64 if (! multibyte_locale
)
65 tsublen
= strlen (tsub
);
/* Keep scanning until STRING points at its terminating NUL.  */
67 while (*string
!= '\0')
/* Look for TSUB in the remainder of STRING with mbsstr; a NULL result is
   tested immediately below.  */
69 const char *tsub_in_string
= mbsstr (string
, tsub
);
70 if (tsub_in_string
== NULL
)
/* Variant guarded by GNULIB_MCEL_PREFER: characters are decoded with
   mcel_scanz.  */
76 #if GNULIB_MCEL_PREFER
77 char const *string_iter
= string
;
/* Decode characters from STRING up to the match and remember the most
   recently decoded one.  The value stays 0 when the match is at the very
   start of STRING, because the loop body never runs in that case.  */
79 char32_t last_char_before_tsub
= 0;
80 while (string_iter
< tsub_in_string
)
82 mcel_t g
= mcel_scanz (string_iter
);
83 last_char_before_tsub
= g
.ch
;
/* Step STRING_ITER past the match: advance by one character of STRING for
   every character of TSUB.  */
87 string_iter
= tsub_in_string
;
88 for (char const *tsub_iter
= tsub
; *tsub_iter
;
89 tsub_iter
+= mcel_scanz (tsub_iter
).len
)
90 string_iter
+= mcel_scanz (string_iter
).len
;
/* Test that neither the character before the match nor the character
   right after it is alphanumeric.  */
92 if (!c32isalnum (last_char_before_tsub
)
93 && !c32isalnum (mcel_scanz (string_iter
).ch
))
/* Continue the search one character after the start of this match.  */
101 string
= tsub_in_string
+ mcel_scanz (tsub_in_string
).len
;
/* The following lines use the mbui_* iterator macros instead of mcel_scanz.
   NOTE(review): presumably the alternative branch of the conditional
   above; the directive separating the two is not visible in this
   listing.  */
103 mbui_iterator_t string_iter
;
104 bool word_boundary_before
;
105 bool word_boundary_after
;
107 mbui_init (string_iter
, string
);
/* Assume a boundary before the match unless the last character preceding
   it turns out to be alphanumeric.  */
108 word_boundary_before
= true;
109 if (mbui_cur_ptr (string_iter
) < tsub_in_string
)
111 mbchar_t last_char_before_tsub
;
114 if (!mbui_avail (string_iter
))
116 last_char_before_tsub
= mbui_cur (string_iter
);
117 mbui_advance (string_iter
);
119 while (mbui_cur_ptr (string_iter
) < tsub_in_string
);
120 if (mb_isalnum (last_char_before_tsub
))
121 word_boundary_before
= false;
/* Restart the iterator at the match and advance it once for every
   character of TSUB, so that it ends up just past the match.  */
124 mbui_init (string_iter
, tsub_in_string
);
126 mbui_iterator_t tsub_iter
;
128 for (mbui_init (tsub_iter
, tsub
);
129 mbui_avail (tsub_iter
);
130 mbui_advance (tsub_iter
))
132 if (!mbui_avail (string_iter
))
134 mbui_advance (string_iter
);
/* Assume a boundary after the match unless a character is available there
   and it is alphanumeric.  */
137 word_boundary_after
= true;
138 if (mbui_avail (string_iter
))
140 mbchar_t first_char_after_tsub
= mbui_cur (string_iter
);
141 if (mb_isalnum (first_char_after_tsub
))
142 word_boundary_after
= false;
145 if (word_boundary_before
&& word_boundary_after
)
/* Re-read the first character of the match so that the search can resume
   just past it (its length comes from mb_len).  */
151 mbui_init (string_iter
, tsub_in_string
);
152 if (!mbui_avail (string_iter
))
154 string
= tsub_in_string
+ mb_len (mbui_cur (string_iter
));
/* Byte-oriented test: the match must sit at the start of STRING or follow
   a non-alphanumeric byte, and the byte at offset TSUBLEN from the match
   must be non-alphanumeric.  The casts to unsigned char keep the isalnum
   arguments within the range that function accepts.
   NOTE(review): presumably the non-multibyte path, since TSUBLEN is only
   assigned when the locale is not multibyte -- confirm.  */
159 if ((string
== tsub_in_string
160 || !isalnum ((unsigned char) tsub_in_string
[-1]))
161 && !isalnum ((unsigned char) tsub_in_string
[tsublen
]))
/* Check whether the match is positioned at the terminating NUL before
   stepping forward by one byte.  */
167 if (*tsub_in_string
== '\0')
169 string
= tsub_in_string
+ 1;
177 /* Return the localization of NAME. NAME is written in ASCII. */
/* Looks NAME up with gettext.  When the returned pointer differs from NAME,
   the translation is checked for an occurrence of NAME with
   mbsstr_trimmed_wordbounded, and a string of the form TRANSLATION (NAME)
   is formatted into newly allocated memory.
   NOTE(review): the return statements and the declaration of RESULT are
   not visible in this listing, so which value is returned on each path
   cannot be confirmed from here.  */
180 proper_name (const char *name
)
182 /* See whether there is a translation. */
183 const char *translation
= gettext (name
);
/* Pointer comparison, not a string comparison: a result other than NAME
   itself is treated as a translation.  */
185 if (translation
!= name
)
187 /* See whether the translation contains the original name. */
188 if (mbsstr_trimmed_wordbounded (translation
, name
))
192 /* Return "TRANSLATION (NAME)". */
/* Size: both strings, 2 bytes for the space and opening parenthesis,
   1 for the closing parenthesis and 1 for the terminating NUL.  */
194 XNMALLOC (strlen (translation
) + 2 + strlen (name
) + 1 + 1, char);
196 sprintf (result
, "%s (%s)", translation
, name
);
204 /* Return the localization of a name whose original writing is not ASCII.
205 NAME_UTF8 is the real name, written in UTF-8 with octal or hexadecimal
206 escape sequences. NAME_ASCII is a fallback written only with ASCII characters. */
/* Looks NAME_ASCII up with gettext and converts NAME_UTF8 to the charset
   reported by locale_charset, optionally also with transliteration.  One of
   the converted forms is selected as NAME, the translation is compared
   against the known forms of the name, and a string of the form
   TRANSLATION (NAME) may be formatted into newly allocated memory.
   Conversion buffers are freed before leaving.
   NOTE(review): the embedded line numbers in this listing are not
   contiguous; return statements, several branch keywords and some
   declarations are not visible, so the value returned on each path cannot
   be confirmed from here.  */
210 proper_name_utf8 (const char *name_ascii
, const char *name_utf8
)
212 /* See whether there is a translation. */
213 const char *translation
= gettext (name_ascii
);
215 /* Try to convert NAME_UTF8 to the locale encoding. */
216 const char *locale_code
= locale_charset ();
/* The ALLOC_* pointers record what has to be freed; the NAME_CONVERTED*
   pointers are the strings actually used and may alias NAME_UTF8.  */
217 char *alloc_name_converted
= NULL
;
218 char *alloc_name_converted_translit
= NULL
;
219 const char *name_converted
= NULL
;
220 const char *name_converted_translit
= NULL
;
/* Conversion is attempted only when c_strcasecmp reports that the locale
   charset is not UTF-8.  */
223 if (c_strcasecmp (locale_code
, "UTF-8") != 0)
226 name_converted
= alloc_name_converted
=
227 xstr_iconv (name_utf8
, "UTF-8", locale_code
);
/* The transliteration attempt is compiled in only for glibc 2.2 or newer
   (excluding uClibc), for _LIBICONV_VERSION of at least 0x0105, or when
   ICONV_SET_TRANSLITERATE is defined.  */
229 # if (((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2) \
230 && !defined __UCLIBC__) \
231 || _LIBICONV_VERSION >= 0x0105 \
232 || defined ICONV_SET_TRANSLITERATE
234 char *converted_translit
;
/* Build the target charset name: LOCALE_CODE followed by the 10-character
   TRANSLIT suffix.  The second memcpy copies 10 + 1 bytes, so it also
   writes the terminating NUL.  */
236 size_t len
= strlen (locale_code
);
237 char *locale_code_translit
= XNMALLOC (len
+ 10 + 1, char);
238 memcpy (locale_code_translit
, locale_code
, len
);
239 memcpy (locale_code_translit
+ len
, "//TRANSLIT", 10 + 1);
242 xstr_iconv (name_utf8
, "UTF-8", locale_code_translit
);
/* The temporary charset name is released right after the conversion
   call.  */
244 free (locale_code_translit
);
246 if (converted_translit
!= NULL
)
248 # if !_LIBICONV_VERSION || (_LIBICONV_VERSION == 0x10b && defined __APPLE__)
249 /* Don't use the transliteration if it added question marks.
250 glibc's transliteration falls back to question marks; libiconv's
251 transliteration does not.
252 mbschr is equivalent to strchr in this case. */
253 if (strchr (converted_translit
, '?') != NULL
)
254 free (converted_translit
);
257 name_converted_translit
= alloc_name_converted_translit
=
/* Here both converted names alias NAME_UTF8 itself and nothing is
   allocated.
   NOTE(review): presumably the branch taken when the locale charset is
   already UTF-8; the branch keyword is not visible in this listing.  */
266 name_converted
= name_utf8
;
267 name_converted_translit
= name_utf8
;
270 /* The name in locale encoding. */
/* Preference order: the exact conversion first, then the transliterated
   one.  The final alternative of this expression is cut off in this
   listing.  */
271 name
= (name_converted
!= NULL
? name_converted
:
272 name_converted_translit
!= NULL
? name_converted_translit
:
275 /* See whether we have a translation. Some translators have not understood
276 that they should use the UTF-8 form of the name, if possible. So if the
277 translator provided a no-op translation, we ignore it. */
278 if (strcmp (translation
, name_ascii
) != 0)
280 /* See whether the translation contains the original name. */
/* The translation is checked against NAME_ASCII and against each converted
   form that is non-NULL.  */
281 if (mbsstr_trimmed_wordbounded (translation
, name_ascii
)
282 || (name_converted
!= NULL
283 && mbsstr_trimmed_wordbounded (translation
, name_converted
))
284 || (name_converted_translit
!= NULL
285 && mbsstr_trimmed_wordbounded (translation
, name_converted_translit
)))
/* Release any buffers allocated by the conversions above.  */
287 if (alloc_name_converted
!= NULL
)
288 free (alloc_name_converted
);
289 if (alloc_name_converted_translit
!= NULL
)
290 free (alloc_name_converted_translit
);
295 /* Return "TRANSLATION (NAME)". */
/* Size: both strings, 2 bytes for the space and opening parenthesis,
   1 for the closing parenthesis and 1 for the terminating NUL.  */
297 XNMALLOC (strlen (translation
) + 2 + strlen (name
) + 1 + 1, char);
299 sprintf (result
, "%s (%s)", translation
, name
);
/* NAME has been copied into RESULT by the sprintf above, so the conversion
   buffers can be released here.  */
301 if (alloc_name_converted
!= NULL
)
302 free (alloc_name_converted
);
303 if (alloc_name_converted_translit
!= NULL
)
304 free (alloc_name_converted_translit
);
/* On this path each conversion buffer is freed unless it is the very string
   that was selected as NAME.  */
310 if (alloc_name_converted
!= NULL
&& alloc_name_converted
!= name
)
311 free (alloc_name_converted
);
312 if (alloc_name_converted_translit
!= NULL
313 && alloc_name_converted_translit
!= name
)
314 free (alloc_name_converted_translit
);
/* Test driver: selects the locale from the environment and passes the first
   two command-line arguments to mbsstr_trimmed_wordbounded.
   NOTE(review): the statements controlled by the if, and any conditional
   compilation guarding this driver, are not visible in this listing.  */
322 main (int argc
, char *argv
[])
/* An empty locale name makes setlocale use the environment settings.  */
324 setlocale (LC_ALL
, "");
325 if (mbsstr_trimmed_wordbounded (argv
[1], argv
[2]))
/* Test driver: selects the locale from the environment and prints the
   result of proper_name_utf8 for a fixed sample name, followed by a
   newline.
   NOTE(review): any conditional compilation guarding this second main is
   not visible in this listing.  */
335 main (int argc
, char *argv
[])
/* An empty locale name makes setlocale use the environment settings.  */
337 setlocale (LC_ALL
, "");
/* The second argument spells the c-cedilla as the UTF-8 bytes 0xC3 0xA7,
   written as octal escapes.  */
338 printf ("%s\n", proper_name_utf8 ("Franc,ois Pinard", "Fran\303\247ois Pinard"));