1 /* Localization of proper names.
2 Copyright (C) 2006-2020 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2006.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
18 /* Without this pragma, gcc 4.7.0 20111124 mistakenly suggests that
19 the proper_name function might be candidate for attribute 'const' */
20 #if (__GNUC__ == 4 && 6 <= __GNUC_MINOR__) || 4 < __GNUC__
21 # pragma GCC diagnostic ignored "-Wsuggest-attribute=const"
27 #include "propername.h"
41 #include "localcharset.h"
42 #include "c-strcase.h"
43 #include "xstriconv.h"
48 /* Tests whether STRING contains trim (SUB), starting and ending at word
49 boundaries.
50 Here, instead of implementing Unicode Standard Annex #29 for determining
51 word boundaries, we assume that trim (SUB) starts and ends with words and
52 only test whether the part before it ends with a non-word and the part
53 after it starts with a non-word.
   Two variants of the boundary test are visible below: the first walks the
   text with mbui_* multibyte iterators and classifies characters with
   mb_isalnum, the second inspects single bytes with isalnum.
   NOTE(review): this listing is incomplete.  The return type, the braces,
   the statement that selects between the two variants, the statements
   controlled by several of the if tests, and the function exit are not
   visible here; confirm them against the complete file before relying on
   the control flow.  */
55 mbsstr_trimmed_wordbounded (const char *string
, const char *sub
)
/* TSUB is the result of trim (SUB); every search below uses it instead of
   SUB itself.  */
57 char *tsub
= trim (sub
);
/* Keep searching until the end of STRING is reached.  */
60 for (; *string
!= '\0';)
/* Locate the next occurrence of TSUB at or after the current STRING.  */
62 const char *tsub_in_string
= mbsstr (string
, tsub
);
/* NOTE(review): the action taken when no occurrence is found is not
   visible in this listing.  */
63 if (tsub_in_string
== NULL
)
/* ---- Variant using multibyte iterators.  ---- */
69 mbui_iterator_t string_iter
;
70 bool word_boundary_before
;
71 bool word_boundary_after
;
/* Walk from STRING up to the match, remembering the last character seen,
   which is the character immediately before the match.  When the match is
   at STRING itself there is no such character and the boundary holds.  */
73 mbui_init (string_iter
, string
);
74 word_boundary_before
= true;
75 if (mbui_cur_ptr (string_iter
) < tsub_in_string
)
77 mbchar_t last_char_before_tsub
;
/* NOTE(review): the statement guarded by this availability check is not
   visible in this listing.  */
80 if (!mbui_avail (string_iter
))
82 last_char_before_tsub
= mbui_cur (string_iter
);
83 mbui_advance (string_iter
);
85 while (mbui_cur_ptr (string_iter
) < tsub_in_string
);
/* An alphanumeric character right before the match means the match does
   not start at a word boundary.  */
86 if (mb_isalnum (last_char_before_tsub
))
87 word_boundary_before
= false;
/* Restart at the match and advance STRING_ITER once per character of TSUB,
   which leaves it on the character just after the match.  */
90 mbui_init (string_iter
, tsub_in_string
);
92 mbui_iterator_t tsub_iter
;
94 for (mbui_init (tsub_iter
, tsub
);
95 mbui_avail (tsub_iter
);
96 mbui_advance (tsub_iter
))
/* NOTE(review): as above, the statement guarded by this check is not
   visible in this listing.  */
98 if (!mbui_avail (string_iter
))
100 mbui_advance (string_iter
);
/* If a character follows the match and it is alphanumeric, the match does
   not end at a word boundary.  */
103 word_boundary_after
= true;
104 if (mbui_avail (string_iter
))
106 mbchar_t first_char_after_tsub
= mbui_cur (string_iter
);
107 if (mb_isalnum (first_char_after_tsub
))
108 word_boundary_after
= false;
/* Both sides are word boundaries.  NOTE(review): the action taken in that
   case is not visible in this listing.  */
111 if (word_boundary_before
&& word_boundary_after
)
/* Otherwise move STRING one whole multibyte character past the start of
   this match, so that the next search begins after it.  */
117 mbui_init (string_iter
, tsub_in_string
);
118 if (!mbui_avail (string_iter
))
120 string
= tsub_in_string
+ mb_len (mbui_cur (string_iter
));
/* ---- Variant inspecting single bytes with isalnum.  ---- */
124 bool word_boundary_before
;
126 bool word_boundary_after
;
/* A byte before the match exists only when the match is not at STRING
   itself; the cast to unsigned char keeps the isalnum argument in the
   range that function accepts.  */
128 word_boundary_before
= true;
129 if (string
< tsub_in_string
)
130 if (isalnum ((unsigned char) tsub_in_string
[-1]))
131 word_boundary_before
= false;
/* P is set to the byte just past the match.  NOTE(review): the declaration
   of P is not visible in this listing.  */
133 p
= tsub_in_string
+ strlen (tsub
);
134 word_boundary_after
= true;
136 if (isalnum ((unsigned char) *p
))
137 word_boundary_after
= false;
/* Both sides are word boundaries.  NOTE(review): the action taken in that
   case is not visible in this listing.  */
139 if (word_boundary_before
&& word_boundary_after
)
/* Otherwise continue one byte past the start of this match.
   NOTE(review): the statement guarded by the end-of-string test is not
   visible in this listing.  */
145 if (*tsub_in_string
== '\0')
147 string
= tsub_in_string
+ 1;
155 /* Return the localization of NAME. NAME is written in ASCII.
   The visible steps are: look NAME up with gettext; when the pointer that
   comes back is not NAME itself, test whether the translation contains
   NAME at word boundaries; and format TRANSLATION followed by NAME in
   parentheses into a buffer obtained from XNMALLOC.
   NOTE(review): the return type, the braces, the return statements and the
   declaration of RESULT are not visible in this listing, so which value is
   returned in which case, and who owns the XNMALLOC buffer, must be
   confirmed against the complete file.  */
158 proper_name (const char *name
)
160 /* See whether there is a translation. */
161 const char *translation
= gettext (name
);
/* This is a pointer comparison, not a string comparison.  */
163 if (translation
!= name
)
165 /* See whether the translation contains the original name. */
166 if (mbsstr_trimmed_wordbounded (translation
, name
))
170 /* Return "TRANSLATION (NAME)". */
/* Size: both strings, plus 2 for the space and the opening parenthesis,
   1 for the closing parenthesis and 1 for the terminating NUL.  */
172 XNMALLOC (strlen (translation
) + 2 + strlen (name
) + 1 + 1, char);
/* The buffer was sized above for exactly this format.  */
174 sprintf (result
, "%s (%s)", translation
, name
);
182 /* Return the localization of a name whose original writing is not ASCII.
183 NAME_UTF8 is the real name, written in UTF-8 with octal or hexadecimal
184 escape sequences. NAME_ASCII is a fallback written only with ASCII
185 characters. */
/* Overview of the steps visible below:
   1. Look NAME_ASCII up with gettext.
   2. Unless locale_charset () reports UTF-8, convert NAME_UTF8 to the
      locale encoding with xstr_iconv and, where the preprocessor test
      allows it, a second time with a TRANSLIT suffix appended to the
      encoding name.
   3. Choose NAME from the converted forms.
   4. If the translation differs from NAME_ASCII, test whether it already
      contains one of the name forms at word boundaries, or format
      TRANSLATION followed by NAME in parentheses into a new buffer.
   5. Release the conversion buffers.
   NOTE(review): this listing is incomplete.  The return type, the braces,
   the else branches, the closing preprocessor lines, the return statements
   and the declarations of NAME and RESULT are not visible; confirm the
   control flow and the ownership of the returned string against the
   complete file.  */
188 proper_name_utf8 (const char *name_ascii
, const char *name_utf8
)
190 /* See whether there is a translation. */
191 const char *translation
= gettext (name_ascii
);
193 /* Try to convert NAME_UTF8 to the locale encoding. */
194 const char *locale_code
= locale_charset ();
/* The alloc_* pointers record buffers that the free calls further down
   release; the name_* pointers are the values actually used and are set to
   NAME_UTF8 itself in the assignments near the end of this step.  */
195 char *alloc_name_converted
= NULL
;
196 char *alloc_name_converted_translit
= NULL
;
197 const char *name_converted
= NULL
;
198 const char *name_converted_translit
= NULL
;
/* The encoding name is compared without regard to ASCII case.  */
201 if (c_strcasecmp (locale_code
, "UTF-8") != 0)
/* Plain conversion; the result is remembered in both pointers.  */
204 name_converted
= alloc_name_converted
=
205 xstr_iconv (name_utf8
, "UTF-8", locale_code
);
/* The transliterating conversion is only compiled for the C library and
   libiconv versions named in this test.  */
207 # if (((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2) \
208 && !defined __UCLIBC__) \
209 || _LIBICONV_VERSION >= 0x0105
211 char *converted_translit
;
/* Build the target encoding name: LOCALE_CODE followed by the 10-byte
   TRANSLIT suffix.  The allocation adds 1 for the terminating NUL, and the
   second memcpy copies 10 + 1 bytes so that the NUL of the literal is
   copied as well.  */
213 size_t len
= strlen (locale_code
);
214 char *locale_code_translit
= XNMALLOC (len
+ 10 + 1, char);
215 memcpy (locale_code_translit
, locale_code
, len
);
216 memcpy (locale_code_translit
+ len
, "//TRANSLIT", 10 + 1);
/* NOTE(review): the left-hand side of this assignment is not visible in
   this listing; presumably it is CONVERTED_TRANSLIT, which is tested right
   after the temporary encoding name is freed.  */
219 xstr_iconv (name_utf8
, "UTF-8", locale_code_translit
);
221 free (locale_code_translit
);
223 if (converted_translit
!= NULL
)
225 # if !_LIBICONV_VERSION
226 /* Don't use the transliteration if it added question marks.
227 glibc's transliteration falls back to question marks; libiconv's
228 transliteration does not.
229 mbschr is equivalent to strchr in this case. */
230 if (strchr (converted_translit
, '?') != NULL
)
231 free (converted_translit
);
/* NOTE(review): the right-hand side of this assignment is not visible in
   this listing.  */
234 name_converted_translit
= alloc_name_converted_translit
=
/* Here both forms are NAME_UTF8 itself and nothing is allocated.
   NOTE(review): presumably this is the branch taken for a UTF-8 locale;
   the else that would show this is not visible.  */
243 name_converted
= name_utf8
;
244 name_converted_translit
= name_utf8
;
247 /* The name in locale encoding. */
/* The plain conversion is preferred over the transliterated one.
   NOTE(review): the final alternative of this conditional expression is
   not visible in this listing.  */
248 name
= (name_converted
!= NULL
? name_converted
:
249 name_converted_translit
!= NULL
? name_converted_translit
:
252 /* See whether we have a translation. Some translators have not understood
253 that they should use the UTF-8 form of the name, if possible. So if the
254 translator provided a no-op translation, we ignore it. */
255 if (strcmp (translation
, name_ascii
) != 0)
257 /* See whether the translation contains the original name. */
/* The translation is accepted as containing the name if it contains the
   ASCII form or either converted form that is available.  */
258 if (mbsstr_trimmed_wordbounded (translation
, name_ascii
)
259 || (name_converted
!= NULL
260 && mbsstr_trimmed_wordbounded (translation
, name_converted
))
261 || (name_converted_translit
!= NULL
262 && mbsstr_trimmed_wordbounded (translation
, name_converted_translit
)))
/* Release both conversion buffers.
   NOTE(review): free accepts a null pointer, so the NULL tests here and
   below are redundant but harmless.  */
264 if (alloc_name_converted
!= NULL
)
265 free (alloc_name_converted
);
266 if (alloc_name_converted_translit
!= NULL
)
267 free (alloc_name_converted_translit
);
272 /* Return "TRANSLATION (NAME)". */
/* Size: both strings, plus 2 for the space and the opening parenthesis,
   1 for the closing parenthesis and 1 for the terminating NUL.  */
274 XNMALLOC (strlen (translation
) + 2 + strlen (name
) + 1 + 1, char);
276 sprintf (result
, "%s (%s)", translation
, name
);
/* NAME has been copied into the new buffer, so both conversion buffers are
   released here.  */
278 if (alloc_name_converted
!= NULL
)
279 free (alloc_name_converted
);
280 if (alloc_name_converted_translit
!= NULL
)
281 free (alloc_name_converted_translit
);
/* Here a conversion buffer is released only when it is not the one NAME
   points to.  */
287 if (alloc_name_converted
!= NULL
&& alloc_name_converted
!= name
)
288 free (alloc_name_converted
);
289 if (alloc_name_converted_translit
!= NULL
290 && alloc_name_converted_translit
!= name
)
291 free (alloc_name_converted_translit
);
/* Test driver for mbsstr_trimmed_wordbounded: selects the locale from the
   environment, then checks argv[1] (the string) against argv[2] (the
   substring).
   NOTE(review): the return type, the braces and the statements controlled
   by the if are not visible in this listing.  A second definition of main
   follows in this file, so presumably each one is enclosed in a
   preprocessor conditional that is not shown here; confirm against the
   complete file.  ARGC is not checked in the visible lines before argv[1]
   and argv[2] are used.  */
299 main (int argc
, char *argv
[])
/* The empty locale name selects the locale from the environment.  */
301 setlocale (LC_ALL
, "");
302 if (mbsstr_trimmed_wordbounded (argv
[1], argv
[2]))
/* Test driver for proper_name_utf8: selects the locale from the
   environment and prints the localized form of a fixed name, followed by
   a newline.
   NOTE(review): the return type, the braces and the end of the function
   are not visible in this listing.  An earlier definition of main exists
   in this file, so presumably each one is enclosed in a preprocessor
   conditional that is not shown here; confirm against the complete
   file.  */
312 main (int argc
, char *argv
[])
/* The empty locale name selects the locale from the environment.  */
314 setlocale (LC_ALL
, "");
/* The first argument is the ASCII fallback.  In the second, the two octal
   escapes are the bytes 0xC3 0xA7, the UTF-8 encoding of c with cedilla.  */
315 printf ("%s\n", proper_name_utf8 ("Franc,ois Pinard", "Fran\303\247ois Pinard"));