Update copyright for 2022
[pgsql.git] / src / common / saslprep.c
blob6aaba8e422ee90c5f9cb00ce4e244ce41941251c
1 /*-------------------------------------------------------------------------
2 * saslprep.c
3 * SASLprep normalization, for SCRAM authentication
5 * The SASLprep algorithm is used to process a user-supplied password into
6 * canonical form. For more details, see:
8 * [RFC3454] Preparation of Internationalized Strings ("stringprep"),
9 * http://www.ietf.org/rfc/rfc3454.txt
11 * [RFC4013] SASLprep: Stringprep Profile for User Names and Passwords
12 * http://www.ietf.org/rfc/rfc4013.txt
15 * Portions Copyright (c) 2017-2022, PostgreSQL Global Development Group
17 * IDENTIFICATION
18 * src/common/saslprep.c
20 *-------------------------------------------------------------------------
22 #ifndef FRONTEND
23 #include "postgres.h"
24 #else
25 #include "postgres_fe.h"
26 #endif
28 #include "common/saslprep.h"
29 #include "common/string.h"
30 #include "common/unicode_norm.h"
31 #include "mb/pg_wchar.h"
/*
 * Memory allocation wrappers.
 *
 * Backend builds go through palloc/pstrdup/pfree.  Frontend builds use the
 * plain C library allocator, and callers in this file translate a NULL
 * result into SASLPREP_OOM.
 */
#ifndef FRONTEND
#define STRDUP(s) pstrdup(s)
#define ALLOC(size) palloc(size)
#define FREE(ptr) pfree(ptr)
#else
#define STRDUP(s) strdup(s)
#define ALLOC(size) malloc(size)
#define FREE(ptr) free(ptr)
#endif
47 /* Prototypes for local functions */
/* bsearch() comparator: code point "a" against a [first, last] range "b" */
48 static int codepoint_range_cmp(const void *a, const void *b);
/* true if "code" lies inside one of the ranges stored pairwise in "map" */
49 static bool is_code_in_table(pg_wchar code, const pg_wchar *map, int mapsize);
/* number of characters in a UTF-8 string, or -1 if it is not valid UTF-8 */
50 static int pg_utf8_string_len(const char *source);
53 * Stringprep Mapping Tables.
55 * The stringprep specification includes a number of tables of Unicode
56 * codepoints, used in different parts of the algorithm. They are below,
57 * as arrays of codepoint ranges. Each range is a pair of codepoints,
58 * for the first and last codepoint included in the range (inclusive!).
62 * C.1.2 Non-ASCII space characters
64 * These are all mapped to the ASCII space character (U+0020).
/*
 * Layout: consecutive [first, last] pairs, both bounds inclusive.  The
 * pairs must stay in ascending order and must not overlap, because
 * is_code_in_table() looks them up with bsearch() and uses the first and
 * last array elements as the overall minimum and maximum.
 *
 * pg_saslprep() replaces any code point found here with 0x0020.
 */
66 static const pg_wchar non_ascii_space_ranges[] =
68 0x00A0, 0x00A0,
69 0x1680, 0x1680,
70 0x2000, 0x200B,
71 0x202F, 0x202F,
72 0x205F, 0x205F,
73 0x3000, 0x3000
77 * B.1 Commonly mapped to nothing
79 * If any of these appear in the input, they are removed.
/*
 * Layout: consecutive [first, last] pairs, both bounds inclusive.  The
 * pairs must stay in ascending order and must not overlap, because
 * is_code_in_table() looks them up with bsearch().
 *
 * pg_saslprep() consults this table only after non_ascii_space_ranges, so
 * a code point present in both tables is mapped to a space, not dropped.
 */
81 static const pg_wchar commonly_mapped_to_nothing_ranges[] =
83 0x00AD, 0x00AD,
84 0x034F, 0x034F,
85 0x1806, 0x1806,
86 0x180B, 0x180D,
87 0x200B, 0x200D,
88 0x2060, 0x2060,
89 0xFE00, 0xFE0F,
90 0xFEFF, 0xFEFF
94 * prohibited_output_ranges is a union of all the characters from
95 * the following tables:
97 * C.1.2 Non-ASCII space characters
98 * C.2.1 ASCII control characters
99 * C.2.2 Non-ASCII control characters
100 * C.3 Private Use characters
101 * C.4 Non-character code points
102 * C.5 Surrogate code points
103 * C.6 Inappropriate for plain text characters
104 * C.7 Inappropriate for canonical representation characters
105 * C.8 Change display properties or deprecated characters
106 * C.9 Tagging characters
108 * These are the tables that are listed as "prohibited output"
109 * characters in the SASLprep profile.
111 * The comment after each code range indicates which source table
112 * the code came from. Note that there is some overlap in the source
113 * tables, so one code might originate from multiple source tables.
114 * Adjacent ranges have also been merged together, to save space.
/*
 * Layout: consecutive [first, last] pairs, both bounds inclusive.  The
 * pairs must stay in ascending order and must not overlap, because
 * is_code_in_table() looks them up with bsearch().
 *
 * pg_saslprep() returns SASLPREP_PROHIBITED as soon as it finds a code
 * point that falls in any of these ranges.
 */
116 static const pg_wchar prohibited_output_ranges[] =
118 0x0000, 0x001F, /* C.2.1 */
119 0x007F, 0x00A0, /* C.1.2, C.2.1, C.2.2 */
120 0x0340, 0x0341, /* C.8 */
121 0x06DD, 0x06DD, /* C.2.2 */
122 0x070F, 0x070F, /* C.2.2 */
123 0x1680, 0x1680, /* C.1.2 */
124 0x180E, 0x180E, /* C.2.2 */
125 0x2000, 0x200F, /* C.1.2, C.2.2, C.8 */
126 0x2028, 0x202F, /* C.1.2, C.2.2, C.8 */
127 0x205F, 0x2063, /* C.1.2, C.2.2 */
128 0x206A, 0x206F, /* C.2.2, C.8 */
129 0x2FF0, 0x2FFB, /* C.7 */
130 0x3000, 0x3000, /* C.1.2 */
131 0xD800, 0xF8FF, /* C.3, C.5 */
132 0xFDD0, 0xFDEF, /* C.4 */
133 0xFEFF, 0xFEFF, /* C.2.2 */
134 0xFFF9, 0xFFFF, /* C.2.2, C.4, C.6 */
135 0x1D173, 0x1D17A, /* C.2.2 */
136 0x1FFFE, 0x1FFFF, /* C.4 */
137 0x2FFFE, 0x2FFFF, /* C.4 */
138 0x3FFFE, 0x3FFFF, /* C.4 */
139 0x4FFFE, 0x4FFFF, /* C.4 */
140 0x5FFFE, 0x5FFFF, /* C.4 */
141 0x6FFFE, 0x6FFFF, /* C.4 */
142 0x7FFFE, 0x7FFFF, /* C.4 */
143 0x8FFFE, 0x8FFFF, /* C.4 */
144 0x9FFFE, 0x9FFFF, /* C.4 */
145 0xAFFFE, 0xAFFFF, /* C.4 */
146 0xBFFFE, 0xBFFFF, /* C.4 */
147 0xCFFFE, 0xCFFFF, /* C.4 */
148 0xDFFFE, 0xDFFFF, /* C.4 */
149 0xE0001, 0xE0001, /* C.9 */
150 0xE0020, 0xE007F, /* C.9 */
151 0xEFFFE, 0xEFFFF, /* C.4 */
152 0xF0000, 0xFFFFF, /* C.3, C.4 */
153 0x100000, 0x10FFFF /* C.3, C.4 */
156 /* A.1 Unassigned code points in Unicode 3.2 */
/*
 * Layout: consecutive [first, last] pairs, both bounds inclusive.  The
 * pairs must stay in ascending order and must not overlap, because
 * is_code_in_table() looks them up with bsearch().
 *
 * pg_saslprep() returns SASLPREP_PROHIBITED as soon as it finds a code
 * point that falls in any of these ranges.
 */
157 static const pg_wchar unassigned_codepoint_ranges[] =
159 0x0221, 0x0221,
160 0x0234, 0x024F,
161 0x02AE, 0x02AF,
162 0x02EF, 0x02FF,
163 0x0350, 0x035F,
164 0x0370, 0x0373,
165 0x0376, 0x0379,
166 0x037B, 0x037D,
167 0x037F, 0x0383,
168 0x038B, 0x038B,
169 0x038D, 0x038D,
170 0x03A2, 0x03A2,
171 0x03CF, 0x03CF,
172 0x03F7, 0x03FF,
173 0x0487, 0x0487,
174 0x04CF, 0x04CF,
175 0x04F6, 0x04F7,
176 0x04FA, 0x04FF,
177 0x0510, 0x0530,
178 0x0557, 0x0558,
179 0x0560, 0x0560,
180 0x0588, 0x0588,
181 0x058B, 0x0590,
182 0x05A2, 0x05A2,
183 0x05BA, 0x05BA,
184 0x05C5, 0x05CF,
185 0x05EB, 0x05EF,
186 0x05F5, 0x060B,
187 0x060D, 0x061A,
188 0x061C, 0x061E,
189 0x0620, 0x0620,
190 0x063B, 0x063F,
191 0x0656, 0x065F,
192 0x06EE, 0x06EF,
193 0x06FF, 0x06FF,
194 0x070E, 0x070E,
195 0x072D, 0x072F,
196 0x074B, 0x077F,
197 0x07B2, 0x0900,
198 0x0904, 0x0904,
199 0x093A, 0x093B,
200 0x094E, 0x094F,
201 0x0955, 0x0957,
202 0x0971, 0x0980,
203 0x0984, 0x0984,
204 0x098D, 0x098E,
205 0x0991, 0x0992,
206 0x09A9, 0x09A9,
207 0x09B1, 0x09B1,
208 0x09B3, 0x09B5,
209 0x09BA, 0x09BB,
210 0x09BD, 0x09BD,
211 0x09C5, 0x09C6,
212 0x09C9, 0x09CA,
213 0x09CE, 0x09D6,
214 0x09D8, 0x09DB,
215 0x09DE, 0x09DE,
216 0x09E4, 0x09E5,
217 0x09FB, 0x0A01,
218 0x0A03, 0x0A04,
219 0x0A0B, 0x0A0E,
220 0x0A11, 0x0A12,
221 0x0A29, 0x0A29,
222 0x0A31, 0x0A31,
223 0x0A34, 0x0A34,
224 0x0A37, 0x0A37,
225 0x0A3A, 0x0A3B,
226 0x0A3D, 0x0A3D,
227 0x0A43, 0x0A46,
228 0x0A49, 0x0A4A,
229 0x0A4E, 0x0A58,
230 0x0A5D, 0x0A5D,
231 0x0A5F, 0x0A65,
232 0x0A75, 0x0A80,
233 0x0A84, 0x0A84,
234 0x0A8C, 0x0A8C,
235 0x0A8E, 0x0A8E,
236 0x0A92, 0x0A92,
237 0x0AA9, 0x0AA9,
238 0x0AB1, 0x0AB1,
239 0x0AB4, 0x0AB4,
240 0x0ABA, 0x0ABB,
241 0x0AC6, 0x0AC6,
242 0x0ACA, 0x0ACA,
243 0x0ACE, 0x0ACF,
244 0x0AD1, 0x0ADF,
245 0x0AE1, 0x0AE5,
246 0x0AF0, 0x0B00,
247 0x0B04, 0x0B04,
248 0x0B0D, 0x0B0E,
249 0x0B11, 0x0B12,
250 0x0B29, 0x0B29,
251 0x0B31, 0x0B31,
252 0x0B34, 0x0B35,
253 0x0B3A, 0x0B3B,
254 0x0B44, 0x0B46,
255 0x0B49, 0x0B4A,
256 0x0B4E, 0x0B55,
257 0x0B58, 0x0B5B,
258 0x0B5E, 0x0B5E,
259 0x0B62, 0x0B65,
260 0x0B71, 0x0B81,
261 0x0B84, 0x0B84,
262 0x0B8B, 0x0B8D,
263 0x0B91, 0x0B91,
264 0x0B96, 0x0B98,
265 0x0B9B, 0x0B9B,
266 0x0B9D, 0x0B9D,
267 0x0BA0, 0x0BA2,
268 0x0BA5, 0x0BA7,
269 0x0BAB, 0x0BAD,
270 0x0BB6, 0x0BB6,
271 0x0BBA, 0x0BBD,
272 0x0BC3, 0x0BC5,
273 0x0BC9, 0x0BC9,
274 0x0BCE, 0x0BD6,
275 0x0BD8, 0x0BE6,
276 0x0BF3, 0x0C00,
277 0x0C04, 0x0C04,
278 0x0C0D, 0x0C0D,
279 0x0C11, 0x0C11,
280 0x0C29, 0x0C29,
281 0x0C34, 0x0C34,
282 0x0C3A, 0x0C3D,
283 0x0C45, 0x0C45,
284 0x0C49, 0x0C49,
285 0x0C4E, 0x0C54,
286 0x0C57, 0x0C5F,
287 0x0C62, 0x0C65,
288 0x0C70, 0x0C81,
289 0x0C84, 0x0C84,
290 0x0C8D, 0x0C8D,
291 0x0C91, 0x0C91,
292 0x0CA9, 0x0CA9,
293 0x0CB4, 0x0CB4,
294 0x0CBA, 0x0CBD,
295 0x0CC5, 0x0CC5,
296 0x0CC9, 0x0CC9,
297 0x0CCE, 0x0CD4,
298 0x0CD7, 0x0CDD,
299 0x0CDF, 0x0CDF,
300 0x0CE2, 0x0CE5,
301 0x0CF0, 0x0D01,
302 0x0D04, 0x0D04,
303 0x0D0D, 0x0D0D,
304 0x0D11, 0x0D11,
305 0x0D29, 0x0D29,
306 0x0D3A, 0x0D3D,
307 0x0D44, 0x0D45,
308 0x0D49, 0x0D49,
309 0x0D4E, 0x0D56,
310 0x0D58, 0x0D5F,
311 0x0D62, 0x0D65,
312 0x0D70, 0x0D81,
313 0x0D84, 0x0D84,
314 0x0D97, 0x0D99,
315 0x0DB2, 0x0DB2,
316 0x0DBC, 0x0DBC,
317 0x0DBE, 0x0DBF,
318 0x0DC7, 0x0DC9,
319 0x0DCB, 0x0DCE,
320 0x0DD5, 0x0DD5,
321 0x0DD7, 0x0DD7,
322 0x0DE0, 0x0DF1,
323 0x0DF5, 0x0E00,
324 0x0E3B, 0x0E3E,
325 0x0E5C, 0x0E80,
326 0x0E83, 0x0E83,
327 0x0E85, 0x0E86,
328 0x0E89, 0x0E89,
329 0x0E8B, 0x0E8C,
330 0x0E8E, 0x0E93,
331 0x0E98, 0x0E98,
332 0x0EA0, 0x0EA0,
333 0x0EA4, 0x0EA4,
334 0x0EA6, 0x0EA6,
335 0x0EA8, 0x0EA9,
336 0x0EAC, 0x0EAC,
337 0x0EBA, 0x0EBA,
338 0x0EBE, 0x0EBF,
339 0x0EC5, 0x0EC5,
340 0x0EC7, 0x0EC7,
341 0x0ECE, 0x0ECF,
342 0x0EDA, 0x0EDB,
343 0x0EDE, 0x0EFF,
344 0x0F48, 0x0F48,
345 0x0F6B, 0x0F70,
346 0x0F8C, 0x0F8F,
347 0x0F98, 0x0F98,
348 0x0FBD, 0x0FBD,
349 0x0FCD, 0x0FCE,
350 0x0FD0, 0x0FFF,
351 0x1022, 0x1022,
352 0x1028, 0x1028,
353 0x102B, 0x102B,
354 0x1033, 0x1035,
355 0x103A, 0x103F,
356 0x105A, 0x109F,
357 0x10C6, 0x10CF,
358 0x10F9, 0x10FA,
359 0x10FC, 0x10FF,
360 0x115A, 0x115E,
361 0x11A3, 0x11A7,
362 0x11FA, 0x11FF,
363 0x1207, 0x1207,
364 0x1247, 0x1247,
365 0x1249, 0x1249,
366 0x124E, 0x124F,
367 0x1257, 0x1257,
368 0x1259, 0x1259,
369 0x125E, 0x125F,
370 0x1287, 0x1287,
371 0x1289, 0x1289,
372 0x128E, 0x128F,
373 0x12AF, 0x12AF,
374 0x12B1, 0x12B1,
375 0x12B6, 0x12B7,
376 0x12BF, 0x12BF,
377 0x12C1, 0x12C1,
378 0x12C6, 0x12C7,
379 0x12CF, 0x12CF,
380 0x12D7, 0x12D7,
381 0x12EF, 0x12EF,
382 0x130F, 0x130F,
383 0x1311, 0x1311,
384 0x1316, 0x1317,
385 0x131F, 0x131F,
386 0x1347, 0x1347,
387 0x135B, 0x1360,
388 0x137D, 0x139F,
389 0x13F5, 0x1400,
390 0x1677, 0x167F,
391 0x169D, 0x169F,
392 0x16F1, 0x16FF,
393 0x170D, 0x170D,
394 0x1715, 0x171F,
395 0x1737, 0x173F,
396 0x1754, 0x175F,
397 0x176D, 0x176D,
398 0x1771, 0x1771,
399 0x1774, 0x177F,
400 0x17DD, 0x17DF,
401 0x17EA, 0x17FF,
402 0x180F, 0x180F,
403 0x181A, 0x181F,
404 0x1878, 0x187F,
405 0x18AA, 0x1DFF,
406 0x1E9C, 0x1E9F,
407 0x1EFA, 0x1EFF,
408 0x1F16, 0x1F17,
409 0x1F1E, 0x1F1F,
410 0x1F46, 0x1F47,
411 0x1F4E, 0x1F4F,
412 0x1F58, 0x1F58,
413 0x1F5A, 0x1F5A,
414 0x1F5C, 0x1F5C,
415 0x1F5E, 0x1F5E,
416 0x1F7E, 0x1F7F,
417 0x1FB5, 0x1FB5,
418 0x1FC5, 0x1FC5,
419 0x1FD4, 0x1FD5,
420 0x1FDC, 0x1FDC,
421 0x1FF0, 0x1FF1,
422 0x1FF5, 0x1FF5,
423 0x1FFF, 0x1FFF,
424 0x2053, 0x2056,
425 0x2058, 0x205E,
426 0x2064, 0x2069,
427 0x2072, 0x2073,
428 0x208F, 0x209F,
429 0x20B2, 0x20CF,
430 0x20EB, 0x20FF,
431 0x213B, 0x213C,
432 0x214C, 0x2152,
433 0x2184, 0x218F,
434 0x23CF, 0x23FF,
435 0x2427, 0x243F,
436 0x244B, 0x245F,
437 0x24FF, 0x24FF,
438 0x2614, 0x2615,
439 0x2618, 0x2618,
440 0x267E, 0x267F,
441 0x268A, 0x2700,
442 0x2705, 0x2705,
443 0x270A, 0x270B,
444 0x2728, 0x2728,
445 0x274C, 0x274C,
446 0x274E, 0x274E,
447 0x2753, 0x2755,
448 0x2757, 0x2757,
449 0x275F, 0x2760,
450 0x2795, 0x2797,
451 0x27B0, 0x27B0,
452 0x27BF, 0x27CF,
453 0x27EC, 0x27EF,
454 0x2B00, 0x2E7F,
455 0x2E9A, 0x2E9A,
456 0x2EF4, 0x2EFF,
457 0x2FD6, 0x2FEF,
458 0x2FFC, 0x2FFF,
459 0x3040, 0x3040,
460 0x3097, 0x3098,
461 0x3100, 0x3104,
462 0x312D, 0x3130,
463 0x318F, 0x318F,
464 0x31B8, 0x31EF,
465 0x321D, 0x321F,
466 0x3244, 0x3250,
467 0x327C, 0x327E,
468 0x32CC, 0x32CF,
469 0x32FF, 0x32FF,
470 0x3377, 0x337A,
471 0x33DE, 0x33DF,
472 0x33FF, 0x33FF,
473 0x4DB6, 0x4DFF,
474 0x9FA6, 0x9FFF,
475 0xA48D, 0xA48F,
476 0xA4C7, 0xABFF,
477 0xD7A4, 0xD7FF,
478 0xFA2E, 0xFA2F,
479 0xFA6B, 0xFAFF,
480 0xFB07, 0xFB12,
481 0xFB18, 0xFB1C,
482 0xFB37, 0xFB37,
483 0xFB3D, 0xFB3D,
484 0xFB3F, 0xFB3F,
485 0xFB42, 0xFB42,
486 0xFB45, 0xFB45,
487 0xFBB2, 0xFBD2,
488 0xFD40, 0xFD4F,
489 0xFD90, 0xFD91,
490 0xFDC8, 0xFDCF,
491 0xFDFD, 0xFDFF,
492 0xFE10, 0xFE1F,
493 0xFE24, 0xFE2F,
494 0xFE47, 0xFE48,
495 0xFE53, 0xFE53,
496 0xFE67, 0xFE67,
497 0xFE6C, 0xFE6F,
498 0xFE75, 0xFE75,
499 0xFEFD, 0xFEFE,
500 0xFF00, 0xFF00,
501 0xFFBF, 0xFFC1,
502 0xFFC8, 0xFFC9,
503 0xFFD0, 0xFFD1,
504 0xFFD8, 0xFFD9,
505 0xFFDD, 0xFFDF,
506 0xFFE7, 0xFFE7,
507 0xFFEF, 0xFFF8,
508 0x10000, 0x102FF,
509 0x1031F, 0x1031F,
510 0x10324, 0x1032F,
511 0x1034B, 0x103FF,
512 0x10426, 0x10427,
513 0x1044E, 0x1CFFF,
514 0x1D0F6, 0x1D0FF,
515 0x1D127, 0x1D129,
516 0x1D1DE, 0x1D3FF,
517 0x1D455, 0x1D455,
518 0x1D49D, 0x1D49D,
519 0x1D4A0, 0x1D4A1,
520 0x1D4A3, 0x1D4A4,
521 0x1D4A7, 0x1D4A8,
522 0x1D4AD, 0x1D4AD,
523 0x1D4BA, 0x1D4BA,
524 0x1D4BC, 0x1D4BC,
525 0x1D4C1, 0x1D4C1,
526 0x1D4C4, 0x1D4C4,
527 0x1D506, 0x1D506,
528 0x1D50B, 0x1D50C,
529 0x1D515, 0x1D515,
530 0x1D51D, 0x1D51D,
531 0x1D53A, 0x1D53A,
532 0x1D53F, 0x1D53F,
533 0x1D545, 0x1D545,
534 0x1D547, 0x1D549,
535 0x1D551, 0x1D551,
536 0x1D6A4, 0x1D6A7,
537 0x1D7CA, 0x1D7CD,
538 0x1D800, 0x1FFFD,
539 0x2A6D7, 0x2F7FF,
540 0x2FA1E, 0x2FFFD,
541 0x30000, 0x3FFFD,
542 0x40000, 0x4FFFD,
543 0x50000, 0x5FFFD,
544 0x60000, 0x6FFFD,
545 0x70000, 0x7FFFD,
546 0x80000, 0x8FFFD,
547 0x90000, 0x9FFFD,
548 0xA0000, 0xAFFFD,
549 0xB0000, 0xBFFFD,
550 0xC0000, 0xCFFFD,
551 0xD0000, 0xDFFFD,
552 0xE0000, 0xE0000,
553 0xE0002, 0xE001F,
554 0xE0080, 0xEFFFD
557 /* D.1 Characters with bidirectional property "R" or "AL" */
/*
 * Layout: consecutive [first, last] pairs, both bounds inclusive.  The
 * pairs must stay in ascending order and must not overlap, because
 * is_code_in_table() looks them up with bsearch().
 *
 * pg_saslprep() uses this table for its bidirectional check: if any code
 * point matches, the first and last code points must match as well.
 */
558 static const pg_wchar RandALCat_codepoint_ranges[] =
560 0x05BE, 0x05BE,
561 0x05C0, 0x05C0,
562 0x05C3, 0x05C3,
563 0x05D0, 0x05EA,
564 0x05F0, 0x05F4,
565 0x061B, 0x061B,
566 0x061F, 0x061F,
567 0x0621, 0x063A,
568 0x0640, 0x064A,
569 0x066D, 0x066F,
570 0x0671, 0x06D5,
571 0x06DD, 0x06DD,
572 0x06E5, 0x06E6,
573 0x06FA, 0x06FE,
574 0x0700, 0x070D,
575 0x0710, 0x0710,
576 0x0712, 0x072C,
577 0x0780, 0x07A5,
578 0x07B1, 0x07B1,
579 0x200F, 0x200F,
580 0xFB1D, 0xFB1D,
581 0xFB1F, 0xFB28,
582 0xFB2A, 0xFB36,
583 0xFB38, 0xFB3C,
584 0xFB3E, 0xFB3E,
585 0xFB40, 0xFB41,
586 0xFB43, 0xFB44,
587 0xFB46, 0xFBB1,
588 0xFBD3, 0xFD3D,
589 0xFD50, 0xFD8F,
590 0xFD92, 0xFDC7,
591 0xFDF0, 0xFDFC,
592 0xFE70, 0xFE74,
593 0xFE76, 0xFEFC
596 /* D.2 Characters with bidirectional property "L" */
/*
 * Layout: consecutive [first, last] pairs, both bounds inclusive.  The
 * pairs must stay in ascending order and must not overlap, because
 * is_code_in_table() looks them up with bsearch().
 *
 * pg_saslprep() consults this table only when the string contains a
 * RandALCat code point; any match then yields SASLPREP_PROHIBITED.
 */
597 static const pg_wchar LCat_codepoint_ranges[] =
599 0x0041, 0x005A,
600 0x0061, 0x007A,
601 0x00AA, 0x00AA,
602 0x00B5, 0x00B5,
603 0x00BA, 0x00BA,
604 0x00C0, 0x00D6,
605 0x00D8, 0x00F6,
606 0x00F8, 0x0220,
607 0x0222, 0x0233,
608 0x0250, 0x02AD,
609 0x02B0, 0x02B8,
610 0x02BB, 0x02C1,
611 0x02D0, 0x02D1,
612 0x02E0, 0x02E4,
613 0x02EE, 0x02EE,
614 0x037A, 0x037A,
615 0x0386, 0x0386,
616 0x0388, 0x038A,
617 0x038C, 0x038C,
618 0x038E, 0x03A1,
619 0x03A3, 0x03CE,
620 0x03D0, 0x03F5,
621 0x0400, 0x0482,
622 0x048A, 0x04CE,
623 0x04D0, 0x04F5,
624 0x04F8, 0x04F9,
625 0x0500, 0x050F,
626 0x0531, 0x0556,
627 0x0559, 0x055F,
628 0x0561, 0x0587,
629 0x0589, 0x0589,
630 0x0903, 0x0903,
631 0x0905, 0x0939,
632 0x093D, 0x0940,
633 0x0949, 0x094C,
634 0x0950, 0x0950,
635 0x0958, 0x0961,
636 0x0964, 0x0970,
637 0x0982, 0x0983,
638 0x0985, 0x098C,
639 0x098F, 0x0990,
640 0x0993, 0x09A8,
641 0x09AA, 0x09B0,
642 0x09B2, 0x09B2,
643 0x09B6, 0x09B9,
644 0x09BE, 0x09C0,
645 0x09C7, 0x09C8,
646 0x09CB, 0x09CC,
647 0x09D7, 0x09D7,
648 0x09DC, 0x09DD,
649 0x09DF, 0x09E1,
650 0x09E6, 0x09F1,
651 0x09F4, 0x09FA,
652 0x0A05, 0x0A0A,
653 0x0A0F, 0x0A10,
654 0x0A13, 0x0A28,
655 0x0A2A, 0x0A30,
656 0x0A32, 0x0A33,
657 0x0A35, 0x0A36,
658 0x0A38, 0x0A39,
659 0x0A3E, 0x0A40,
660 0x0A59, 0x0A5C,
661 0x0A5E, 0x0A5E,
662 0x0A66, 0x0A6F,
663 0x0A72, 0x0A74,
664 0x0A83, 0x0A83,
665 0x0A85, 0x0A8B,
666 0x0A8D, 0x0A8D,
667 0x0A8F, 0x0A91,
668 0x0A93, 0x0AA8,
669 0x0AAA, 0x0AB0,
670 0x0AB2, 0x0AB3,
671 0x0AB5, 0x0AB9,
672 0x0ABD, 0x0AC0,
673 0x0AC9, 0x0AC9,
674 0x0ACB, 0x0ACC,
675 0x0AD0, 0x0AD0,
676 0x0AE0, 0x0AE0,
677 0x0AE6, 0x0AEF,
678 0x0B02, 0x0B03,
679 0x0B05, 0x0B0C,
680 0x0B0F, 0x0B10,
681 0x0B13, 0x0B28,
682 0x0B2A, 0x0B30,
683 0x0B32, 0x0B33,
684 0x0B36, 0x0B39,
685 0x0B3D, 0x0B3E,
686 0x0B40, 0x0B40,
687 0x0B47, 0x0B48,
688 0x0B4B, 0x0B4C,
689 0x0B57, 0x0B57,
690 0x0B5C, 0x0B5D,
691 0x0B5F, 0x0B61,
692 0x0B66, 0x0B70,
693 0x0B83, 0x0B83,
694 0x0B85, 0x0B8A,
695 0x0B8E, 0x0B90,
696 0x0B92, 0x0B95,
697 0x0B99, 0x0B9A,
698 0x0B9C, 0x0B9C,
699 0x0B9E, 0x0B9F,
700 0x0BA3, 0x0BA4,
701 0x0BA8, 0x0BAA,
702 0x0BAE, 0x0BB5,
703 0x0BB7, 0x0BB9,
704 0x0BBE, 0x0BBF,
705 0x0BC1, 0x0BC2,
706 0x0BC6, 0x0BC8,
707 0x0BCA, 0x0BCC,
708 0x0BD7, 0x0BD7,
709 0x0BE7, 0x0BF2,
710 0x0C01, 0x0C03,
711 0x0C05, 0x0C0C,
712 0x0C0E, 0x0C10,
713 0x0C12, 0x0C28,
714 0x0C2A, 0x0C33,
715 0x0C35, 0x0C39,
716 0x0C41, 0x0C44,
717 0x0C60, 0x0C61,
718 0x0C66, 0x0C6F,
719 0x0C82, 0x0C83,
720 0x0C85, 0x0C8C,
721 0x0C8E, 0x0C90,
722 0x0C92, 0x0CA8,
723 0x0CAA, 0x0CB3,
724 0x0CB5, 0x0CB9,
725 0x0CBE, 0x0CBE,
726 0x0CC0, 0x0CC4,
727 0x0CC7, 0x0CC8,
728 0x0CCA, 0x0CCB,
729 0x0CD5, 0x0CD6,
730 0x0CDE, 0x0CDE,
731 0x0CE0, 0x0CE1,
732 0x0CE6, 0x0CEF,
733 0x0D02, 0x0D03,
734 0x0D05, 0x0D0C,
735 0x0D0E, 0x0D10,
736 0x0D12, 0x0D28,
737 0x0D2A, 0x0D39,
738 0x0D3E, 0x0D40,
739 0x0D46, 0x0D48,
740 0x0D4A, 0x0D4C,
741 0x0D57, 0x0D57,
742 0x0D60, 0x0D61,
743 0x0D66, 0x0D6F,
744 0x0D82, 0x0D83,
745 0x0D85, 0x0D96,
746 0x0D9A, 0x0DB1,
747 0x0DB3, 0x0DBB,
748 0x0DBD, 0x0DBD,
749 0x0DC0, 0x0DC6,
750 0x0DCF, 0x0DD1,
751 0x0DD8, 0x0DDF,
752 0x0DF2, 0x0DF4,
753 0x0E01, 0x0E30,
754 0x0E32, 0x0E33,
755 0x0E40, 0x0E46,
756 0x0E4F, 0x0E5B,
757 0x0E81, 0x0E82,
758 0x0E84, 0x0E84,
759 0x0E87, 0x0E88,
760 0x0E8A, 0x0E8A,
761 0x0E8D, 0x0E8D,
762 0x0E94, 0x0E97,
763 0x0E99, 0x0E9F,
764 0x0EA1, 0x0EA3,
765 0x0EA5, 0x0EA5,
766 0x0EA7, 0x0EA7,
767 0x0EAA, 0x0EAB,
768 0x0EAD, 0x0EB0,
769 0x0EB2, 0x0EB3,
770 0x0EBD, 0x0EBD,
771 0x0EC0, 0x0EC4,
772 0x0EC6, 0x0EC6,
773 0x0ED0, 0x0ED9,
774 0x0EDC, 0x0EDD,
775 0x0F00, 0x0F17,
776 0x0F1A, 0x0F34,
777 0x0F36, 0x0F36,
778 0x0F38, 0x0F38,
779 0x0F3E, 0x0F47,
780 0x0F49, 0x0F6A,
781 0x0F7F, 0x0F7F,
782 0x0F85, 0x0F85,
783 0x0F88, 0x0F8B,
784 0x0FBE, 0x0FC5,
785 0x0FC7, 0x0FCC,
786 0x0FCF, 0x0FCF,
787 0x1000, 0x1021,
788 0x1023, 0x1027,
789 0x1029, 0x102A,
790 0x102C, 0x102C,
791 0x1031, 0x1031,
792 0x1038, 0x1038,
793 0x1040, 0x1057,
794 0x10A0, 0x10C5,
795 0x10D0, 0x10F8,
796 0x10FB, 0x10FB,
797 0x1100, 0x1159,
798 0x115F, 0x11A2,
799 0x11A8, 0x11F9,
800 0x1200, 0x1206,
801 0x1208, 0x1246,
802 0x1248, 0x1248,
803 0x124A, 0x124D,
804 0x1250, 0x1256,
805 0x1258, 0x1258,
806 0x125A, 0x125D,
807 0x1260, 0x1286,
808 0x1288, 0x1288,
809 0x128A, 0x128D,
810 0x1290, 0x12AE,
811 0x12B0, 0x12B0,
812 0x12B2, 0x12B5,
813 0x12B8, 0x12BE,
814 0x12C0, 0x12C0,
815 0x12C2, 0x12C5,
816 0x12C8, 0x12CE,
817 0x12D0, 0x12D6,
818 0x12D8, 0x12EE,
819 0x12F0, 0x130E,
820 0x1310, 0x1310,
821 0x1312, 0x1315,
822 0x1318, 0x131E,
823 0x1320, 0x1346,
824 0x1348, 0x135A,
825 0x1361, 0x137C,
826 0x13A0, 0x13F4,
827 0x1401, 0x1676,
828 0x1681, 0x169A,
829 0x16A0, 0x16F0,
830 0x1700, 0x170C,
831 0x170E, 0x1711,
832 0x1720, 0x1731,
833 0x1735, 0x1736,
834 0x1740, 0x1751,
835 0x1760, 0x176C,
836 0x176E, 0x1770,
837 0x1780, 0x17B6,
838 0x17BE, 0x17C5,
839 0x17C7, 0x17C8,
840 0x17D4, 0x17DA,
841 0x17DC, 0x17DC,
842 0x17E0, 0x17E9,
843 0x1810, 0x1819,
844 0x1820, 0x1877,
845 0x1880, 0x18A8,
846 0x1E00, 0x1E9B,
847 0x1EA0, 0x1EF9,
848 0x1F00, 0x1F15,
849 0x1F18, 0x1F1D,
850 0x1F20, 0x1F45,
851 0x1F48, 0x1F4D,
852 0x1F50, 0x1F57,
853 0x1F59, 0x1F59,
854 0x1F5B, 0x1F5B,
855 0x1F5D, 0x1F5D,
856 0x1F5F, 0x1F7D,
857 0x1F80, 0x1FB4,
858 0x1FB6, 0x1FBC,
859 0x1FBE, 0x1FBE,
860 0x1FC2, 0x1FC4,
861 0x1FC6, 0x1FCC,
862 0x1FD0, 0x1FD3,
863 0x1FD6, 0x1FDB,
864 0x1FE0, 0x1FEC,
865 0x1FF2, 0x1FF4,
866 0x1FF6, 0x1FFC,
867 0x200E, 0x200E,
868 0x2071, 0x2071,
869 0x207F, 0x207F,
870 0x2102, 0x2102,
871 0x2107, 0x2107,
872 0x210A, 0x2113,
873 0x2115, 0x2115,
874 0x2119, 0x211D,
875 0x2124, 0x2124,
876 0x2126, 0x2126,
877 0x2128, 0x2128,
878 0x212A, 0x212D,
879 0x212F, 0x2131,
880 0x2133, 0x2139,
881 0x213D, 0x213F,
882 0x2145, 0x2149,
883 0x2160, 0x2183,
884 0x2336, 0x237A,
885 0x2395, 0x2395,
886 0x249C, 0x24E9,
887 0x3005, 0x3007,
888 0x3021, 0x3029,
889 0x3031, 0x3035,
890 0x3038, 0x303C,
891 0x3041, 0x3096,
892 0x309D, 0x309F,
893 0x30A1, 0x30FA,
894 0x30FC, 0x30FF,
895 0x3105, 0x312C,
896 0x3131, 0x318E,
897 0x3190, 0x31B7,
898 0x31F0, 0x321C,
899 0x3220, 0x3243,
900 0x3260, 0x327B,
901 0x327F, 0x32B0,
902 0x32C0, 0x32CB,
903 0x32D0, 0x32FE,
904 0x3300, 0x3376,
905 0x337B, 0x33DD,
906 0x33E0, 0x33FE,
907 0x3400, 0x4DB5,
908 0x4E00, 0x9FA5,
909 0xA000, 0xA48C,
910 0xAC00, 0xD7A3,
911 0xD800, 0xFA2D,
912 0xFA30, 0xFA6A,
913 0xFB00, 0xFB06,
914 0xFB13, 0xFB17,
915 0xFF21, 0xFF3A,
916 0xFF41, 0xFF5A,
917 0xFF66, 0xFFBE,
918 0xFFC2, 0xFFC7,
919 0xFFCA, 0xFFCF,
920 0xFFD2, 0xFFD7,
921 0xFFDA, 0xFFDC,
922 0x10300, 0x1031E,
923 0x10320, 0x10323,
924 0x10330, 0x1034A,
925 0x10400, 0x10425,
926 0x10428, 0x1044D,
927 0x1D000, 0x1D0F5,
928 0x1D100, 0x1D126,
929 0x1D12A, 0x1D166,
930 0x1D16A, 0x1D172,
931 0x1D183, 0x1D184,
932 0x1D18C, 0x1D1A9,
933 0x1D1AE, 0x1D1DD,
934 0x1D400, 0x1D454,
935 0x1D456, 0x1D49C,
936 0x1D49E, 0x1D49F,
937 0x1D4A2, 0x1D4A2,
938 0x1D4A5, 0x1D4A6,
939 0x1D4A9, 0x1D4AC,
940 0x1D4AE, 0x1D4B9,
941 0x1D4BB, 0x1D4BB,
942 0x1D4BD, 0x1D4C0,
943 0x1D4C2, 0x1D4C3,
944 0x1D4C5, 0x1D505,
945 0x1D507, 0x1D50A,
946 0x1D50D, 0x1D514,
947 0x1D516, 0x1D51C,
948 0x1D51E, 0x1D539,
949 0x1D53B, 0x1D53E,
950 0x1D540, 0x1D544,
951 0x1D546, 0x1D546,
952 0x1D54A, 0x1D550,
953 0x1D552, 0x1D6A3,
954 0x1D6A8, 0x1D7C9,
955 0x20000, 0x2A6D6,
956 0x2F800, 0x2FA1D,
957 0xF0000, 0xFFFFD,
958 0x100000, 0x10FFFD
961 /* End of stringprep tables */
/*
 * Does the Unicode code point "code" fall inside the range table "map"?
 *
 * "map" must name the array itself rather than a pointer to it, because
 * the element count is obtained with lengthof().
 */
#define IS_CODE_IN_TABLE(code, map) \
	is_code_in_table((code), (map), lengthof(map))
967 static int
968 codepoint_range_cmp(const void *a, const void *b)
970 const pg_wchar *key = (const pg_wchar *) a;
971 const pg_wchar *range = (const pg_wchar *) b;
973 if (*key < range[0])
974 return -1; /* less than lower bound */
975 if (*key > range[1])
976 return 1; /* greater than upper bound */
978 return 0; /* within range */
981 static bool
982 is_code_in_table(pg_wchar code, const pg_wchar *map, int mapsize)
984 Assert(mapsize % 2 == 0);
986 if (code < map[0] || code > map[mapsize - 1])
987 return false;
989 if (bsearch(&code, map, mapsize / 2, sizeof(pg_wchar) * 2,
990 codepoint_range_cmp))
991 return true;
992 else
993 return false;
/*
 * Calculate the length in characters of a null-terminated UTF-8 string.
 *
 * Returns -1 if the input is not valid UTF-8.  That includes a multibyte
 * sequence that is cut short by the terminating null byte.
 */
static int
pg_utf8_string_len(const char *source)
{
	const unsigned char *p = (const unsigned char *) source;
	int			num_chars = 0;

	while (*p)
	{
		int			l = pg_utf_mblen(p);
		int			i;

		/*
		 * Make sure all "l" bytes of this character lie before the
		 * terminator.  Without this check, pg_utf8_islegal() below could be
		 * asked to inspect bytes located after the end of the string when
		 * the input ends in a truncated multibyte sequence.  A null byte is
		 * never a legal part of a multibyte sequence, so rejecting here does
		 * not change the verdict for any string.
		 * NOTE(review): assumes pg_utf_mblen() derives the length without
		 * checking that the following bytes exist -- confirm in wchar.c.
		 */
		for (i = 1; i < l; i++)
		{
			if (p[i] == '\0')
				return -1;
		}

		if (!pg_utf8_islegal(p, l))
			return -1;

		p += l;
		num_chars++;
	}

	return num_chars;
}
1024 * pg_saslprep - Normalize a password with SASLprep.
1026 * SASLprep requires the input to be in UTF-8 encoding, but PostgreSQL
1027 * supports many encodings, so we don't blindly assume that. pg_saslprep
1028 * will check if the input looks like valid UTF-8, and returns
1029 * SASLPREP_INVALID_UTF8 if not.
1031 * If the string contains prohibited characters (or more precisely, if the
1032 * output string would contain prohibited characters after normalization),
1033 * returns SASLPREP_PROHIBITED.
1035 * On success, returns SASLPREP_SUCCESS, and the normalized string in
1036 * *output.
1038 * In frontend, the normalized string is malloc'd, and the caller is
1039 * responsible for freeing it. If an allocation fails, returns
1040 * SASLPREP_OOM. In backend, the normalized string is palloc'd instead,
1041 * and a failed allocation leads to ereport(ERROR).
1043 pg_saslprep_rc
1044 pg_saslprep(const char *input, char **output)
1046 pg_wchar *input_chars = NULL;
1047 pg_wchar *output_chars = NULL;
1048 int input_size;
1049 char *result;
1050 int result_size;
1051 int count;
1052 int i;
1053 bool contains_RandALCat;
1054 unsigned char *p;
1055 pg_wchar *wp;
1057 /* Ensure we return *output as NULL on failure */
1058 *output = NULL;
1061 * Quick check if the input is pure ASCII. An ASCII string requires no
1062 * further processing.
1064 if (pg_is_ascii(input))
1066 *output = STRDUP(input);
1067 if (!(*output))
1068 goto oom;
1069 return SASLPREP_SUCCESS;
1073 * Convert the input from UTF-8 to an array of Unicode codepoints.
1075 * This also checks that the input is a legal UTF-8 string.
1077 input_size = pg_utf8_string_len(input);
1078 if (input_size < 0)
1079 return SASLPREP_INVALID_UTF8;
1081 input_chars = ALLOC((input_size + 1) * sizeof(pg_wchar));
1082 if (!input_chars)
1083 goto oom;
1085 p = (unsigned char *) input;
1086 for (i = 0; i < input_size; i++)
1088 input_chars[i] = utf8_to_unicode(p);
1089 p += pg_utf_mblen(p);
1091 input_chars[i] = (pg_wchar) '\0';
1094 * The steps below correspond to the steps listed in [RFC3454], Section
1095 * "2. Preparation Overview"
1099 * 1) Map -- For each character in the input, check if it has a mapping
1100 * and, if so, replace it with its mapping.
1102 count = 0;
1103 for (i = 0; i < input_size; i++)
1105 pg_wchar code = input_chars[i];
1107 if (IS_CODE_IN_TABLE(code, non_ascii_space_ranges))
1108 input_chars[count++] = 0x0020;
1109 else if (IS_CODE_IN_TABLE(code, commonly_mapped_to_nothing_ranges))
1111 /* map to nothing */
1113 else
1114 input_chars[count++] = code;
1116 input_chars[count] = (pg_wchar) '\0';
1117 input_size = count;
1119 if (input_size == 0)
1120 goto prohibited; /* don't allow empty password */
1123 * 2) Normalize -- Normalize the result of step 1 using Unicode
1124 * normalization.
1126 output_chars = unicode_normalize(UNICODE_NFKC, input_chars);
1127 if (!output_chars)
1128 goto oom;
1131 * 3) Prohibit -- Check for any characters that are not allowed in the
1132 * output. If any are found, return an error.
1134 for (i = 0; i < input_size; i++)
1136 pg_wchar code = input_chars[i];
1138 if (IS_CODE_IN_TABLE(code, prohibited_output_ranges))
1139 goto prohibited;
1140 if (IS_CODE_IN_TABLE(code, unassigned_codepoint_ranges))
1141 goto prohibited;
1145 * 4) Check bidi -- Possibly check for right-to-left characters, and if
1146 * any are found, make sure that the whole string satisfies the
1147 * requirements for bidirectional strings. If the string does not satisfy
1148 * the requirements for bidirectional strings, return an error.
1150 * [RFC3454], Section "6. Bidirectional Characters" explains in more
1151 * detail what that means:
1153 * "In any profile that specifies bidirectional character handling, all
1154 * three of the following requirements MUST be met:
1156 * 1) The characters in section 5.8 MUST be prohibited.
1158 * 2) If a string contains any RandALCat character, the string MUST NOT
1159 * contain any LCat character.
1161 * 3) If a string contains any RandALCat character, a RandALCat character
1162 * MUST be the first character of the string, and a RandALCat character
1163 * MUST be the last character of the string."
1165 contains_RandALCat = false;
1166 for (i = 0; i < input_size; i++)
1168 pg_wchar code = input_chars[i];
1170 if (IS_CODE_IN_TABLE(code, RandALCat_codepoint_ranges))
1172 contains_RandALCat = true;
1173 break;
1177 if (contains_RandALCat)
1179 pg_wchar first = input_chars[0];
1180 pg_wchar last = input_chars[input_size - 1];
1182 for (i = 0; i < input_size; i++)
1184 pg_wchar code = input_chars[i];
1186 if (IS_CODE_IN_TABLE(code, LCat_codepoint_ranges))
1187 goto prohibited;
1190 if (!IS_CODE_IN_TABLE(first, RandALCat_codepoint_ranges) ||
1191 !IS_CODE_IN_TABLE(last, RandALCat_codepoint_ranges))
1192 goto prohibited;
1196 * Finally, convert the result back to UTF-8.
1198 result_size = 0;
1199 for (wp = output_chars; *wp; wp++)
1201 unsigned char buf[4];
1203 unicode_to_utf8(*wp, buf);
1204 result_size += pg_utf_mblen(buf);
1207 result = ALLOC(result_size + 1);
1208 if (!result)
1209 goto oom;
1212 * There are no error exits below here, so the error exit paths don't need
1213 * to worry about possibly freeing "result".
1215 p = (unsigned char *) result;
1216 for (wp = output_chars; *wp; wp++)
1218 unicode_to_utf8(*wp, p);
1219 p += pg_utf_mblen(p);
1221 Assert((char *) p == result + result_size);
1222 *p = '\0';
1224 FREE(input_chars);
1225 FREE(output_chars);
1227 *output = result;
1228 return SASLPREP_SUCCESS;
1230 prohibited:
1231 if (input_chars)
1232 FREE(input_chars);
1233 if (output_chars)
1234 FREE(output_chars);
1236 return SASLPREP_PROHIBITED;
1238 oom:
1239 if (input_chars)
1240 FREE(input_chars);
1241 if (output_chars)
1242 FREE(output_chars);
1244 return SASLPREP_OOM;