2 * Copyright (c) 2004 Kungliga Tekniska Högskolan
3 * (Royal Institute of Technology, Stockholm, Sweden).
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the Institute nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
43 #include "normalize_table.h"
/*
 * Comparison callback over two `struct translation' records.
 *
 * Both arguments are viewed as `const struct translation *' and the
 * result is the difference of their `key' members, so it is zero exactly
 * when the two keys are equal.
 *
 * Presumably this is the comparator handed to bsearch() in
 * compat_decomp(); that argument is not part of this excerpt -- confirm.
 *
 * NOTE(review): `key' is declared in normalize_table.h (not shown).  If
 * it is an unsigned 32-bit value, the subtraction wraps and the
 * conversion to the return type can report the wrong sign for keys that
 * are 2^31 or more apart.  Equality is still detected correctly.  An
 * explicit (a > b) - (a < b) comparison would avoid the issue.
 */
46 translation_cmp(const void *key
, const void *data
)
48 const struct translation
*t1
= (const struct translation
*)key
;
49 const struct translation
*t2
= (const struct translation
*)data
;
/* Order by key: negative, zero or positive as t1->key - t2->key. */
51 return t1
->key
- t2
->key
;
/*
 * Constants for the arithmetic Hangul syllable (de)composition used by
 * hangul_decomp() and hangul_composition().
 *
 *   s_base / s_count  first precomposed syllable and number of syllables;
 *                     the functions test for s_base <= c < s_base + s_count
 *   l_base            code point added to the leading index
 *   v_base            code point added to the vowel index
 *   t_base            code point added to the trailing index; a trailing
 *                     index of 0 therefore maps to t_base itself
 *   n_count           v_count * t_count, the number of syllables that
 *                     share one leading index
 *
 * The values match SBase, SCount, LBase, VBase and TBase of the Unicode
 * Hangul algorithm.  The definitions of l_count, v_count and t_count are
 * not part of this excerpt.
 */
54 enum { s_base
= 0xAC00};
55 enum { s_count
= 11172};
56 enum { l_base
= 0x1100};
58 enum { v_base
= 0x1161};
60 enum { t_base
= 0x11A7};
62 enum { n_count
= v_count
* t_count
};
/*
 * Arithmetic decomposition of a precomposed Hangul syllable.
 *
 * Parameters: `in' / `in_len' give the input code points, `out' /
 * `out_len' the output buffer and its size.
 *
 * The value `u' is range-checked against [s_base, s_base + s_count).
 * For a value inside that range the three parts are derived from
 * `s_index':
 *
 *   l = l_base + s_index / n_count
 *   v = v_base + (s_index % n_count) / t_count
 *   t = t_base + s_index % t_count
 *
 * The definitions of `u' and `s_index', the statement executed when the
 * range test fails, and the stores into `out' are not part of this
 * excerpt; presumably u is in[0] and s_index is u - s_base -- confirm.
 *
 * WIND_ERR_OVERRUN is returned on one path; presumably when *out_len is
 * too small for the result, but the guarding condition is not shown.
 */
65 hangul_decomp(const uint32_t *in
, size_t in_len
,
66 uint32_t *out
, size_t *out_len
)
/* Reject anything that is not a precomposed syllable. */
73 if (u
< s_base
|| u
>= s_base
+ s_count
)
/* Split the syllable index into its leading, vowel and trailing parts. */
76 l
= l_base
+ s_index
/ n_count
;
77 v
= v_base
+ (s_index
% n_count
) / t_count
;
/* A remainder of 0 yields t == t_base. */
78 t
= t_base
+ s_index
% t_count
;
83 return WIND_ERR_OVERRUN
;
/*
 * Arithmetic composition of a pair of code points in[0], in[1] into one
 * Hangul syllable.
 *
 * Two cases are handled:
 *
 *   1. in[0] is in [l_base, l_base + l_count) and in[1] is in
 *      [v_base, v_base + v_count): the result is
 *      (l_index * v_count + v_index) * t_count + s_base.
 *
 *   2. in[0] is in [s_base, s_base + s_count), its syllable index is a
 *      multiple of t_count, and in[1] is in [t_base, t_base + t_count):
 *      the result is in[0] + t_index.
 *
 * The statements executed when one of the tests fails, and any check of
 * in_len before in[1] is read, are not part of this excerpt.  Callers
 * visible here (find_composition(), and through it combine()) treat a
 * zero result as "no composition" -- presumably the failure value;
 * confirm.
 *
 * NOTE(review): the trailing range test accepts in[1] == t_base, which
 * gives t_index 0 and returns in[0] unchanged, i.e. the second code
 * point would be absorbed without changing the syllable.  The Unicode
 * reference algorithm composes only when 0 < TIndex < TCount.  Confirm
 * whether the lower bound should exclude t_base.
 */
93 hangul_composition(const uint32_t *in
, size_t in_len
)
/* Case 1: leading part followed by a vowel. */
97 if (in
[0] >= l_base
&& in
[0] < l_base
+ l_count
) {
98 unsigned l_index
= in
[0] - l_base
;
101 if (in
[1] < v_base
|| in
[1] >= v_base
+ v_count
)
103 v_index
= in
[1] - v_base
;
104 return (l_index
* v_count
+ v_index
) * t_count
+ s_base
;
/* Case 2: existing syllable followed by a trailing part. */
105 } else if (in
[0] >= s_base
&& in
[0] < s_base
+ s_count
) {
106 unsigned s_index
= in
[0] - s_base
;
/* A non-zero remainder means the syllable already has a trailing part. */
109 if (s_index
% t_count
!= 0)
111 if (in
[1] < t_base
|| in
[1] >= t_base
+ t_count
)
113 t_index
= in
[1] - t_base
;
114 return in
[0] + t_index
;
/*
 * Decomposition of `in' (in_len code points) into `out', processing one
 * input code point per loop iteration.
 *
 * Steps visible in this excerpt, per code point in[i]:
 *
 *   - sub_len is set to *out_len - o, i.e. the part of the output
 *     buffer after the first o elements (presumably o counts the
 *     elements written so far; its declaration and updates are not
 *     shown).
 *   - hangul_decomp() is tried on the remaining input (in + i,
 *     in_len - i); a WIND_ERR_OVERRUN result is tested for explicitly.
 *   - the code point is looked up with bsearch() in
 *     _wind_normalize_table, using a stack `struct translation' whose
 *     first member is initialised to in[i] as the search key.
 *   - a search result is viewed as `const struct translation *' and its
 *     val_offset selects a sequence in _wind_normalize_val_table, which
 *     is decomposed by a recursive call to compat_decomp().
 *
 * WIND_ERR_OVERRUN is returned on at least one path.  The bsearch()
 * comparator argument, the handling of a failed lookup, the remaining
 * arguments of the nested calls and the final update of *out_len are
 * not part of this excerpt.
 */
120 compat_decomp(const uint32_t *in
, size_t in_len
,
121 uint32_t *out
, size_t *out_len
)
126 for (i
= 0; i
< in_len
; ++i
) {
/* Search key: only the first member is set, the rest is zeroed. */
127 struct translation ts
= {in
[i
]};
128 size_t sub_len
= *out_len
- o
;
131 ret
= hangul_decomp(in
+ i
, in_len
- i
,
134 if (ret
== WIND_ERR_OVERRUN
)
138 void *s
= bsearch(&ts
,
139 _wind_normalize_table
,
140 _wind_normalize_table_size
,
141 sizeof(_wind_normalize_table
[0]),
144 const struct translation
*t
= (const struct translation
*)s
;
/* Recurse on the replacement sequence stored in the value table. */
146 ret
= compat_decomp(_wind_normalize_val_table
+ t
->val_offset
,
154 return WIND_ERR_OVERRUN
;
/*
 * Comparison callback passed to qsort() by canonical_reorder().
 *
 * Both arguments point at uint32_t code points.  The result is the
 * difference of their _wind_combining_class() values, so elements are
 * ordered by ascending combining class and elements with the same class
 * compare equal.
 */
165 cc_cmp(const void *a
, const void *b
)
167 const uint32_t *ua
= (const uint32_t *)a
;
168 const uint32_t *ub
= (const uint32_t *)b
;
170 return _wind_combining_class(*ua
) - _wind_combining_class(*ub
);
/*
 * Reorders `tmp' (tmp_len code points) in place by combining class.
 *
 * The array is scanned with index i.  A second index j advances while
 * tmp[j] has a non-zero combining class, and the range tmp[i] ..
 * tmp[j - 1] is then sorted with qsort() using cc_cmp.  The
 * initialisation of j, the condition under which a run is started
 * (presumably cc != 0) and the update of i after sorting are not part
 * of this excerpt.
 *
 * NOTE(review): qsort() leaves the relative order of elements that
 * compare equal unspecified, while Unicode canonical ordering requires
 * characters with the same combining class to keep their original
 * order.  A stable sort (e.g. insertion sort over the run) would be
 * needed to guarantee that.
 */
174 canonical_reorder(uint32_t *tmp
, size_t tmp_len
)
178 for (i
= 0; i
< tmp_len
; ++i
) {
179 int cc
= _wind_combining_class(tmp
[i
]);
/* Extend the run while the combining class stays non-zero. */
183 j
< tmp_len
&& _wind_combining_class(tmp
[j
]);
/* Sort the run tmp[i .. j-1] by combining class. */
186 qsort(&tmp
[i
], j
- i
, sizeof(tmp
[0]), cc_cmp
);
/*
 * Looks up the composition of the sequence `in' (in_len code points).
 *
 * hangul_composition() is consulted first.  What happens with its
 * result is not part of this excerpt; presumably a non-zero value is
 * returned directly -- confirm.
 *
 * The remainder is a table walk that starts at node 0 of
 * _wind_canon_table and repeats while canon_index is non-zero:
 *
 *   - c points at the current node;
 *   - an index i is tested against the node's [next_start, next_end)
 *     window;
 *   - the entry _wind_canon_next_table[c->next_offset + i - c->next_start]
 *     is read (the line holding its assignment target is not shown;
 *     presumably it becomes the new canon_index);
 *   - when canon_index is non-zero, cur is shifted left by four bits
 *     and masked to its low 20 bits.
 *
 * How i is derived from the input and what is finally returned are not
 * visible here.
 */
193 find_composition(const uint32_t *in
, unsigned in_len
)
195 unsigned short canon_index
= 0;
199 cur
= hangul_composition(in
, in_len
);
204 const struct canon_node
*c
= &_wind_canon_table
[canon_index
];
/* i must fall inside the range of children stored for this node. */
214 if (i
< c
->next_start
|| i
>= c
->next_end
)
218 _wind_canon_next_table
[c
->next_offset
+ i
- c
->next_start
];
219 if (canon_index
!= 0) {
/* Make room for the next four bits; keep cur within 20 bits. */
220 cur
= (cur
<< 4) & 0xFFFFF;
223 } while (canon_index
!= 0);
/*
 * Composition pass: reads `in' (in_len code points) and writes the
 * composed result to `out' / `out_len'.
 *
 * Structure visible in this excerpt:
 *
 *   - the outer loop runs while i < in_len and has no increment
 *     expression of its own, so i is advanced inside the body;
 *   - an inner loop first steps over code points whose combining class
 *     is non-zero;
 *   - later, v[0] is loaded from out[ostarter] and the pair v is offered
 *     to find_composition(v, 2).  When the combining class of in[i]
 *     differs from old_cc and a composition is found, the result
 *     replaces out[ostarter];
 *   - otherwise a combining class of 0 starts a separate branch.
 *
 * WIND_ERR_OVERRUN is returned on two paths; presumably when `out' is
 * full, but the guarding conditions are not part of this excerpt.  The
 * assignment of v[1] and the maintenance of ostarter, old_cc and
 * *out_len are not visible either.
 */
228 combine(const uint32_t *in
, size_t in_len
,
229 uint32_t *out
, size_t *out_len
)
236 for (i
= 0; i
< in_len
;) {
/* Leading code points with a non-zero combining class. */
237 while (i
< in_len
&& _wind_combining_class(in
[i
]) != 0) {
242 return WIND_ERR_OVERRUN
;
252 v
[0] = out
[ostarter
];
255 cc
= _wind_combining_class(in
[i
]);
/* A zero result from find_composition() means "no composition". */
256 if (old_cc
!= cc
&& (comb
= find_composition(v
, 2))) {
257 out
[ostarter
] = comb
;
258 } else if (cc
== 0) {
262 return WIND_ERR_OVERRUN
;
/*
 * Normalizes `in' (in_len code points) into `out' / `out_len' in three
 * stages:
 *
 *   1. compat_decomp() decomposes the input into a temporary buffer and
 *      updates tmp_len;
 *   2. canonical_reorder() sorts the combining sequences of that buffer
 *      in place;
 *   3. combine() composes the reordered buffer into `out'.
 *
 * The temporary buffer holds in_len * 4 elements, but never fewer than
 * MAX_LENGTH_CANON, and is obtained with malloc().
 *
 * NOTE(review): in_len * 4 and tmp_len * sizeof(uint32_t) are unchecked
 * multiplications and can wrap for very large in_len.  The check of the
 * malloc() result, the handling of `ret' between the stages and the
 * release of tmp are not part of this excerpt -- confirm they exist.
 */
275 _wind_stringprep_normalize(const uint32_t *in
, size_t in_len
,
276 uint32_t *out
, size_t *out_len
)
/* Size the scratch buffer: four slots per input code point, with a floor. */
287 tmp_len
= in_len
* 4;
288 if (tmp_len
< MAX_LENGTH_CANON
)
289 tmp_len
= MAX_LENGTH_CANON
;
290 tmp
= malloc(tmp_len
* sizeof(uint32_t));
294 ret
= compat_decomp(in
, in_len
, tmp
, &tmp_len
);
299 canonical_reorder(tmp
, tmp_len
);
300 ret
= combine(tmp
, tmp_len
, out
, out_len
);