2 * Copyright (c) 2004 Kungliga Tekniska Högskolan
3 * (Royal Institute of Technology, Stockholm, Sweden).
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the Institute nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
43 #include "normalize_table.h"
48 translation_cmp(const void *key
, const void *data
)
50 const struct translation
*t1
= (const struct translation
*)key
;
51 const struct translation
*t2
= (const struct translation
*)data
;
53 return t1
->key
- t2
->key
;
/*
 * Constants consumed by hangul_decomp() and hangul_composition():
 * the *_base values are used there as range starts / offsets and the
 * *_count values as range sizes.
 * NOTE(review): presumably these are the Hangul syllable constants of
 * the Unicode standard (SBase, LBase, VBase, TBase, ...) -- confirm.
 * NOTE(review): l_count, v_count and t_count are used in this file but
 * their definitions are not among the lines shown here.
 */
56 enum { s_base
= 0xAC00};
57 enum { s_count
= 11172};
58 enum { l_base
= 0x1100};
60 enum { v_base
= 0x1161};
62 enum { t_base
= 0x11A7};
/* n_count is the product v_count * t_count. */
64 enum { n_count
= v_count
* t_count
};
/*
 * hangul_decomp: takes an input buffer (in, in_len) and an output
 * buffer (out, out_len).  The lines shown test `u' against the range
 * [s_base, s_base + s_count) and derive three values l, v and t from
 * `s_index'; one path returns WIND_ERR_OVERRUN.
 * NOTE(review): the return type, the braces, the local declarations,
 * the assignments to `u' and `s_index', the condition guarding the
 * WIND_ERR_OVERRUN return and the stores to `out' are not among the
 * lines shown here -- check the complete file before relying on this
 * description.
 */
67 hangul_decomp(const uint32_t *in
, size_t in_len
,
68 uint32_t *out
, size_t *out_len
)
/* True when u lies outside [s_base, s_base + s_count). */
75 if (u
< s_base
|| u
>= s_base
+ s_count
)
/* Split s_index into three components: quotient by n_count ... */
78 l
= l_base
+ s_index
/ n_count
;
/* ... the remainder by n_count, divided by t_count ... */
79 v
= v_base
+ (s_index
% n_count
) / t_count
;
/* ... and the remainder by t_count; each is offset by its base. */
80 t
= t_base
+ s_index
% t_count
;
85 return WIND_ERR_OVERRUN
;
/*
 * hangul_composition: looks at in[0] and in[1] and has two visible
 * branches, selected by the range in[0] falls into:
 *  - in[0] in [l_base, l_base + l_count): combines l_index and v_index
 *    into (l_index * v_count + v_index) * t_count + s_base;
 *  - in[0] in [s_base, s_base + s_count): returns in[0] + t_index.
 * NOTE(review): the return type, any use of in_len, the declarations
 * of v_index / t_index and whatever is executed when one of the range
 * tests below fails are not among the lines shown here -- presumably
 * a "no composition" result is returned; confirm in the complete file.
 */
95 hangul_composition(const uint32_t *in
, size_t in_len
)
/* First branch: in[0] is within l_count of l_base. */
99 if (in
[0] >= l_base
&& in
[0] < l_base
+ l_count
) {
100 unsigned l_index
= in
[0] - l_base
;
/* True when in[1] is outside [v_base, v_base + v_count). */
103 if (in
[1] < v_base
|| in
[1] >= v_base
+ v_count
)
105 v_index
= in
[1] - v_base
;
/* Inverse of the l / v split performed in hangul_decomp(), with a
 * zero t component. */
106 return (l_index
* v_count
+ v_index
) * t_count
+ s_base
;
/* Second branch: in[0] is within s_count of s_base. */
107 } else if (in
[0] >= s_base
&& in
[0] < s_base
+ s_count
) {
108 unsigned s_index
= in
[0] - s_base
;
/* True when s_index already has a non-zero remainder modulo t_count. */
111 if (s_index
% t_count
!= 0)
/* True when in[1] is outside [t_base, t_base + t_count). */
113 if (in
[1] < t_base
|| in
[1] >= t_base
+ t_count
)
115 t_index
= in
[1] - t_base
;
116 return in
[0] + t_index
;
/*
 * compat_decomp: walks in[0 .. in_len) one element at a time.  For the
 * current element the lines shown
 *  - call hangul_decomp() on the rest of the input and test its result
 *    against WIND_ERR_OVERRUN,
 *  - look the element up in _wind_normalize_table with bsearch(), and
 *  - call compat_decomp() recursively on the entries of
 *    _wind_normalize_val_table starting at the hit's val_offset.
 * WIND_ERR_OVERRUN is also returned directly on one path.
 * NOTE(review): the return type, the declarations of i, o and ret, the
 * trailing arguments of the hangul_decomp(), bsearch() and recursive
 * calls, and the handling of their results are not among the lines
 * shown here -- confirm in the complete file.
 */
122 compat_decomp(const uint32_t *in
, size_t in_len
,
123 uint32_t *out
, size_t *out_len
)
128 for (i
= 0; i
< in_len
; ++i
) {
/* Search key: a struct translation whose first member is in[i]. */
129 struct translation ts
= {in
[i
]};
/* presumably the space left in `out', with `o' the number of
 * elements written so far -- confirm */
130 size_t sub_len
= *out_len
- o
;
133 ret
= hangul_decomp(in
+ i
, in_len
- i
,
136 if (ret
== WIND_ERR_OVERRUN
)
140 void *s
= bsearch(&ts
,
141 _wind_normalize_table
,
142 _wind_normalize_table_size
,
143 sizeof(_wind_normalize_table
[0]),
146 const struct translation
*t
= (const struct translation
*)s
;
/* Recurse on the table entry's replacement sequence. */
148 ret
= compat_decomp(_wind_normalize_val_table
+ t
->val_offset
,
156 return WIND_ERR_OVERRUN
;
/*
 * Comparison callback: orders two uint32_t values by the result of
 * _wind_combining_class() for each of them.
 *
 * a, b - pointers to the two uint32_t values to compare
 *
 * Returns the class of *a minus the class of *b.
 */
static int
cc_cmp(const void *a, const void *b)
{
    const uint32_t lhs = *(const uint32_t *)a;
    const uint32_t rhs = *(const uint32_t *)b;

    return _wind_combining_class(lhs) - _wind_combining_class(rhs);
}
/*
 * Reorder tmp[0 .. tmp_len) in place so that, inside every run of
 * consecutive elements whose _wind_combining_class() is non-zero, the
 * elements appear in non-decreasing class order.  Elements of class
 * zero are never moved and nothing is moved across them.
 *
 * tmp     - buffer to reorder in place
 * tmp_len - number of elements in tmp
 *
 * The reordering is stable: elements with the same class keep their
 * relative order.  qsort() gives no such guarantee, and the order of
 * equal-class elements is significant here, so a stable insertion pass
 * is used instead.  It moves whole uint32_t elements, so it does not
 * depend on sizeof(unsigned) matching sizeof(uint32_t).
 */
static void
canonical_reorder(uint32_t *tmp, size_t tmp_len)
{
    size_t i;

    for (i = 1; i < tmp_len; ++i) {
	const uint32_t ch = tmp[i];
	size_t j = i;

	if (_wind_combining_class(ch) == 0)
	    continue;

	/*
	 * Shift strictly higher-class predecessors one slot up.  A
	 * predecessor of class zero never compares greater than `ch',
	 * so the scan stops at the start of the current run.
	 */
	while (j > 0 && cc_cmp(&tmp[j - 1], &ch) > 0) {
	    tmp[j] = tmp[j - 1];
	    --j;
	}
	tmp[j] = ch;
    }
}
/*
 * find_composition: first asks hangul_composition() about the input,
 * then runs a do/while loop that walks _wind_canon_table, starting at
 * node 0 and following links taken from _wind_canon_next_table until
 * the link (canon_index) is 0.
 * NOTE(review): the return type, the declarations of cur and i, the
 * use made of the hangul_composition() result, how `i' is derived and
 * what is returned are not among the lines shown here -- confirm in
 * the complete file.
 */
196 find_composition(const uint32_t *in
, unsigned in_len
)
198 unsigned short canon_index
= 0;
202 cur
= hangul_composition(in
, in_len
);
/* Current node of the walk. */
207 const struct canon_node
*c
= &_wind_canon_table
[canon_index
];
/* True when i is outside this node's [next_start, next_end) window. */
217 if (i
< c
->next_start
|| i
>= c
->next_end
)
/* Link for i: the node's slice of the next-table starts at
 * next_offset and is indexed relative to next_start. */
221 _wind_canon_next_table
[c
->next_offset
+ i
- c
->next_start
];
222 if (canon_index
!= 0) {
/* Shift cur left by four bits, keeping only the low 20 bits. */
223 cur
= (cur
<< 4) & 0xFFFFF;
226 } while (canon_index
!= 0);
/*
 * combine: walks in[0 .. in_len) and writes to `out'.  In the lines
 * shown, the outer loop first steps over elements whose
 * _wind_combining_class() is non-zero; later the pair
 * (out[ostarter], ...) held in `v' is offered to find_composition()
 * and, when that returns non-zero and the class differs from old_cc,
 * the result replaces out[ostarter].  Two paths return
 * WIND_ERR_OVERRUN.
 * NOTE(review): the return type, the declarations, the body of the
 * first while loop, the assignment of v[1], the conditions guarding
 * both WIND_ERR_OVERRUN returns and the update of *out_len are not
 * among the lines shown here -- in particular confirm in the complete
 * file that every store to `out' is bounds-checked.
 */
231 combine(const uint32_t *in
, size_t in_len
,
232 uint32_t *out
, size_t *out_len
)
/* No increment clause: i is advanced inside the loop body. */
240 for (i
= 0; i
< in_len
;) {
/* Runs while in[i] has a non-zero combining class. */
241 while (i
< in_len
&& (cc
= _wind_combining_class(in
[i
])) != 0) {
246 return WIND_ERR_OVERRUN
;
/* First element of the candidate pair: the value currently stored
 * at out[ostarter]. */
255 v
[0] = out
[ostarter
];
258 cc
= _wind_combining_class(in
[i
]);
/* Compose only when the class differs from old_cc and the
 * two-element sequence v has a composition. */
259 if (old_cc
!= cc
&& (comb
= find_composition(v
, 2))) {
260 out
[ostarter
] = comb
;
261 } else if (cc
== 0) {
265 return WIND_ERR_OVERRUN
;
/*
 * _wind_stringprep_normalize: normalizes in[0 .. in_len) into `out'.
 * The lines shown run three steps:
 *  1. compat_decomp() of the input into a malloc()ed scratch buffer
 *     of max(in_len * 4, MAX_LENGTH_CANON) uint32_t elements;
 *  2. canonical_reorder() of that buffer in place;
 *  3. combine() from the scratch buffer into out / out_len.
 * NOTE(review): in_len * 4 and tmp_len * sizeof(uint32_t) are computed
 * without an overflow check in the lines shown; a wrapped product
 * would make the allocation smaller than the tmp_len handed to
 * compat_decomp().
 * NOTE(review): the return type, the declarations, the check of the
 * malloc() result, the handling of compat_decomp()'s return value, the
 * release of tmp and the final return are not among the lines shown
 * here -- confirm in the complete file.
 */
278 _wind_stringprep_normalize(const uint32_t *in
, size_t in_len
,
279 uint32_t *out
, size_t *out_len
)
/* Scratch capacity: four elements per input element ... */
285 tmp_len
= in_len
* 4;
/* ... but never fewer than MAX_LENGTH_CANON. */
286 if (tmp_len
< MAX_LENGTH_CANON
)
287 tmp_len
= MAX_LENGTH_CANON
;
288 tmp
= malloc(tmp_len
* sizeof(uint32_t));
/* tmp_len is passed by address, so compat_decomp() may update it
 * before it is handed to the next two steps. */
292 ret
= compat_decomp(in
, in_len
, tmp
, &tmp_len
);
297 canonical_reorder(tmp
, tmp_len
);
298 ret
= combine(tmp
, tmp_len
, out
, out_len
);