usp10: Update the line breaking algorithm to Unicode 6.2.0.
[wine.git] / dlls / usp10 / breaking.c
blob 6e044b2b9b077c01325b357b93762be432be4e66
1 /*
2 * Implementation of line breaking algorithm for the Uniscribe Script Processor
4 * Copyright 2011 CodeWeavers, Aric Stewart
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 #include "config.h"
22 #include <stdarg.h>
23 #include <stdio.h>
24 #include <stdlib.h>
26 #include "windef.h"
27 #include "winbase.h"
28 #include "winuser.h"
29 #include "wingdi.h"
30 #include "winnls.h"
31 #include "usp10.h"
32 #include "winternl.h"
34 #include "wine/debug.h"
35 #include "usp10_internal.h"
37 WINE_DEFAULT_DEBUG_CHANNEL(uniscribe);
39 extern const unsigned short wine_linebreak_table[];
/*
 * Line breaking classes.  BREAK_line() compares these values with the
 * entries it reads from wine_linebreak_table, so the numbering (starting
 * at 1, in exactly this order) must not change.
 */
enum breaking_types
{
    b_BK = 1, b_CR, b_LF, b_CM, b_SG, b_GL, b_CB, b_SP,
    b_ZW, b_NL, b_WJ, b_JL, b_JV, b_JT, b_H2, b_H3,
    b_XX, b_OP, b_CL, b_CP, b_QU, b_NS, b_EX, b_SY,
    b_IS, b_PR, b_PO, b_NU, b_AL, b_ID, b_IN, b_HY,
    b_BB, b_BA, b_SA, b_AI, b_B2, b_HL, b_CJ, b_RI
};

/*
 * Status of the position in front of a character; 0 means "not decided yet".
 *   b_r  set after a hard line terminator (traced as '!')
 *   b_s  a break is allowed here           (traced as '+')
 *   b_x  a break is not allowed here       (traced as 'x')
 */
enum breaking_class
{
    b_r = 1,
    b_s,
    b_x
};
45 static void debug_output_breaks(const short* breaks, int count)
47 if (TRACE_ON(uniscribe))
49 int i;
50 TRACE("[");
51 for (i = 0; i < count && i < 200; i++)
53 switch (breaks[i])
55 case b_x: TRACE("x"); break;
56 case b_r: TRACE("!"); break;
57 case b_s: TRACE("+"); break;
58 default: TRACE("*");
61 if (i == 200)
62 TRACE("...");
63 TRACE("]\n");
/*
 * Record a break status for a position unless an earlier rule already
 * decided it: *before is only written while it still holds 0.
 */
static inline void else_break(short* before, short class)
{
    if (*before != 0)
        return;

    *before = class;
}
72 void BREAK_line(const WCHAR *chars, int count, const SCRIPT_ANALYSIS *sa, SCRIPT_LOGATTR *la)
74 int i,j;
75 short *break_class;
76 short *break_before;
78 TRACE("In %s\n",debugstr_wn(chars,count));
80 break_class = HeapAlloc(GetProcessHeap(),0, count * sizeof(short));
81 break_before = HeapAlloc(GetProcessHeap(),0, count * sizeof(short));
83 for (i = 0; i < count; i++)
85 break_class[i] = get_table_entry( wine_linebreak_table, chars[i] );
86 break_before[i] = 0;
88 memset(&la[i],0,sizeof(SCRIPT_LOGATTR));
90 la[i].fCharStop = TRUE;
91 switch (break_class[i])
93 case b_BK:
94 case b_ZW:
95 case b_SP:
96 la[i].fWhiteSpace = TRUE;
97 break;
98 case b_CM:
99 la[i].fCharStop = FALSE;
103 /* LB1 */
104 /* TODO: Have outside algorithms for these scripts */
105 for (i = 0; i < count; i++)
107 switch(break_class[i])
109 case b_AI:
110 case b_SA:
111 case b_SG:
112 case b_XX:
113 break_class[i] = b_AL;
114 case b_CJ:
115 break_class[i] = b_NS;
119 /* LB2 - LB3 */
120 break_before[0] = b_x;
121 for (i = 0; i < count; i++)
123 switch(break_class[i])
125 /* LB4 - LB6 */
126 case b_CR:
127 if (i < count-1 && break_class[i+1] == b_LF)
129 else_break(&break_before[i],b_x);
130 else_break(&break_before[i+1],b_x);
131 break;
133 case b_LF:
134 case b_NL:
135 case b_BK:
136 if (i < count-1) else_break(&break_before[i+1],b_r);
137 else_break(&break_before[i],b_x);
138 break;
139 /* LB7 */
140 case b_SP:
141 else_break(&break_before[i],b_x);
142 break;
143 case b_ZW:
144 else_break(&break_before[i],b_x);
145 /* LB8 */
146 while (i < count-1 && break_class[i+1] == b_SP)
147 i++;
148 else_break(&break_before[i],b_s);
149 break;
153 debug_output_breaks(break_before,count);
155 /* LB9 - LB10 */
156 for (i = 0; i < count; i++)
158 if (break_class[i] == b_CM)
160 if (i > 0)
162 switch (break_class[i-1])
164 case b_SP:
165 case b_BK:
166 case b_CR:
167 case b_LF:
168 case b_NL:
169 case b_ZW:
170 break_class[i] = b_AL;
171 break;
172 default:
173 break_class[i] = break_class[i-1];
176 else break_class[i] = b_AL;
180 for (i = 0; i < count; i++)
182 switch(break_class[i])
184 /* LB11 */
185 case b_WJ:
186 else_break(&break_before[i],b_x);
187 if (i < count-1)
188 else_break(&break_before[i+1],b_x);
189 break;
190 /* LB12 */
191 case b_GL:
192 if (i < count-1)
193 else_break(&break_before[i+1],b_x);
194 /* LB12a */
195 if (i > 0)
197 if (break_class[i-1] != b_SP &&
198 break_class[i-1] != b_BA &&
199 break_class[i-1] != b_HY)
200 else_break(&break_before[i],b_x);
202 break;
203 /* LB13 */
204 case b_CL:
205 case b_CP:
206 case b_EX:
207 case b_IS:
208 case b_SY:
209 else_break(&break_before[i],b_x);
210 break;
211 /* LB14 */
212 case b_OP:
213 while (i < count-1 && break_class[i+1] == b_SP)
215 else_break(&break_before[i+1],b_x);
216 i++;
218 else_break(&break_before[i+1],b_x);
219 break;
220 /* LB15 */
221 case b_QU:
222 j = i+1;
223 while (j < count-1 && break_class[j] == b_SP)
224 j++;
225 if (break_class[j] == b_OP)
227 for (; j > i; j--)
228 else_break(&break_before[j],b_x);
230 break;
231 /* LB16 */
232 case b_NS:
233 j = i-1;
234 while(j > 0 && break_class[j] == b_SP)
235 j--;
236 if (break_class[j] == b_CL || break_class[j] == b_CP)
238 for (j++; j <= i; j++)
239 else_break(&break_before[j],b_x);
241 break;
242 /* LB17 */
243 case b_B2:
244 j = i+1;
245 while (j < count && break_class[j] == b_SP)
246 j++;
247 if (break_class[j] == b_B2)
249 for (; j > i; j--)
250 else_break(&break_before[j],b_x);
252 break;
256 debug_output_breaks(break_before,count);
258 for (i = 0; i < count; i++)
260 switch(break_class[i])
262 /* LB18 */
263 case b_SP:
264 if (i < count-1)
265 else_break(&break_before[i+1],b_s);
266 break;
267 /* LB19 */
268 case b_QU:
269 else_break(&break_before[i],b_x);
270 if (i < count-1)
271 else_break(&break_before[i+1],b_x);
272 break;
273 /* LB20 */
274 case b_CB:
275 else_break(&break_before[i],b_s);
276 if (i < count-1)
277 else_break(&break_before[i+1],b_s);
278 /* LB21 */
279 case b_BA:
280 case b_HY:
281 case b_NS:
282 else_break(&break_before[i],b_x);
283 break;
284 case b_BB:
285 if (i < count-1)
286 else_break(&break_before[i+1],b_x);
287 break;
288 /* LB21a */
289 case b_HL:
290 if (i < count-2)
291 switch (break_class[i+1])
293 case b_HY:
294 case b_BA:
295 else_break(&break_before[i+2], b_x);
297 break;
298 /* LB22 */
299 case b_IN:
300 if (i > 0)
302 switch (break_class[i-1])
304 case b_AL:
305 case b_HL:
306 case b_ID:
307 case b_IN:
308 case b_NU:
309 else_break(&break_before[i], b_x);
312 break;
315 if (i < count-1)
317 /* LB23 */
318 if ((break_class[i] == b_ID && break_class[i+1] == b_PO) ||
319 (break_class[i] == b_AL && break_class[i+1] == b_NU) ||
320 (break_class[i] == b_HL && break_class[i+1] == b_NU) ||
321 (break_class[i] == b_NU && break_class[i+1] == b_AL) ||
322 (break_class[i] == b_NU && break_class[i+1] == b_HL))
323 else_break(&break_before[i+1],b_x);
324 /* LB24 */
325 if ((break_class[i] == b_PR && break_class[i+1] == b_ID) ||
326 (break_class[i] == b_PR && break_class[i+1] == b_AL) ||
327 (break_class[i] == b_PR && break_class[i+1] == b_HL) ||
328 (break_class[i] == b_PO && break_class[i+1] == b_AL) ||
329 (break_class[i] == b_PO && break_class[i+1] == b_HL))
330 else_break(&break_before[i+1],b_x);
332 /* LB25 */
333 if ((break_class[i] == b_CL && break_class[i+1] == b_PO) ||
334 (break_class[i] == b_CP && break_class[i+1] == b_PO) ||
335 (break_class[i] == b_CL && break_class[i+1] == b_PR) ||
336 (break_class[i] == b_CP && break_class[i+1] == b_PR) ||
337 (break_class[i] == b_NU && break_class[i+1] == b_PO) ||
338 (break_class[i] == b_NU && break_class[i+1] == b_PR) ||
339 (break_class[i] == b_PO && break_class[i+1] == b_OP) ||
340 (break_class[i] == b_PO && break_class[i+1] == b_NU) ||
341 (break_class[i] == b_PR && break_class[i+1] == b_OP) ||
342 (break_class[i] == b_PR && break_class[i+1] == b_NU) ||
343 (break_class[i] == b_HY && break_class[i+1] == b_NU) ||
344 (break_class[i] == b_IS && break_class[i+1] == b_NU) ||
345 (break_class[i] == b_NU && break_class[i+1] == b_NU) ||
346 (break_class[i] == b_SY && break_class[i+1] == b_NU))
347 else_break(&break_before[i+1],b_x);
349 /* LB26 */
350 if (break_class[i] == b_JL)
352 switch (break_class[i+1])
354 case b_JL:
355 case b_JV:
356 case b_H2:
357 case b_H3:
358 else_break(&break_before[i+1],b_x);
361 if ((break_class[i] == b_JV || break_class[i] == b_H2) &&
362 (break_class[i+1] == b_JV || break_class[i+1] == b_JT))
363 else_break(&break_before[i+1],b_x);
364 if ((break_class[i] == b_JT || break_class[i] == b_H3) &&
365 break_class[i+1] == b_JT)
366 else_break(&break_before[i+1],b_x);
368 /* LB27 */
369 switch (break_class[i])
371 case b_JL:
372 case b_JV:
373 case b_JT:
374 case b_H2:
375 case b_H3:
376 if (break_class[i+1] == b_IN || break_class[i+1] == b_PO)
377 else_break(&break_before[i+1],b_x);
379 if (break_class[i] == b_PO)
381 switch (break_class[i+1])
383 case b_JL:
384 case b_JV:
385 case b_JT:
386 case b_H2:
387 case b_H3:
388 else_break(&break_before[i+1],b_x);
392 /* LB28 */
393 if ((break_class[i] == b_AL && break_class[i+1] == b_AL) ||
394 (break_class[i] == b_AL && break_class[i+1] == b_HL) ||
395 (break_class[i] == b_HL && break_class[i+1] == b_AL) ||
396 (break_class[i] == b_HL && break_class[i+1] == b_HL))
397 else_break(&break_before[i+1],b_x);
399 /* LB29 */
400 if ((break_class[i] == b_IS && break_class[i+1] == b_AL) ||
401 (break_class[i] == b_IS && break_class[i+1] == b_HL))
402 else_break(&break_before[i+1],b_x);
404 /* LB30 */
405 if ((break_class[i] == b_AL || break_class[i] == b_HL || break_class[i] == b_NU) &&
406 break_class[i+1] == b_OP)
407 else_break(&break_before[i+1],b_x);
408 if (break_class[i] == b_CP &&
409 (break_class[i+1] == b_AL || break_class[i] == b_HL || break_class[i] == b_NU))
410 else_break(&break_before[i+1],b_x);
412 /* LB30a */
413 if (break_class[i] == b_RI && break_class[i+1] == b_RI)
414 else_break(&break_before[i+1],b_x);
417 debug_output_breaks(break_before,count);
419 /* LB31 */
420 for (i = 0; i < count-1; i++)
421 else_break(&break_before[i+1],b_s);
423 debug_output_breaks(break_before,count);
424 for (i = 0; i < count; i++)
426 if (break_before[i] != b_x)
428 la[i].fSoftBreak = TRUE;
429 la[i].fWordStop = TRUE;
433 HeapFree(GetProcessHeap(), 0, break_before);
434 HeapFree(GetProcessHeap(), 0, break_class);