d2d1: Implement d2d_d3d_render_target_CreateBitmap().
[wine/multimedia.git] / dlls / dwrite / analyzer.c
blob740b61edfc881ad3d23d5cc91927df63f6da892c
1 /*
2 * Text analyzer
4 * Copyright 2011 Aric Stewart for CodeWeavers
5 * Copyright 2012, 2014 Nikolay Sivov for CodeWeavers
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 #define COBJMACROS
24 #include "dwrite.h"
25 #include "dwrite_private.h"
27 WINE_DEFAULT_DEBUG_CHANNEL(dwrite);
29 extern const unsigned short wine_linebreak_table[];
31 enum scriptcode {
32 Script_Arabic = 0,
33 Script_Armenian = 1,
34 Script_Balinese = 2,
35 Script_Bengali = 3,
36 Script_Buginese = 6,
37 Script_Canadian = 8,
38 Script_Cherokee = 11,
39 Script_Controls = 12,
40 Script_Coptic = 13,
41 Script_Cyrillic = 16,
42 Script_Devanagari = 18,
43 Script_Ethiopic = 19,
44 Script_Georgian = 20,
45 Script_Glagolitic = 22,
46 Script_Greek = 23,
47 Script_Gujarati = 24,
48 Script_Gurmukhi = 25,
49 Script_Hangul = 27,
50 Script_Hebrew = 29,
51 Script_Kannada = 32,
52 Script_Khmer = 36,
53 Script_Lao = 37,
54 Script_Latin = 38,
55 Script_Lepcha = 39,
56 Script_Limbu = 40,
57 Script_Malayalam = 44,
58 Script_Mongolian = 45,
59 Script_Myanmar = 46,
60 Script_New_TaiLue = 47,
61 Script_NKo = 48,
62 Script_Ogham = 49,
63 Script_OlChiki = 50,
64 Script_Oriya = 53,
65 Script_Runic = 58,
66 Script_Sinhala = 61,
67 Script_Sundanese = 62,
68 Script_Syriac = 64,
69 Script_TaiLe = 67,
70 Script_Tamil = 68,
71 Script_Telugu = 69,
72 Script_Thaana = 70,
73 Script_Thai = 71,
74 Script_Tibetan = 72,
75 Script_Tifinagh = 73,
76 Script_Symbol = 77,
77 Script_Unknown = (UINT16)-1
80 struct script_range {
81 UINT16 script;
82 DWORD first;
83 DWORD last;
86 static const struct script_range script_ranges[] = {
87 /* C0 Controls: U+0000–U+001F */
88 /* ASCII punctuation and symbols: U+0020–U+002F */
89 /* ASCII digits: U+0030–U+0039 */
90 /* ASCII punctuation and symbols: U+003A–U+0040 */
91 { Script_Symbol, 0x00, 0x040 },
92 /* Latin uppercase: U+0041–U+005A */
93 { Script_Latin, 0x41, 0x5a },
94 /* ASCII punctuation and symbols: U+005B–U+0060 */
95 { Script_Symbol, 0x5b, 0x060 },
96 /* Latin lowercase: U+0061–U+007A */
97 { Script_Latin, 0x61, 0x7a },
98 /* ASCII punctuation and symbols, control char DEL: U+007B–U+007F */
99 { Script_Symbol, 0x7b, 0x7f },
100 /* C1 Controls: U+0080–U+009F */
101 { Script_Controls, 0x80, 0x9f },
102 /* Latin-1 Supplement: U+00A0–U+00FF */
103 /* Latin Extended-A: U+0100–U+017F */
104 /* Latin Extended-B: U+0180–U+024F */
105 /* IPA Extensions: U+0250–U+02AF */
106 /* Spacing Modifier Letters: U+02B0–U+02FF */
107 { Script_Latin, 0xa0, 0x2ff },
108 /* Combining Diacritical Marks: U+0300–U+036F */
109 { Script_Symbol, 0x300, 0x36f },
110 /* Greek: U+0370–U+03E1 */
111 { Script_Greek, 0x370, 0x3e1 },
112 /* Coptic: U+03E2–U+03Ef */
113 { Script_Coptic, 0x3e2, 0x3ef },
114 /* Greek: U+03F0–U+03FF */
115 { Script_Greek, 0x3f0, 0x3ff },
116 /* Cyrillic: U+0400–U+04FF */
117 /* Cyrillic Supplement: U+0500–U+052F */
118 /* Cyrillic Supplement range is incomplete cause it's based on Unicode 5.2
119 that doesn't define some Abkhaz and Azerbaijani letters, we support Unicode 6.0 range here */
120 { Script_Cyrillic, 0x400, 0x52f },
121 /* Armenian: U+0530–U+058F */
122 { Script_Armenian, 0x530, 0x58f },
123 /* Hebrew: U+0590–U+05FF */
124 { Script_Hebrew, 0x590, 0x5ff },
125 /* Arabic: U+0600–U+06FF */
126 { Script_Arabic, 0x600, 0x6ff },
127 /* Syriac: U+0600–U+06FF */
128 { Script_Syriac, 0x700, 0x74f },
129 /* Arabic Supplement: U+0750–U+077F */
130 { Script_Arabic, 0x750, 0x77f },
131 /* Thaana: U+0780–U+07BF */
132 { Script_Thaana, 0x780, 0x7bf },
133 /* N'Ko: U+07C0–U+07FF */
134 { Script_NKo, 0x7c0, 0x7ff },
135 /* Devanagari: U+0900–U+097F */
136 { Script_Devanagari, 0x900, 0x97f },
137 /* Bengali: U+0980–U+09FF */
138 { Script_Bengali, 0x980, 0x9ff },
139 /* Gurmukhi: U+0A00–U+0A7F */
140 { Script_Gurmukhi, 0xa00, 0xa7f },
141 /* Gujarati: U+0A80–U+0AFF */
142 { Script_Gujarati, 0xa80, 0xaff },
143 /* Oriya: U+0B00–U+0B7F */
144 { Script_Oriya, 0xb00, 0xb7f },
145 /* Tamil: U+0B80–U+0BFF */
146 { Script_Tamil, 0xb80, 0xbff },
147 /* Telugu: U+0C00–U+0C7F */
148 { Script_Telugu, 0xc00, 0xc7f },
149 /* Kannada: U+0C80–U+0CFF */
150 { Script_Kannada, 0xc80, 0xcff },
151 /* Malayalam: U+0D00–U+0D7F */
152 { Script_Malayalam, 0xd00, 0xd7f },
153 /* Sinhala: U+0D80–U+0DFF */
154 { Script_Sinhala, 0xd80, 0xdff },
155 /* Thai: U+0E00–U+0E7F */
156 { Script_Thai, 0xe00, 0xe7f },
157 /* Lao: U+0E80–U+0EFF */
158 { Script_Lao, 0xe80, 0xeff },
159 /* Tibetan: U+0F00–U+0FFF */
160 { Script_Tibetan, 0xf00, 0xfff },
161 /* Myanmar: U+1000–U+109F */
162 { Script_Myanmar, 0x1000, 0x109f },
163 /* Georgian: U+10A0–U+10FF */
164 { Script_Georgian, 0x10a0, 0x10ff },
165 /* Hangul Jamo: U+1100–U+11FF */
166 { Script_Hangul, 0x1100, 0x11ff },
167 /* Ethiopic: U+1200–U+137F */
168 /* Ethiopic Extensions: U+1380–U+139F */
169 { Script_Ethiopic, 0x1200, 0x139f },
170 /* Cherokee: U+13A0–U+13FF */
171 { Script_Cherokee, 0x13a0, 0x13ff },
172 /* Canadian Aboriginal Syllabics: U+1400–U+167F */
173 { Script_Canadian, 0x1400, 0x167f },
174 /* Ogham: U+1680–U+169F */
175 { Script_Ogham, 0x1680, 0x169f },
176 /* Runic: U+16A0–U+16F0 */
177 { Script_Runic, 0x16a0, 0x16f0 },
178 /* Khmer: U+1780–U+17FF */
179 { Script_Khmer, 0x1780, 0x17ff },
180 /* Mongolian: U+1800–U+18AF */
181 { Script_Mongolian, 0x1800, 0x18af },
182 /* Limbu: U+1900–U+194F */
183 { Script_Limbu, 0x1900, 0x194f },
184 /* Tai Le: U+1950–U+197F */
185 { Script_TaiLe, 0x1950, 0x197f },
186 /* New Tai Lue: U+1980–U+19DF */
187 { Script_New_TaiLue, 0x1980, 0x19df },
188 /* Khmer Symbols: U+19E0–U+19FF */
189 { Script_Khmer, 0x19e0, 0x19ff },
190 /* Buginese: U+1A00–U+1A1F */
191 { Script_Buginese, 0x1a00, 0x1a1f },
192 /* Tai Tham: U+1A20–U+1AAF */
193 { Script_Symbol, 0x1a20, 0x1aaf },
194 /* Balinese: U+1B00–U+1B7F */
195 { Script_Balinese, 0x1b00, 0x1b7f },
196 /* Sundanese: U+1B80–U+1BBF */
197 { Script_Sundanese, 0x1b80, 0x1bbf },
198 /* Batak: U+1BC0–U+1BFF */
199 { Script_Symbol, 0x1bc0, 0x1bff },
200 /* Lepcha: U+1C00–U+1C4F */
201 { Script_Lepcha, 0x1c00, 0x1c4f },
202 /* Ol Chiki: U+1C50–U+1C7F */
203 { Script_OlChiki, 0x1c50, 0x1c7f },
204 /* Sundanese Supplement: U+1CC0–U+1CCF */
205 { Script_Symbol, 0x1cc0, 0x1ccf },
206 /* Vedic Extensions: U+1CD0-U+1CFF */
207 { Script_Devanagari, 0x1cd0, 0x1cff },
208 /* Phonetic Extensions: U+1D00–U+1DBF */
209 { Script_Latin, 0x1d00, 0x1dbf },
210 /* Combining Diacritical Marks Supplement: U+1DC0–U+1DFF */
211 { Script_Symbol, 0x1dc0, 0x1dff },
212 /* Latin Extended Additional: U+1E00–U+1EFF */
213 { Script_Latin, 0x1e00, 0x1eff },
214 /* Greek Extended: U+1F00–U+1F00 */
215 { Script_Greek, 0x1f00, 0x1fff },
216 /* General Punctuation: U+2000–U+206f */
217 /* Superscripts and Subscripts: U+2070–U+209f */
218 /* Currency Symbols: U+20A0–U+20CF */
219 /* Combining Diacritical Marks for Symbols: U+20D0–U+20FF */
220 /* Letterlike Symbols: U+2100–U+214F */
221 /* Number Forms: U+2150–U+218F */
222 /* Arrows: U+2190–U+21FF */
223 /* Mathematical Operators: U+2200–U+22FF */
224 /* Miscellaneous Technical: U+2300–U+23FF */
225 /* Control Pictures: U+2400–U+243F */
226 /* Optical Character Recognition: U+2440–U+245F */
227 /* Enclosed Alphanumerics: U+2460–U+24FF */
228 /* Box Drawing: U+2500–U+25FF */
229 /* Block Elements: U+2580–U+259F */
230 /* Geometric Shapes: U+25A0–U+25FF */
231 /* Miscellaneous Symbols: U+2600–U+26FF */
232 /* Dingbats: U+2700–U+27BF */
233 /* Miscellaneous Mathematical Symbols-A: U+27C0–U+27EF */
234 /* Supplemental Arrows-A: U+27F0–U+27FF */
235 /* Braille Patterns: U+2800–U+28FF */
236 /* Supplemental Arrows-B: U+2900–U+297F */
237 /* Miscellaneous Mathematical Symbols-B: U+2980–U+29FF */
238 /* Supplemental Mathematical Operators: U+2A00–U+2AFF */
239 /* Miscellaneous Symbols and Arrows: U+2B00–U+2BFF */
240 { Script_Symbol, 0x2000, 0x2bff },
241 /* Glagolitic: U+2C00–U+2C5F */
242 { Script_Glagolitic, 0x2c00, 0x2c5f },
243 /* Latin Extended-C: U+2C60–U+2C7F */
244 { Script_Latin, 0x2c60, 0x2c7f },
245 /* Coptic: U+2C80–U+2CFF */
246 { Script_Coptic, 0x2c80, 0x2cff },
247 /* Georgian Supplement: U+2D00–U+2D2F */
248 { Script_Georgian, 0x2d00, 0x2d2f },
249 /* Tifinagh: U+2D30–U+2D7F */
250 { Script_Tifinagh, 0x2d30, 0x2d7f },
251 /* unsupported range */
252 { Script_Unknown }
255 static UINT16 get_char_script( WCHAR c )
257 DWORD ch = c;
258 unsigned int i;
260 for (i = 0; i < sizeof(script_ranges)/sizeof(struct script_range); i++)
262 const struct script_range *range = &script_ranges[i];
263 if (range->script == Script_Unknown || (range->first <= ch && range->last >= ch))
264 return range->script;
267 return Script_Unknown;
270 static HRESULT analyze_script(const WCHAR *text, UINT32 len, IDWriteTextAnalysisSink *sink)
272 DWRITE_SCRIPT_ANALYSIS sa;
273 UINT32 pos, i, length;
275 if (!len) return S_OK;
277 sa.script = get_char_script(*text);
279 pos = 0;
280 length = 1;
282 for (i = 1; i < len; i++)
284 UINT16 script = get_char_script(text[i]);
286 /* Script_Latin_Symb script type is ignored when preceded or followed by another script */
287 if (sa.script == Script_Symbol) sa.script = script;
288 if (script == Script_Symbol) script = sa.script;
289 /* this is a length of a sequence to be reported next */
290 if (sa.script == script) length++;
292 if (sa.script != script)
294 HRESULT hr;
296 sa.shapes = sa.script != Script_Controls ? DWRITE_SCRIPT_SHAPES_DEFAULT : DWRITE_SCRIPT_SHAPES_NO_VISUAL;
297 hr = IDWriteTextAnalysisSink_SetScriptAnalysis(sink, pos, length, &sa);
298 if (FAILED(hr)) return hr;
299 pos = i;
300 length = 1;
301 sa.script = script;
305 /* 1 length case or normal completion call */
306 sa.shapes = sa.script != Script_Controls ? DWRITE_SCRIPT_SHAPES_DEFAULT : DWRITE_SCRIPT_SHAPES_NO_VISUAL;
307 return IDWriteTextAnalysisSink_SetScriptAnalysis(sink, pos, length, &sa);
310 struct linebreaking_state {
311 DWRITE_LINE_BREAKPOINT *breakpoints;
312 UINT32 count;
/* Selects which side of a character a break condition applies to. */
enum BreakConditionLocation {
    BreakConditionBefore = 0,
    BreakConditionAfter  = 1
};
/* Line breaking classes as stored in wine_linebreak_table. Values start at 1 and
   are consecutive; they are spelled out so that the mapping is visible at a glance. */
enum linebreaking_classes {
    b_BK = 1,
    b_CR = 2,
    b_LF = 3,
    b_CM = 4,
    b_SG = 5,
    b_GL = 6,
    b_CB = 7,
    b_SP = 8,
    b_ZW = 9,
    b_NL = 10,
    b_WJ = 11,
    b_JL = 12,
    b_JV = 13,
    b_JT = 14,
    b_H2 = 15,
    b_H3 = 16,
    b_XX = 17,
    b_OP = 18,
    b_CL = 19,
    b_CP = 20,
    b_QU = 21,
    b_NS = 22,
    b_EX = 23,
    b_SY = 24,
    b_IS = 25,
    b_PR = 26,
    b_PO = 27,
    b_NU = 28,
    b_AL = 29,
    b_ID = 30,
    b_IN = 31,
    b_HY = 32,
    b_BB = 33,
    b_BA = 34,
    b_SA = 35,
    b_AI = 36,
    b_B2 = 37,
    b_HL = 38,
    b_CJ = 39,
    b_RI = 40
};
363 /* "Can break" is a weak condition, stronger "may not break" and "must break" override it. Initially all conditions are
364 set to "can break" and could only be changed once. */
365 static inline void set_break_condition(UINT32 pos, enum BreakConditionLocation location, DWRITE_BREAK_CONDITION condition,
366 struct linebreaking_state *state)
368 if (location == BreakConditionBefore) {
369 if (state->breakpoints[pos].breakConditionBefore != DWRITE_BREAK_CONDITION_CAN_BREAK)
370 return;
371 state->breakpoints[pos].breakConditionBefore = condition;
372 if (pos > 0)
373 state->breakpoints[pos-1].breakConditionAfter = condition;
375 else {
376 if (state->breakpoints[pos].breakConditionAfter != DWRITE_BREAK_CONDITION_CAN_BREAK)
377 return;
378 state->breakpoints[pos].breakConditionAfter = condition;
379 if (pos + 1 < state->count)
380 state->breakpoints[pos+1].breakConditionBefore = condition;
384 static inline WCHAR get_table_entry(const unsigned short *table, WCHAR ch)
386 return table[table[table[ch >> 8] + ((ch >> 4) & 0x0f)] + (ch & 0xf)];
389 static HRESULT analyze_linebreaks(const WCHAR *text, UINT32 count, DWRITE_LINE_BREAKPOINT *breakpoints)
391 struct linebreaking_state state;
392 short *break_class;
393 int i, j;
395 break_class = heap_alloc(count*sizeof(short));
396 if (!break_class)
397 return E_OUTOFMEMORY;
399 state.breakpoints = breakpoints;
400 state.count = count;
402 /* LB31 - allow breaks everywhere. It will be overridden if needed as
403 other rules dictate. */
404 for (i = 0; i < count; i++)
406 break_class[i] = get_table_entry(wine_linebreak_table, text[i]);
408 breakpoints[i].breakConditionBefore = DWRITE_BREAK_CONDITION_CAN_BREAK;
409 breakpoints[i].breakConditionAfter = DWRITE_BREAK_CONDITION_CAN_BREAK;
410 breakpoints[i].isWhitespace = break_class[i] == b_BK || break_class[i] == b_ZW || break_class[i] == b_SP || isspaceW(text[i]);
411 breakpoints[i].isSoftHyphen = FALSE;
412 breakpoints[i].padding = 0;
414 /* LB1 - resolve some classes. TODO: use external algorithms for these classes. */
415 switch (break_class[i])
417 case b_AI:
418 case b_SA:
419 case b_SG:
420 case b_XX:
421 break_class[i] = b_AL;
422 break;
423 case b_CJ:
424 break_class[i] = b_NS;
425 break;
429 /* LB2 - never break at the start */
430 set_break_condition(0, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
431 /* LB3 - always break at the end. This one is ignored. */
433 for (i = 0; i < count; i++)
435 switch (break_class[i])
437 /* LB4 - LB6 */
438 case b_CR:
439 /* LB5 - don't break CR x LF */
440 if (i < count-1 && break_class[i+1] == b_LF)
442 set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
443 set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
444 break;
446 case b_LF:
447 case b_NL:
448 case b_BK:
449 /* LB4 - LB5 - always break after hard breaks */
450 set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MUST_BREAK, &state);
451 /* LB6 - do not break before hard breaks */
452 set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
453 break;
454 /* LB7 - do not break before spaces */
455 case b_SP:
456 set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
457 break;
458 case b_ZW:
459 set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
460 /* LB8 - break before character after zero-width space, skip spaces inbetween */
461 while (i < count-1 && break_class[i+1] == b_SP)
462 i++;
463 set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_CAN_BREAK, &state);
464 break;
468 /* LB9 - LB10 */
469 for (i = 0; i < count; i++)
471 if (break_class[i] == b_CM)
473 if (i > 0)
475 switch (break_class[i-1])
477 case b_SP:
478 case b_BK:
479 case b_CR:
480 case b_LF:
481 case b_NL:
482 case b_ZW:
483 break_class[i] = b_AL;
484 break;
485 default:
486 break_class[i] = break_class[i-1];
489 else break_class[i] = b_AL;
493 for (i = 0; i < count; i++)
495 switch (break_class[i])
497 /* LB11 - don't break before and after word joiner */
498 case b_WJ:
499 set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
500 set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
501 break;
502 /* LB12 - don't break after glue */
503 case b_GL:
504 set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
505 /* LB12a */
506 if (i > 0)
508 if (break_class[i-1] != b_SP && break_class[i-1] != b_BA && break_class[i-1] != b_HY)
509 set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
511 break;
512 /* LB13 */
513 case b_CL:
514 case b_CP:
515 case b_EX:
516 case b_IS:
517 case b_SY:
518 set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
519 break;
520 /* LB14 */
521 case b_OP:
522 set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
523 while (i < count-1 && break_class[i+1] == b_SP) {
524 set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
525 i++;
527 break;
528 /* LB15 */
529 case b_QU:
530 j = i+1;
531 while (j < count-1 && break_class[j] == b_SP)
532 j++;
533 if (break_class[j] == b_OP)
534 for (; j > i; j--)
535 set_break_condition(j, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
536 break;
537 /* LB16 */
538 case b_NS:
539 j = i-1;
540 while(j > 0 && break_class[j] == b_SP)
541 j--;
542 if (break_class[j] == b_CL || break_class[j] == b_CP)
543 for (j++; j <= i; j++)
544 set_break_condition(j, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
545 break;
546 /* LB17 */
547 case b_B2:
548 j = i+1;
549 while (j < count && break_class[j] == b_SP)
550 j++;
551 if (break_class[j] == b_B2)
552 for (; j > i; j--)
553 set_break_condition(j, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
554 break;
558 for (i = 0; i < count; i++)
560 switch(break_class[i])
562 /* LB18 - break is allowed after space */
563 case b_SP:
564 set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_CAN_BREAK, &state);
565 break;
566 /* LB19 - don't break before or after quotation mark */
567 case b_QU:
568 set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
569 set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
570 break;
571 /* LB20 */
572 case b_CB:
573 set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_CAN_BREAK, &state);
574 set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_CAN_BREAK, &state);
575 break;
576 /* LB21 */
577 case b_BA:
578 case b_HY:
579 case b_NS:
580 set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
581 break;
582 case b_BB:
583 set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
584 break;
585 /* LB21a */
586 case b_HL:
587 if (i < count-2)
588 switch (break_class[i+1])
590 case b_HY:
591 case b_BA:
592 set_break_condition(i+1, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
594 break;
595 /* LB22 */
596 case b_IN:
597 if (i > 0)
599 switch (break_class[i-1])
601 case b_AL:
602 case b_HL:
603 case b_ID:
604 case b_IN:
605 case b_NU:
606 set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
609 break;
612 if (i < count-1)
614 /* LB23 */
615 if ((break_class[i] == b_ID && break_class[i+1] == b_PO) ||
616 (break_class[i] == b_AL && break_class[i+1] == b_NU) ||
617 (break_class[i] == b_HL && break_class[i+1] == b_NU) ||
618 (break_class[i] == b_NU && break_class[i+1] == b_AL) ||
619 (break_class[i] == b_NU && break_class[i+1] == b_HL))
620 set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
621 /* LB24 */
622 if ((break_class[i] == b_PR && break_class[i+1] == b_ID) ||
623 (break_class[i] == b_PR && break_class[i+1] == b_AL) ||
624 (break_class[i] == b_PR && break_class[i+1] == b_HL) ||
625 (break_class[i] == b_PO && break_class[i+1] == b_AL) ||
626 (break_class[i] == b_PO && break_class[i+1] == b_HL))
627 set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
629 /* LB25 */
630 if ((break_class[i] == b_CL && break_class[i+1] == b_PO) ||
631 (break_class[i] == b_CP && break_class[i+1] == b_PO) ||
632 (break_class[i] == b_CL && break_class[i+1] == b_PR) ||
633 (break_class[i] == b_CP && break_class[i+1] == b_PR) ||
634 (break_class[i] == b_NU && break_class[i+1] == b_PO) ||
635 (break_class[i] == b_NU && break_class[i+1] == b_PR) ||
636 (break_class[i] == b_PO && break_class[i+1] == b_OP) ||
637 (break_class[i] == b_PO && break_class[i+1] == b_NU) ||
638 (break_class[i] == b_PR && break_class[i+1] == b_OP) ||
639 (break_class[i] == b_PR && break_class[i+1] == b_NU) ||
640 (break_class[i] == b_HY && break_class[i+1] == b_NU) ||
641 (break_class[i] == b_IS && break_class[i+1] == b_NU) ||
642 (break_class[i] == b_NU && break_class[i+1] == b_NU) ||
643 (break_class[i] == b_SY && break_class[i+1] == b_NU))
644 set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
646 /* LB26 */
647 if (break_class[i] == b_JL)
649 switch (break_class[i+1])
651 case b_JL:
652 case b_JV:
653 case b_H2:
654 case b_H3:
655 set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
658 if ((break_class[i] == b_JV || break_class[i] == b_H2) &&
659 (break_class[i+1] == b_JV || break_class[i+1] == b_JT))
660 set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
661 if ((break_class[i] == b_JT || break_class[i] == b_H3) &&
662 break_class[i+1] == b_JT)
663 set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
665 /* LB27 */
666 switch (break_class[i])
668 case b_JL:
669 case b_JV:
670 case b_JT:
671 case b_H2:
672 case b_H3:
673 if (break_class[i+1] == b_IN || break_class[i+1] == b_PO)
674 set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
676 if (break_class[i] == b_PO)
678 switch (break_class[i+1])
680 case b_JL:
681 case b_JV:
682 case b_JT:
683 case b_H2:
684 case b_H3:
685 set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
689 /* LB28 */
690 if ((break_class[i] == b_AL && break_class[i+1] == b_AL) ||
691 (break_class[i] == b_AL && break_class[i+1] == b_HL) ||
692 (break_class[i] == b_HL && break_class[i+1] == b_AL) ||
693 (break_class[i] == b_HL && break_class[i+1] == b_HL))
694 set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
696 /* LB29 */
697 if ((break_class[i] == b_IS && break_class[i+1] == b_AL) ||
698 (break_class[i] == b_IS && break_class[i+1] == b_HL))
699 set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
701 /* LB30 */
702 if ((break_class[i] == b_AL || break_class[i] == b_HL || break_class[i] == b_NU) &&
703 break_class[i+1] == b_OP)
704 set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
705 if (break_class[i] == b_CP &&
706 (break_class[i+1] == b_AL || break_class[i] == b_HL || break_class[i] == b_NU))
707 set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
709 /* LB30a */
710 if (break_class[i] == b_RI && break_class[i+1] == b_RI)
711 set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
715 heap_free(break_class);
716 return S_OK;
719 static HRESULT WINAPI dwritetextanalyzer_QueryInterface(IDWriteTextAnalyzer *iface, REFIID riid, void **obj)
721 TRACE("(%s %p)\n", debugstr_guid(riid), obj);
723 if (IsEqualIID(riid, &IID_IUnknown) || IsEqualIID(riid, &IID_IDWriteTextAnalyzer))
725 *obj = iface;
726 return S_OK;
729 *obj = NULL;
730 return E_NOINTERFACE;
734 static ULONG WINAPI dwritetextanalyzer_AddRef(IDWriteTextAnalyzer *iface)
736 return 2;
739 static ULONG WINAPI dwritetextanalyzer_Release(IDWriteTextAnalyzer *iface)
741 return 1;
744 static HRESULT WINAPI dwritetextanalyzer_AnalyzeScript(IDWriteTextAnalyzer *iface,
745 IDWriteTextAnalysisSource* source, UINT32 position, UINT32 length, IDWriteTextAnalysisSink* sink)
747 const WCHAR *text;
748 HRESULT hr;
749 UINT32 len;
751 TRACE("(%p %u %u %p)\n", source, position, length, sink);
753 hr = IDWriteTextAnalysisSource_GetTextAtPosition(source, position, &text, &len);
754 if (FAILED(hr)) return hr;
756 return analyze_script(text, len, sink);
759 static HRESULT WINAPI dwritetextanalyzer_AnalyzeBidi(IDWriteTextAnalyzer *iface,
760 IDWriteTextAnalysisSource* source, UINT32 position, UINT32 length, IDWriteTextAnalysisSink* sink)
762 FIXME("(%p %u %u %p): stub\n", source, position, length, sink);
763 return E_NOTIMPL;
766 static HRESULT WINAPI dwritetextanalyzer_AnalyzeNumberSubstitution(IDWriteTextAnalyzer *iface,
767 IDWriteTextAnalysisSource* source, UINT32 position, UINT32 length, IDWriteTextAnalysisSink* sink)
769 FIXME("(%p %u %u %p): stub\n", source, position, length, sink);
770 return E_NOTIMPL;
773 static HRESULT WINAPI dwritetextanalyzer_AnalyzeLineBreakpoints(IDWriteTextAnalyzer *iface,
774 IDWriteTextAnalysisSource* source, UINT32 position, UINT32 length, IDWriteTextAnalysisSink* sink)
776 DWRITE_LINE_BREAKPOINT *breakpoints = NULL;
777 WCHAR *buff = NULL;
778 const WCHAR *text;
779 HRESULT hr;
780 UINT32 len;
782 TRACE("(%p %u %u %p)\n", source, position, length, sink);
784 if (length == 0)
785 return S_OK;
787 /* get some, check for length */
788 text = NULL;
789 len = 0;
790 hr = IDWriteTextAnalysisSource_GetTextAtPosition(source, position, &text, &len);
791 if (FAILED(hr)) return hr;
793 if (len < length) {
794 UINT32 read;
796 buff = heap_alloc(length*sizeof(WCHAR));
797 if (!buff)
798 return E_OUTOFMEMORY;
799 memcpy(buff, text, len*sizeof(WCHAR));
800 read = len;
802 while (read < length && text) {
803 text = NULL;
804 len = 0;
805 hr = IDWriteTextAnalysisSource_GetTextAtPosition(source, read, &text, &len);
806 if (FAILED(hr))
807 goto done;
808 memcpy(&buff[read], text, min(len, length-read)*sizeof(WCHAR));
809 read += len;
812 text = buff;
815 breakpoints = heap_alloc(length*sizeof(*breakpoints));
816 if (!breakpoints) {
817 hr = E_OUTOFMEMORY;
818 goto done;
821 hr = analyze_linebreaks(text, length, breakpoints);
822 if (FAILED(hr))
823 goto done;
825 hr = IDWriteTextAnalysisSink_SetLineBreakpoints(sink, position, length, breakpoints);
827 done:
828 heap_free(breakpoints);
829 heap_free(buff);
831 return hr;
834 static HRESULT WINAPI dwritetextanalyzer_GetGlyphs(IDWriteTextAnalyzer *iface,
835 WCHAR const* text, UINT32 length, IDWriteFontFace* font_face, BOOL is_sideways,
836 BOOL is_rtl, DWRITE_SCRIPT_ANALYSIS const* analysis, WCHAR const* locale,
837 IDWriteNumberSubstitution* substitution, DWRITE_TYPOGRAPHIC_FEATURES const** features,
838 UINT32 const* feature_range_len, UINT32 feature_ranges, UINT32 max_glyph_count,
839 UINT16* clustermap, DWRITE_SHAPING_TEXT_PROPERTIES* text_props, UINT16* glyph_indices,
840 DWRITE_SHAPING_GLYPH_PROPERTIES* glyph_props, UINT32* actual_glyph_count)
842 FIXME("(%s:%u %p %d %d %p %s %p %p %p %u %u %p %p %p %p %p): stub\n", debugstr_wn(text, length),
843 length, font_face, is_sideways, is_rtl, analysis, debugstr_w(locale), substitution, features, feature_range_len,
844 feature_ranges, max_glyph_count, clustermap, text_props, glyph_indices, glyph_props, actual_glyph_count);
845 return E_NOTIMPL;
848 static HRESULT WINAPI dwritetextanalyzer_GetGlyphPlacements(IDWriteTextAnalyzer *iface,
849 WCHAR const* text, UINT16 const* clustermap, DWRITE_SHAPING_TEXT_PROPERTIES* props,
850 UINT32 text_len, UINT16 const* glyph_indices, DWRITE_SHAPING_GLYPH_PROPERTIES const* glyph_props,
851 UINT32 glyph_count, IDWriteFontFace * font_face, FLOAT fontEmSize, BOOL is_sideways, BOOL is_rtl,
852 DWRITE_SCRIPT_ANALYSIS const* analysis, WCHAR const* locale, DWRITE_TYPOGRAPHIC_FEATURES const** features,
853 UINT32 const* feature_range_len, UINT32 feature_ranges, FLOAT* glyph_advances, DWRITE_GLYPH_OFFSET* glyph_offsets)
855 FIXME("(%s %p %p %u %p %p %u %p %f %d %d %p %s %p %p %u %p %p): stub\n", debugstr_w(text),
856 clustermap, props, text_len, glyph_indices, glyph_props, glyph_count, font_face, fontEmSize, is_sideways,
857 is_rtl, analysis, debugstr_w(locale), features, feature_range_len, feature_ranges, glyph_advances, glyph_offsets);
858 return E_NOTIMPL;
861 static HRESULT WINAPI dwritetextanalyzer_GetGdiCompatibleGlyphPlacements(IDWriteTextAnalyzer *iface,
862 WCHAR const* text, UINT16 const* clustermap, DWRITE_SHAPING_TEXT_PROPERTIES* props,
863 UINT32 text_len, UINT16 const* glyph_indices, DWRITE_SHAPING_GLYPH_PROPERTIES const* glyph_props,
864 UINT32 glyph_count, IDWriteFontFace * font_face, FLOAT fontEmSize, FLOAT pixels_per_dip,
865 DWRITE_MATRIX const* transform, BOOL use_gdi_natural, BOOL is_sideways, BOOL is_rtl,
866 DWRITE_SCRIPT_ANALYSIS const* analysis, WCHAR const* locale, DWRITE_TYPOGRAPHIC_FEATURES const** features,
867 UINT32 const* feature_range_lengths, UINT32 feature_ranges, FLOAT* glyph_advances, DWRITE_GLYPH_OFFSET* glyph_offsets)
869 FIXME("(%s %p %p %u %p %p %u %p %f %f %p %d %d %d %p %s %p %p %u %p %p): stub\n", debugstr_w(text),
870 clustermap, props, text_len, glyph_indices, glyph_props, glyph_count, font_face, fontEmSize, pixels_per_dip,
871 transform, use_gdi_natural, is_sideways, is_rtl, analysis, debugstr_w(locale), features, feature_range_lengths,
872 feature_ranges, glyph_advances, glyph_offsets);
873 return E_NOTIMPL;
876 static const struct IDWriteTextAnalyzerVtbl textanalyzervtbl = {
877 dwritetextanalyzer_QueryInterface,
878 dwritetextanalyzer_AddRef,
879 dwritetextanalyzer_Release,
880 dwritetextanalyzer_AnalyzeScript,
881 dwritetextanalyzer_AnalyzeBidi,
882 dwritetextanalyzer_AnalyzeNumberSubstitution,
883 dwritetextanalyzer_AnalyzeLineBreakpoints,
884 dwritetextanalyzer_GetGlyphs,
885 dwritetextanalyzer_GetGlyphPlacements,
886 dwritetextanalyzer_GetGdiCompatibleGlyphPlacements
889 static IDWriteTextAnalyzer textanalyzer = { &textanalyzervtbl };
891 HRESULT get_textanalyzer(IDWriteTextAnalyzer **ret)
893 *ret = &textanalyzer;
894 return S_OK;