/*
 * Implementation of Indic Syllables for the Uniscribe Script Processor
 *
 * Copyright 2011 CodeWeavers, Aric Stewart
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */
34 #include "wine/debug.h"
35 #include "wine/heap.h"
36 #include "usp10_internal.h"
38 WINE_DEFAULT_DEBUG_CHANNEL(uniscribe
);
/*
 * debug_output_string
 *
 * Debug helper: when the "uniscribe" trace channel is enabled, walks
 * char_count entries (index i from 0) and emits a short tag per lexical
 * class through TRACE():
 *   Consonant "C", Ra "Ra", Vowel "V", Nukta "N", Halant "H",
 *   ZWNJ "Zwnj", ZWJ "Zwj", Matra_post "Mp", Matra_above "Ma",
 *   Matra_below "Mb", Matra_pre "Mm", Modifier "Sm", Vedic "Vd",
 *   Anudatta "A", Composed_Vowel "t".
 *
 * NOTE(review): the numeric prefixes jump (40, 43, 45, 49...), so this view
 * is missing lines: the declaration of i, the switch selector (presumably
 * f(str[i]) - confirm), any default case and all braces are not visible.
 */
40 static void debug_output_string(const WCHAR
*str
, unsigned int char_count
, lexical_function f
)
/* Nothing below runs unless tracing is turned on for this channel. */
43 if (TRACE_ON(uniscribe
))
45 for (i
= 0; i
< char_count
; ++i
)
/* One tag per lexical class; see the table in the header comment. */
49 case lex_Consonant
: TRACE("C"); break;
50 case lex_Ra
: TRACE("Ra"); break;
51 case lex_Vowel
: TRACE("V"); break;
52 case lex_Nukta
: TRACE("N"); break;
53 case lex_Halant
: TRACE("H"); break;
54 case lex_ZWNJ
: TRACE("Zwnj"); break;
55 case lex_ZWJ
: TRACE("Zwj"); break;
56 case lex_Matra_post
: TRACE("Mp");break;
57 case lex_Matra_above
: TRACE("Ma");break;
58 case lex_Matra_below
: TRACE("Mb");break;
59 case lex_Matra_pre
: TRACE("Mm");break;
60 case lex_Modifier
: TRACE("Sm"); break;
61 case lex_Vedic
: TRACE("Vd"); break;
62 case lex_Anudatta
: TRACE("A"); break;
63 case lex_Composed_Vowel
: TRACE("t"); break;
/*
 * is_matra
 *
 * Returns non-zero when type is one of the matra classes
 * (lex_Matra_above, lex_Matra_below, lex_Matra_pre, lex_Matra_post)
 * or lex_Composed_Vowel, which this parser treats the same way.
 *
 * NOTE(review): the enclosing braces are not visible in this view.
 */
72 static inline BOOL
is_matra( int type
)
74 return (type
== lex_Matra_above
|| type
== lex_Matra_below
||
75 type
== lex_Matra_pre
|| type
== lex_Matra_post
||
76 type
== lex_Composed_Vowel
);
/*
 * is_joiner
 *
 * Returns non-zero when type is lex_ZWJ or lex_ZWNJ.
 *
 * NOTE(review): the enclosing braces are not visible in this view.
 */
79 static inline BOOL
is_joiner( int type
)
81 return (type
== lex_ZWJ
|| type
== lex_ZWNJ
);
/*
 * consonant_header
 *
 * Tests whether a consonant "header" begins at input[next].
 *
 * Visible checks, in order:
 *   - input[next] must be a consonant, otherwise -1 is returned;
 *   - an optional Nukta;
 *   - either a Halant, an optional joiner (ZWJ/ZWNJ) and a consonant,
 *     or a joiner directly followed by a Halant and then a consonant.
 *
 * Parameters: input/cChar are the text and its length, next is the index
 * to examine, lex classifies a character. start is not referenced on any
 * visible line.
 *
 * NOTE(review): the statements that advance next and the non-failure
 * return values are not visible in this view (numeric prefixes skip
 * 88, 90, 92-93, 95, 97-98, 100-101, 103+), so the success result is not
 * documented here.
 */
84 static int consonant_header(const WCHAR
*input
, unsigned int cChar
,
85 unsigned int start
, unsigned int next
, lexical_function lex
)
/* NOTE(review): no "next < cChar" guard is visible on this first read. */
87 if (!is_consonant( lex(input
[next
]) )) return -1;
89 if ((next
< cChar
) && lex(input
[next
]) == lex_Nukta
)
91 if ((next
< cChar
) && lex(input
[next
])==lex_Halant
)
94 if((next
< cChar
) && is_joiner( lex(input
[next
]) ))
96 if ((next
< cChar
) && is_consonant( lex(input
[next
]) ))
/* NOTE(review): input[next+1] is read while only next < cChar is checked. */
99 else if ((next
< cChar
) && is_joiner( lex(input
[next
]) ) && lex(input
[next
+1])==lex_Halant
)
102 if ((next
< cChar
) && is_consonant( lex(input
[next
]) ))
/*
 * parse_consonant_syllable
 *
 * Parses a consonant-based syllable beginning at input[next].
 *
 * Visible structure:
 *   - consonant_header() is called repeatedly in a do/while loop until it
 *     returns -1;
 *   - the parse continues when at least one header was counted or
 *     input[next] is itself a consonant;
 *   - then, each guarded by next < cChar: Nukta, Anudatta, then either
 *     Halant plus a joiner, or a run of matras followed by Nukta and
 *     Halant, and finally Modifier and Vedic.
 *
 * main is an out-parameter; how it is written is not visible here.
 *
 * NOTE(review): local declarations, the loop body after the
 * consonant_header() call, every branch body and the return statements are
 * missing from this view, so the return value is not documented.
 */
108 static int parse_consonant_syllable(const WCHAR
*input
, unsigned int cChar
,
109 unsigned int start
, unsigned int *main
, unsigned int next
, lexical_function lex
)
/* Consume as many leading consonant headers as will match. */
115 check
= consonant_header(input
,cChar
,start
,next
,lex
);
121 } while (check
!= -1);
122 if (headers
|| is_consonant( lex(input
[next
]) ))
129 if ((next
< cChar
) && lex(input
[next
]) == lex_Nukta
)
131 if ((next
< cChar
) && lex(input
[next
]) == lex_Anudatta
)
134 if ((next
< cChar
) && lex(input
[next
]) == lex_Halant
)
137 if((next
< cChar
) && is_joiner( lex(input
[next
]) ))
140 else if (next
< cChar
)
142 while((next
< cChar
) && is_matra( lex(input
[next
]) ))
144 if ((next
< cChar
) && lex(input
[next
]) == lex_Nukta
)
146 if ((next
< cChar
) && lex(input
[next
]) == lex_Halant
)
149 if ((next
< cChar
) && lex(input
[next
]) == lex_Modifier
)
151 if ((next
< cChar
) && lex(input
[next
]) == lex_Vedic
)
/*
 * parse_vowel_syllable
 *
 * Parses the remainder of a vowel-based syllable; next is the index just
 * past the leading element chosen by the caller.
 *
 * Visible checks, in order:
 *   - optional Nukta;
 *   - one of: joiner + Halant + consonant, Halant + consonant,
 *     or ZWJ + consonant;
 *   - a run of matras, followed by Nukta and Halant;
 *   - Modifier, then Vedic.
 *
 * NOTE(review): the look-ahead reads input[next+1] and input[next+2] are
 * guarded only by next < cChar on the visible lines - confirm the buffer
 * is terminated or padded. Branch bodies and the return statement are
 * missing from this view. start is not referenced on any visible line.
 */
156 static int parse_vowel_syllable(const WCHAR
*input
, unsigned int cChar
,
157 unsigned int start
, unsigned int next
, lexical_function lex
)
159 if ((next
< cChar
) && lex(input
[next
]) == lex_Nukta
)
161 if ((next
< cChar
) && is_joiner( lex(input
[next
]) ) && lex(input
[next
+1])==lex_Halant
&& is_consonant( lex(input
[next
+2]) ))
163 else if ((next
< cChar
) && lex(input
[next
])==lex_Halant
&& is_consonant( lex(input
[next
+1]) ))
165 else if ((next
< cChar
) && lex(input
[next
])==lex_ZWJ
&& is_consonant( lex(input
[next
+1]) ))
168 if ((next
< cChar
) && is_matra( lex(input
[next
]) ))
170 while((next
< cChar
) && is_matra( lex(input
[next
]) ))
172 if ((next
< cChar
) && lex(input
[next
]) == lex_Nukta
)
174 if ((next
< cChar
) && lex(input
[next
]) == lex_Halant
)
178 if ((next
< cChar
) && lex(input
[next
]) == lex_Modifier
)
180 if ((next
< cChar
) && lex(input
[next
]) == lex_Vedic
)
/*
 * Indic_process_next_syllable
 *
 * Chooses the syllable parser for the text at input[next] and returns
 * that parser's result.
 *
 * Dispatch visible here:
 *   - Vowel                              -> parse_vowel_syllable at next+1
 *   - Ra, Halant, Vowel (cChar > next+3) -> parse_vowel_syllable at next+3
 *   - NBSP when start == next            -> parse_vowel_syllable at next+1
 *   - Ra, Halant, NBSP when start == next and cChar > next+3
 *                                        -> parse_vowel_syllable at next+3
 *   - anything else                      -> parse_consonant_syllable at next
 *
 * main is forwarded only to parse_consonant_syllable on the visible lines.
 *
 * NOTE(review): statements between each condition and its return (prefixes
 * 189-190, 194-195, 200-201, 205-206) are missing from this view.
 */
185 static int Indic_process_next_syllable(const WCHAR
*input
, unsigned int cChar
,
186 unsigned int start
, unsigned int *main
, unsigned int next
, lexical_function lex
)
188 if (lex(input
[next
])==lex_Vowel
)
191 return parse_vowel_syllable(input
, cChar
, start
, next
+1, lex
);
193 else if ((cChar
> next
+3) && lex(input
[next
]) == lex_Ra
&& lex(input
[next
+1]) == lex_Halant
&& lex(input
[next
+2]) == lex_Vowel
)
196 return parse_vowel_syllable(input
, cChar
, start
, next
+3, lex
);
/* The NBSP forms are only accepted at the very start (start == next). */
199 else if (start
== next
&& lex(input
[next
])==lex_NBSP
)
202 return parse_vowel_syllable(input
, cChar
, start
, next
+1, lex
);
204 else if (start
== next
&& (cChar
> next
+3) && lex(input
[next
]) == lex_Ra
&& lex(input
[next
+1]) == lex_Halant
&& lex(input
[next
+2]) == lex_NBSP
)
207 return parse_vowel_syllable(input
, cChar
, start
, next
+3, lex
);
210 return parse_consonant_syllable(input
, cChar
, start
, main
, next
, lex
);
/*
 * Consonant_is_post_base_form
 *
 * Reports whether the current base consonant of syllable s takes a
 * post-base form, by asking whether the "pstf" GSUB feature applies.
 *
 * The test is only made when pwChar[s->base] is a consonant, s->base is
 * greater than s->start, and the character before the base is a Halant.
 * Two query forms are visible:
 *   - the pair starting at &pwChar[s->base-1] (Halant, then consonant);
 *   - a local pair cc holding the consonant first and the preceding
 *     character second.
 * Each returns TRUE when SHAPE_does_GSUB_feature_apply_to_chars() yields a
 * value greater than zero.
 *
 * NOTE(review): what selects between the two forms (presumably the modern
 * flag - confirm), the declaration of cc and the fall-through return are
 * missing from this view.
 */
213 static BOOL
Consonant_is_post_base_form(HDC hdc
, SCRIPT_ANALYSIS
*psa
, ScriptCache
*psc
,
214 const WCHAR
*pwChar
, const IndicSyllable
*s
, lexical_function lexical
, BOOL modern
)
216 if (is_consonant(lexical(pwChar
[s
->base
])) && s
->base
> s
->start
&& lexical(pwChar
[s
->base
-1]) == lex_Halant
)
219 return (SHAPE_does_GSUB_feature_apply_to_chars(hdc
, psa
, psc
, &pwChar
[s
->base
-1], 1, 2, "pstf") > 0);
/* Alternate form: consonant first, then the character that preceded it. */
223 cc
[0] = pwChar
[s
->base
];
224 cc
[1] = pwChar
[s
->base
-1];
225 return (SHAPE_does_GSUB_feature_apply_to_chars(hdc
, psa
, psc
, cc
, 1, 2, "pstf") > 0);
/*
 * Consonant_is_below_base_form
 *
 * Reports whether the current base consonant of syllable s takes a
 * below-base form, by asking whether the "blwf" GSUB feature applies.
 *
 * The test is only made when pwChar[s->base] is a consonant, s->base is
 * greater than s->start, and the character before the base is a Halant.
 * Two query forms are visible:
 *   - the pair starting at &pwChar[s->base-1] (Halant, then consonant);
 *   - a local pair cc holding the consonant first and the preceding
 *     character second.
 * Each returns TRUE when SHAPE_does_GSUB_feature_apply_to_chars() yields a
 * value greater than zero.
 *
 * NOTE(review): what selects between the two forms (presumably the modern
 * flag - confirm), the declaration of cc and the fall-through return are
 * missing from this view.
 */
231 static BOOL
Consonant_is_below_base_form(HDC hdc
, SCRIPT_ANALYSIS
*psa
, ScriptCache
*psc
,
232 const WCHAR
*pwChar
, const IndicSyllable
*s
, lexical_function lexical
, BOOL modern
)
234 if (is_consonant(lexical(pwChar
[s
->base
])) && s
->base
> s
->start
&& lexical(pwChar
[s
->base
-1]) == lex_Halant
)
237 return (SHAPE_does_GSUB_feature_apply_to_chars(hdc
, psa
, psc
, &pwChar
[s
->base
-1], 1, 2, "blwf") > 0);
/* Alternate form: consonant first, then the character that preceded it. */
241 cc
[0] = pwChar
[s
->base
];
242 cc
[1] = pwChar
[s
->base
-1];
243 return (SHAPE_does_GSUB_feature_apply_to_chars(hdc
, psa
, psc
, cc
, 1, 2, "blwf") > 0);
/*
 * Consonant_is_pre_base_form
 *
 * Reports whether the current base consonant of syllable s takes a
 * pre-base form, by asking whether the "pref" GSUB feature applies.
 *
 * The test is only made when pwChar[s->base] is a consonant, s->base is
 * greater than s->start, and the character before the base is a Halant.
 * Two query forms are visible:
 *   - the pair starting at &pwChar[s->base-1] (Halant, then consonant);
 *   - a local pair cc holding the consonant first and the preceding
 *     character second.
 * Each returns TRUE when SHAPE_does_GSUB_feature_apply_to_chars() yields a
 * value greater than zero.
 *
 * NOTE(review): what selects between the two forms (presumably the modern
 * flag - confirm), the declaration of cc and the fall-through return are
 * missing from this view.
 */
249 static BOOL
Consonant_is_pre_base_form(HDC hdc
, SCRIPT_ANALYSIS
*psa
, ScriptCache
*psc
,
250 const WCHAR
*pwChar
, const IndicSyllable
*s
, lexical_function lexical
, BOOL modern
)
252 if (is_consonant(lexical(pwChar
[s
->base
])) && s
->base
> s
->start
&& lexical(pwChar
[s
->base
-1]) == lex_Halant
)
255 return (SHAPE_does_GSUB_feature_apply_to_chars(hdc
, psa
, psc
, &pwChar
[s
->base
-1], 1, 2, "pref") > 0);
/* Alternate form: consonant first, then the character that preceded it. */
259 cc
[0] = pwChar
[s
->base
];
260 cc
[1] = pwChar
[s
->base
-1];
261 return (SHAPE_does_GSUB_feature_apply_to_chars(hdc
, psa
, psc
, cc
, 1, 2, "pref") > 0);
/*
 * Consonant_is_ralf
 *
 * Reports whether syllable s opens with a Ra + Halant pair to which the
 * "rphf" GSUB feature applies.
 *
 * The query is made only when pwChar[s->start] is lex_Ra, s->end is
 * greater than s->start, and pwChar[s->start+1] is a Halant; it returns
 * TRUE when SHAPE_does_GSUB_feature_apply_to_chars() yields a value
 * greater than zero.
 *
 * NOTE(review): the return taken when the condition fails is missing from
 * this view.
 */
267 static BOOL
Consonant_is_ralf(HDC hdc
, SCRIPT_ANALYSIS
*psa
, ScriptCache
*psc
,
268 const WCHAR
*pwChar
, const IndicSyllable
*s
, lexical_function lexical
)
270 if ((lexical(pwChar
[s
->start
])==lex_Ra
) && s
->end
> s
->start
&& lexical(pwChar
[s
->start
+1]) == lex_Halant
)
271 return (SHAPE_does_GSUB_feature_apply_to_chars(hdc
, psa
, psc
, &pwChar
[s
->start
], 1, 2, "rphf") > 0);
/*
 * FindBaseConsonant
 *
 * Works out the base consonant of syllable s and records, in s, where
 * below-base and pre-base forms were seen.
 *
 * Visible steps:
 *   1. Consonant_is_ralf() is consulted so that a leading ralf can be
 *      taken out of consideration.
 *   2. If input[s->base] is not a consonant, the syllable is scanned from
 *      s->end down to s->start for one.
 *   3. While the current base tests as a below-base, post-base or pre-base
 *      form, s->blwf / s->pref are set to s->base - 1 the first time (while
 *      they are still -1), and the text before the base is scanned
 *      backwards for another consonant.
 *   4. A final check compares s->ralf with s->base.
 *
 * NOTE(review): the bodies of steps 1, 2 and 4, the consonant-found actions
 * inside both scans, the local declarations and the return value are
 * missing from this view.
 */
275 static int FindBaseConsonant(HDC hdc
, SCRIPT_ANALYSIS
*psa
, ScriptCache
*psc
,
276 const WCHAR
*input
, IndicSyllable
*s
, lexical_function lex
, BOOL modern
)
282 /* remove ralf from consideration */
283 if (Consonant_is_ralf(hdc
, psa
, psc
, input
, s
, lex
))
289 /* try to find a base consonant */
290 if (!is_consonant( lex(input
[s
->base
]) ))
292 for (i
= s
->end
; i
>= s
->start
; i
--)
293 if (is_consonant( lex(input
[i
]) ))
/*
 * Short-circuit evaluation matters here: blwf is assigned on every pass,
 * the post-base test only runs when blwf is false, and pref is only
 * assigned when both earlier tests are false.
 * NOTE(review): pref therefore keeps its previous value on passes where an
 * earlier test succeeds; its initialisation is not visible in this view.
 */
300 while ((blwf
= Consonant_is_below_base_form(hdc
, psa
, psc
, input
, s
, lex
, modern
)) || Consonant_is_post_base_form(hdc
, psa
, psc
, input
, s
, lex
, modern
) || (pref
= Consonant_is_pre_base_form(hdc
, psa
, psc
, input
, s
, lex
, modern
)))
/* Only the first occurrence is recorded: the fields start at -1. */
302 if (blwf
&& s
->blwf
== -1)
303 s
->blwf
= s
->base
- 1;
304 if (pref
&& s
->pref
== -1)
305 s
->pref
= s
->base
- 1;
307 for (i
= s
->base
-1; i
>= s
->start
; i
--)
308 if (is_consonant( lex(input
[i
]) ))
318 if (s
->ralf
== s
->base
)
/*
 * Indic_ParseSyllables
 *
 * Splits input (cChar characters) into Indic syllables and appends one
 * IndicSyllable record per syllable to *syllables, updating
 * *syllable_count.
 *
 * Visible behaviour:
 *   - logs "Failure to have required functions" through ERR() on a
 *     precondition whose test is not visible;
 *   - dumps the lexical classes through debug_output_string();
 *   - skips characters classified as lex_Generic;
 *   - calls Indic_process_next_syllable() with start 0 and the current
 *     index, receiving the base position in center;
 *   - grows *syllables by one element with HeapReAlloc(), or makes the
 *     first element with heap_alloc();
 *   - fills the new record: start = index, base = center,
 *     ralf/blwf/pref = -1, end = next - 1, then calls FindBaseConsonant();
 *   - traces a failure position when parsing stops before cChar, and a
 *     summary at the end.
 *
 * The array is allocated from the process heap.
 * NOTE(review): who frees it is not visible here - confirm against callers.
 * NOTE(review): loop structure, the declarations of index and next, and the
 * conditions around the allocation are missing from this view.
 */
324 void Indic_ParseSyllables(HDC hdc
, SCRIPT_ANALYSIS
*psa
, ScriptCache
*psc
, const WCHAR
*input
, unsigned int cChar
,
325 IndicSyllable
**syllables
, int *syllable_count
, lexical_function lex
, BOOL modern
)
327 unsigned int center
= 0;
335 ERR("Failure to have required functions\n");
339 debug_output_string(input
, cChar
, lex
);
342 while((next
< cChar
) && lex(input
[next
]) == lex_Generic
)
/* NOTE(review): the fourth argument below looks like a mis-encoded "&center" (an HTML cent-sign entity) - confirm against the real file. */
347 next
= Indic_process_next_syllable(input
, cChar
, 0, ¢er
, index
, lex
);
/*
 * NOTE(review): the HeapReAlloc() result overwrites *syllables directly and
 * no NULL check is visible for either allocation - confirm failure handling.
 */
351 *syllables
= HeapReAlloc(GetProcessHeap(),0,*syllables
, sizeof(IndicSyllable
)*(*syllable_count
+1));
353 *syllables
= heap_alloc(sizeof(**syllables
));
/* Initialise the new record; -1 marks ralf/blwf/pref as not yet found. */
354 (*syllables
)[*syllable_count
].start
= index
;
355 (*syllables
)[*syllable_count
].base
= center
;
356 (*syllables
)[*syllable_count
].ralf
= -1;
357 (*syllables
)[*syllable_count
].blwf
= -1;
358 (*syllables
)[*syllable_count
].pref
= -1;
359 (*syllables
)[*syllable_count
].end
= next
-1;
360 FindBaseConsonant(hdc
, psa
, psc
, input
, &(*syllables
)[*syllable_count
], lex
, modern
);
362 *syllable_count
= (*syllable_count
)+1;
364 else if (index
< cChar
)
366 TRACE("Processing failed at %i\n",index
);
370 TRACE("Processed %i of %i characters into %i syllables\n",index
,cChar
,*syllable_count
);
/*
 * Indic_ReorderCharacters
 *
 * Parses input into syllables with Indic_ParseSyllables() and then calls
 * reorder_f(input, syllable, lex) once per syllable, in order. input is
 * passed non-const, so the callback may rewrite it in place.
 *
 * Logs "Failure to have required functions" through ERR() on a
 * precondition whose test is not visible.
 *
 * NOTE(review): the precondition, the declaration of i and the braces are
 * missing from this view.
 */
373 void Indic_ReorderCharacters(HDC hdc
, SCRIPT_ANALYSIS
*psa
, ScriptCache
*psc
, WCHAR
*input
, unsigned int cChar
,
374 IndicSyllable
**syllables
, int *syllable_count
, lexical_function lex
, reorder_function reorder_f
, BOOL modern
)
380 ERR("Failure to have required functions\n");
384 Indic_ParseSyllables(hdc
, psa
, psc
, input
, cChar
, syllables
, syllable_count
, lex
, modern
);
385 for (i
= 0; i
< *syllable_count
; i
++)
386 reorder_f(input
, &(*syllables
)[i
], lex
);