1 /* vi:set ts=8 sts=4 sw=4:
3 * VIM - Vi IMproved by Bram Moolenaar
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
11 * arabic.c: functions for Arabic language
13 * Included by main.c, when FEAT_ARABIC & FEAT_GUI are defined.
17 * Author: Nadim Shaikli & Isam Bayazidi
21 static int A_is_a
__ARGS((int cur_c
));
22 static int A_is_s
__ARGS((int cur_c
));
23 static int A_is_f
__ARGS((int cur_c
));
24 static int chg_c_a2s
__ARGS((int cur_c
));
25 static int chg_c_a2i
__ARGS((int cur_c
));
26 static int chg_c_a2m
__ARGS((int cur_c
));
27 static int chg_c_a2f
__ARGS((int cur_c
));
28 static int chg_c_i2m
__ARGS((int cur_c
));
29 static int chg_c_f2m
__ARGS((int cur_c
));
30 static int chg_c_laa2i
__ARGS((int hid_c
));
31 static int chg_c_laa2f
__ARGS((int hid_c
));
32 static int half_shape
__ARGS((int c
));
33 static int A_firstc_laa
__ARGS((int c1
, int c
));
34 static int A_is_harakat
__ARGS((int c
));
35 static int A_is_iso
__ARGS((int c
));
36 static int A_is_formb
__ARGS((int c
));
37 static int A_is_ok
__ARGS((int c
));
38 static int A_is_valid
__ARGS((int c
));
39 static int A_is_special
__ARGS((int c
));
43 * Returns True if c is an ISO-8859-6 shaped ARABIC letter (user entered)
53 case a_ALEF_HAMZA_ABOVE
:
55 case a_ALEF_HAMZA_BELOW
:
96 * Returns True if c is an Isolated Form-B ARABIC letter
106 case a_s_ALEF_HAMZA_ABOVE
:
108 case a_s_ALEF_HAMZA_BELOW
:
112 case a_s_TEH_MARBUTA
:
138 case a_s_ALEF_MAKSURA
:
148 * Returns True if c is a Final shape of an ARABIC letter
157 case a_f_ALEF_HAMZA_ABOVE
:
159 case a_f_ALEF_HAMZA_BELOW
:
163 case a_f_TEH_MARBUTA
:
189 case a_f_ALEF_MAKSURA
:
191 case a_f_LAM_ALEF_MADDA_ABOVE
:
192 case a_f_LAM_ALEF_HAMZA_ABOVE
:
193 case a_f_LAM_ALEF_HAMZA_BELOW
:
202 * Change shape - from ISO-8859-6/Isolated to Form-B Isolated
216 tempc
= a_s_ALEF_MADDA
;
218 case a_ALEF_HAMZA_ABOVE
:
219 tempc
= a_s_ALEF_HAMZA_ABOVE
;
222 tempc
= a_s_WAW_HAMZA
;
224 case a_ALEF_HAMZA_BELOW
:
225 tempc
= a_s_ALEF_HAMZA_BELOW
;
228 tempc
= a_s_YEH_HAMZA
;
234 tempc
= a_s_TEH_MARBUTA
;
248 case a_TATWEEL
: /* exceptions */
255 tempc
= a_s_ALEF_MAKSURA
;
332 * Change shape - from ISO-8859-6/Isolated to Initial
343 tempc
= a_i_YEH_HAMZA
;
345 case a_HAMZA
: /* exceptions */
348 case a_ALEF_MADDA
: /* exceptions */
349 tempc
= a_s_ALEF_MADDA
;
351 case a_ALEF_HAMZA_ABOVE
: /* exceptions */
352 tempc
= a_s_ALEF_HAMZA_ABOVE
;
354 case a_WAW_HAMZA
: /* exceptions */
355 tempc
= a_s_WAW_HAMZA
;
357 case a_ALEF_HAMZA_BELOW
: /* exceptions */
358 tempc
= a_s_ALEF_HAMZA_BELOW
;
360 case a_ALEF
: /* exceptions */
363 case a_TEH_MARBUTA
: /* exceptions */
364 tempc
= a_s_TEH_MARBUTA
;
366 case a_DAL
: /* exceptions */
369 case a_THAL
: /* exceptions */
372 case a_REH
: /* exceptions */
375 case a_ZAIN
: /* exceptions */
378 case a_TATWEEL
: /* exceptions */
381 case a_WAW
: /* exceptions */
384 case a_ALEF_MAKSURA
: /* exceptions */
385 tempc
= a_s_ALEF_MAKSURA
;
462 * Change shape - from ISO-8859-6/Isolated to Medial
472 case a_HAMZA
: /* exception */
475 case a_ALEF_MADDA
: /* exception */
476 tempc
= a_f_ALEF_MADDA
;
478 case a_ALEF_HAMZA_ABOVE
: /* exception */
479 tempc
= a_f_ALEF_HAMZA_ABOVE
;
481 case a_WAW_HAMZA
: /* exception */
482 tempc
= a_f_WAW_HAMZA
;
484 case a_ALEF_HAMZA_BELOW
: /* exception */
485 tempc
= a_f_ALEF_HAMZA_BELOW
;
488 tempc
= a_m_YEH_HAMZA
;
490 case a_ALEF
: /* exception */
496 case a_TEH_MARBUTA
: /* exception */
497 tempc
= a_f_TEH_MARBUTA
;
514 case a_DAL
: /* exception */
517 case a_THAL
: /* exception */
520 case a_REH
: /* exception */
523 case a_ZAIN
: /* exception */
550 case a_TATWEEL
: /* exception */
574 case a_WAW
: /* exception */
577 case a_ALEF_MAKSURA
: /* exception */
578 tempc
= a_f_ALEF_MAKSURA
;
592 * Change shape - from ISO-8859-6/Isolated to final
600 /* NOTE: these encodings need to be accounted for
603 a_f_ALEF_HAMZA_ABOVE;
604 a_f_ALEF_HAMZA_BELOW;
605 a_f_LAM_ALEF_MADDA_ABOVE;
606 a_f_LAM_ALEF_HAMZA_ABOVE;
607 a_f_LAM_ALEF_HAMZA_BELOW;
612 case a_HAMZA
: /* exception */
616 tempc
= a_f_ALEF_MADDA
;
618 case a_ALEF_HAMZA_ABOVE
:
619 tempc
= a_f_ALEF_HAMZA_ABOVE
;
622 tempc
= a_f_WAW_HAMZA
;
624 case a_ALEF_HAMZA_BELOW
:
625 tempc
= a_f_ALEF_HAMZA_BELOW
;
628 tempc
= a_f_YEH_HAMZA
;
637 tempc
= a_f_TEH_MARBUTA
;
690 case a_TATWEEL
: /* exception */
718 tempc
= a_f_ALEF_MAKSURA
;
732 * Change shape - from Initial to Medial
743 tempc
= a_m_YEH_HAMZA
;
820 * Change shape - from Final to Medial
830 /* NOTE: these encodings are multi-positional, no ?
832 case a_f_ALEF_HAMZA_ABOVE:
833 case a_f_ALEF_HAMZA_BELOW:
836 tempc
= a_m_YEH_HAMZA
;
838 case a_f_WAW_HAMZA
: /* exceptions */
840 case a_f_TEH_MARBUTA
:
846 case a_f_ALEF_MAKSURA
:
915 /* NOTE: these encodings are multi-positional, no ?
916 case a_f_LAM_ALEF_MADDA_ABOVE:
917 case a_f_LAM_ALEF_HAMZA_ABOVE:
918 case a_f_LAM_ALEF_HAMZA_BELOW:
930 * Change shape - from Combination (2 char) to an Isolated
941 tempc
= a_s_LAM_ALEF_MADDA_ABOVE
;
943 case a_ALEF_HAMZA_ABOVE
:
944 tempc
= a_s_LAM_ALEF_HAMZA_ABOVE
;
946 case a_ALEF_HAMZA_BELOW
:
947 tempc
= a_s_LAM_ALEF_HAMZA_BELOW
;
950 tempc
= a_s_LAM_ALEF
;
961 * Change shape - from Combination-Isolated to Final
972 tempc
= a_f_LAM_ALEF_MADDA_ABOVE
;
974 case a_ALEF_HAMZA_ABOVE
:
975 tempc
= a_f_LAM_ALEF_HAMZA_ABOVE
;
977 case a_ALEF_HAMZA_BELOW
:
978 tempc
= a_f_LAM_ALEF_HAMZA_BELOW
;
981 tempc
= a_f_LAM_ALEF
;
991 * Do "half-shaping" on character "c". Return zero if no shaping.
999 if (A_is_valid(c
) && A_is_f(c
))
1000 return chg_c_f2m(c
);
1005 * Do Arabic shaping on character "c". Returns the shaped character.
1006 * out: "ccp" points to the first byte of the character to be shaped.
1007 * in/out: "c1p" points to the first composing char for "c".
1008 * in: "prev_c" is the previous character (not shaped)
1009 * in: "prev_c1" is the first composing char for the previous char
1011 * in: "next_c" is the next character (not shaped).
1014 arabic_shape(c
, ccp
, c1p
, prev_c
, prev_c1
, next_c
)
1027 /* Deal only with Arabic character, pass back all others */
1031 /* half-shape current and previous character */
1032 shape_c
= half_shape(prev_c
);
1034 /* Save away current character */
1037 curr_laa
= A_firstc_laa(c
, *c1p
);
1038 prev_laa
= A_firstc_laa(prev_c
, prev_c1
);
1042 if (A_is_valid(prev_c
) && !A_is_f(shape_c
)
1043 && !A_is_s(shape_c
) && !prev_laa
)
1044 curr_c
= chg_c_laa2f(curr_laa
);
1046 curr_c
= chg_c_laa2i(curr_laa
);
1048 /* Remove the composing character */
1051 else if (!A_is_valid(prev_c
) && A_is_valid(next_c
))
1052 curr_c
= chg_c_a2i(c
);
1053 else if (!shape_c
|| A_is_f(shape_c
) || A_is_s(shape_c
) || prev_laa
)
1054 curr_c
= A_is_valid(next_c
) ? chg_c_a2i(c
) : chg_c_a2s(c
);
1055 else if (A_is_valid(next_c
))
1056 curr_c
= A_is_iso(c
) ? chg_c_a2m(c
) : chg_c_i2m(c
);
1057 else if (A_is_valid(prev_c
))
1058 curr_c
= chg_c_a2f(c
);
1060 curr_c
= chg_c_a2s(c
);
1062 /* Sanity check -- curr_c should, in the future, never be 0.
1063 * We should, in the future, insert a fatal error here. */
1067 if (curr_c
!= c
&& ccp
!= NULL
)
1069 char_u buf
[MB_MAXBYTES
];
1071 /* Update the first byte of the character. */
1072 (*mb_char2bytes
)(curr_c
, buf
);
1076 /* Return the shaped character */
1082 * A_firstc_laa returns first character of LAA combination if it exists
1086 int c
; /* base character */
1087 int c1
; /* first composing character */
1089 if (c1
!= NUL
&& c
== a_LAM
&& !A_is_harakat(c1
))
1096 * A_is_harakat returns TRUE if 'c' is an Arabic Harakat character
1103 return (c
>= a_FATHATAN
&& c
<= a_SUKUN
);
1108 * A_is_iso returns TRUE if 'c' is an Arabic ISO-8859-6 character
1109 * (alphabet/number/punctuation)
1115 return ((c
>= a_HAMZA
&& c
<= a_GHAIN
)
1116 || (c
>= a_TATWEEL
&& c
<= a_HAMZA_BELOW
)
1117 || c
== a_MINI_ALEF
);
1122 * A_is_formb returns TRUE if 'c' is an Arabic 10646-1 FormB character
1123 * (alphabet/number/punctuation)
1129 return ((c
>= a_s_FATHATAN
&& c
<= a_s_DAMMATAN
)
1130 || c
== a_s_KASRATAN
1131 || (c
>= a_s_FATHA
&& c
<= a_f_LAM_ALEF
)
1132 || c
== a_BYTE_ORDER_MARK
);
1137 * A_is_ok returns TRUE if 'c' is an Arabic 10646 (8859-6 or Form-B) character
1143 return (A_is_iso(c
) || A_is_formb(c
));
1148 * A_is_valid returns TRUE if 'c' is an Arabic 10646 (8859-6 or Form-B)
1149 * with some exceptions/exclusions
1155 return (A_is_ok(c
) && !A_is_special(c
));
1160 * A_is_special returns TRUE if 'c' is a special Arabic character.
1161 * Specials don't adhere to most of the rules.
1167 return (c
== a_HAMZA
|| c
== a_s_HAMZA
);