Merge branch 'vim' into fix/fast-join
[vim_extended.git] / src / arabic.c
blob8c1f64c4051f08f21ab5533dec031400b8efc3a6
/* vi:set ts=8 sts=4 sw=4:
 *
 * VIM - Vi IMproved	by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */

/*
 * arabic.c: functions for Arabic language
 *
 * Included by main.c, when FEAT_ARABIC & FEAT_GUI are defined.
 *
 * --
 *
 * Author: Nadim Shaikli & Isam Bayazidi
 */
21 static int A_is_a __ARGS((int cur_c));
22 static int A_is_s __ARGS((int cur_c));
23 static int A_is_f __ARGS((int cur_c));
24 static int chg_c_a2s __ARGS((int cur_c));
25 static int chg_c_a2i __ARGS((int cur_c));
26 static int chg_c_a2m __ARGS((int cur_c));
27 static int chg_c_a2f __ARGS((int cur_c));
28 static int chg_c_i2m __ARGS((int cur_c));
29 static int chg_c_f2m __ARGS((int cur_c));
30 static int chg_c_laa2i __ARGS((int hid_c));
31 static int chg_c_laa2f __ARGS((int hid_c));
32 static int half_shape __ARGS((int c));
33 static int A_firstc_laa __ARGS((int c1, int c));
34 static int A_is_harakat __ARGS((int c));
35 static int A_is_iso __ARGS((int c));
36 static int A_is_formb __ARGS((int c));
37 static int A_is_ok __ARGS((int c));
38 static int A_is_valid __ARGS((int c));
39 static int A_is_special __ARGS((int c));
43 * Returns True if c is an ISO-8859-6 shaped ARABIC letter (user entered)
45 static int
46 A_is_a(cur_c)
47 int cur_c;
49 switch (cur_c)
51 case a_HAMZA:
52 case a_ALEF_MADDA:
53 case a_ALEF_HAMZA_ABOVE:
54 case a_WAW_HAMZA:
55 case a_ALEF_HAMZA_BELOW:
56 case a_YEH_HAMZA:
57 case a_ALEF:
58 case a_BEH:
59 case a_TEH_MARBUTA:
60 case a_TEH:
61 case a_THEH:
62 case a_JEEM:
63 case a_HAH:
64 case a_KHAH:
65 case a_DAL:
66 case a_THAL:
67 case a_REH:
68 case a_ZAIN:
69 case a_SEEN:
70 case a_SHEEN:
71 case a_SAD:
72 case a_DAD:
73 case a_TAH:
74 case a_ZAH:
75 case a_AIN:
76 case a_GHAIN:
77 case a_TATWEEL:
78 case a_FEH:
79 case a_QAF:
80 case a_KAF:
81 case a_LAM:
82 case a_MEEM:
83 case a_NOON:
84 case a_HEH:
85 case a_WAW:
86 case a_ALEF_MAKSURA:
87 case a_YEH:
88 return TRUE;
91 return FALSE;
96 * Returns True if c is an Isolated Form-B ARABIC letter
98 static int
99 A_is_s(cur_c)
100 int cur_c;
102 switch (cur_c)
104 case a_s_HAMZA:
105 case a_s_ALEF_MADDA:
106 case a_s_ALEF_HAMZA_ABOVE:
107 case a_s_WAW_HAMZA:
108 case a_s_ALEF_HAMZA_BELOW:
109 case a_s_YEH_HAMZA:
110 case a_s_ALEF:
111 case a_s_BEH:
112 case a_s_TEH_MARBUTA:
113 case a_s_TEH:
114 case a_s_THEH:
115 case a_s_JEEM:
116 case a_s_HAH:
117 case a_s_KHAH:
118 case a_s_DAL:
119 case a_s_THAL:
120 case a_s_REH:
121 case a_s_ZAIN:
122 case a_s_SEEN:
123 case a_s_SHEEN:
124 case a_s_SAD:
125 case a_s_DAD:
126 case a_s_TAH:
127 case a_s_ZAH:
128 case a_s_AIN:
129 case a_s_GHAIN:
130 case a_s_FEH:
131 case a_s_QAF:
132 case a_s_KAF:
133 case a_s_LAM:
134 case a_s_MEEM:
135 case a_s_NOON:
136 case a_s_HEH:
137 case a_s_WAW:
138 case a_s_ALEF_MAKSURA:
139 case a_s_YEH:
140 return TRUE;
143 return FALSE;
148 * Returns True if c is a Final shape of an ARABIC letter
150 static int
151 A_is_f(cur_c)
152 int cur_c;
154 switch (cur_c)
156 case a_f_ALEF_MADDA:
157 case a_f_ALEF_HAMZA_ABOVE:
158 case a_f_WAW_HAMZA:
159 case a_f_ALEF_HAMZA_BELOW:
160 case a_f_YEH_HAMZA:
161 case a_f_ALEF:
162 case a_f_BEH:
163 case a_f_TEH_MARBUTA:
164 case a_f_TEH:
165 case a_f_THEH:
166 case a_f_JEEM:
167 case a_f_HAH:
168 case a_f_KHAH:
169 case a_f_DAL:
170 case a_f_THAL:
171 case a_f_REH:
172 case a_f_ZAIN:
173 case a_f_SEEN:
174 case a_f_SHEEN:
175 case a_f_SAD:
176 case a_f_DAD:
177 case a_f_TAH:
178 case a_f_ZAH:
179 case a_f_AIN:
180 case a_f_GHAIN:
181 case a_f_FEH:
182 case a_f_QAF:
183 case a_f_KAF:
184 case a_f_LAM:
185 case a_f_MEEM:
186 case a_f_NOON:
187 case a_f_HEH:
188 case a_f_WAW:
189 case a_f_ALEF_MAKSURA:
190 case a_f_YEH:
191 case a_f_LAM_ALEF_MADDA_ABOVE:
192 case a_f_LAM_ALEF_HAMZA_ABOVE:
193 case a_f_LAM_ALEF_HAMZA_BELOW:
194 case a_f_LAM_ALEF:
195 return TRUE;
197 return FALSE;
202 * Change shape - from ISO-8859-6/Isolated to Form-B Isolated
204 static int
205 chg_c_a2s(cur_c)
206 int cur_c;
208 int tempc;
210 switch (cur_c)
212 case a_HAMZA:
213 tempc = a_s_HAMZA;
214 break;
215 case a_ALEF_MADDA:
216 tempc = a_s_ALEF_MADDA;
217 break;
218 case a_ALEF_HAMZA_ABOVE:
219 tempc = a_s_ALEF_HAMZA_ABOVE;
220 break;
221 case a_WAW_HAMZA:
222 tempc = a_s_WAW_HAMZA;
223 break;
224 case a_ALEF_HAMZA_BELOW:
225 tempc = a_s_ALEF_HAMZA_BELOW;
226 break;
227 case a_YEH_HAMZA:
228 tempc = a_s_YEH_HAMZA;
229 break;
230 case a_ALEF:
231 tempc = a_s_ALEF;
232 break;
233 case a_TEH_MARBUTA:
234 tempc = a_s_TEH_MARBUTA;
235 break;
236 case a_DAL:
237 tempc = a_s_DAL;
238 break;
239 case a_THAL:
240 tempc = a_s_THAL;
241 break;
242 case a_REH:
243 tempc = a_s_REH;
244 break;
245 case a_ZAIN:
246 tempc = a_s_ZAIN;
247 break;
248 case a_TATWEEL: /* exceptions */
249 tempc = cur_c;
250 break;
251 case a_WAW:
252 tempc = a_s_WAW;
253 break;
254 case a_ALEF_MAKSURA:
255 tempc = a_s_ALEF_MAKSURA;
256 break;
257 case a_BEH:
258 tempc = a_s_BEH;
259 break;
260 case a_TEH:
261 tempc = a_s_TEH;
262 break;
263 case a_THEH:
264 tempc = a_s_THEH;
265 break;
266 case a_JEEM:
267 tempc = a_s_JEEM;
268 break;
269 case a_HAH:
270 tempc = a_s_HAH;
271 break;
272 case a_KHAH:
273 tempc = a_s_KHAH;
274 break;
275 case a_SEEN:
276 tempc = a_s_SEEN;
277 break;
278 case a_SHEEN:
279 tempc = a_s_SHEEN;
280 break;
281 case a_SAD:
282 tempc = a_s_SAD;
283 break;
284 case a_DAD:
285 tempc = a_s_DAD;
286 break;
287 case a_TAH:
288 tempc = a_s_TAH;
289 break;
290 case a_ZAH:
291 tempc = a_s_ZAH;
292 break;
293 case a_AIN:
294 tempc = a_s_AIN;
295 break;
296 case a_GHAIN:
297 tempc = a_s_GHAIN;
298 break;
299 case a_FEH:
300 tempc = a_s_FEH;
301 break;
302 case a_QAF:
303 tempc = a_s_QAF;
304 break;
305 case a_KAF:
306 tempc = a_s_KAF;
307 break;
308 case a_LAM:
309 tempc = a_s_LAM;
310 break;
311 case a_MEEM:
312 tempc = a_s_MEEM;
313 break;
314 case a_NOON:
315 tempc = a_s_NOON;
316 break;
317 case a_HEH:
318 tempc = a_s_HEH;
319 break;
320 case a_YEH:
321 tempc = a_s_YEH;
322 break;
323 default:
324 tempc = 0;
327 return tempc;
332 * Change shape - from ISO-8859-6/Isolated to Initial
334 static int
335 chg_c_a2i(cur_c)
336 int cur_c;
338 int tempc;
340 switch (cur_c)
342 case a_YEH_HAMZA:
343 tempc = a_i_YEH_HAMZA;
344 break;
345 case a_HAMZA: /* exceptions */
346 tempc = a_s_HAMZA;
347 break;
348 case a_ALEF_MADDA: /* exceptions */
349 tempc = a_s_ALEF_MADDA;
350 break;
351 case a_ALEF_HAMZA_ABOVE: /* exceptions */
352 tempc = a_s_ALEF_HAMZA_ABOVE;
353 break;
354 case a_WAW_HAMZA: /* exceptions */
355 tempc = a_s_WAW_HAMZA;
356 break;
357 case a_ALEF_HAMZA_BELOW: /* exceptions */
358 tempc = a_s_ALEF_HAMZA_BELOW;
359 break;
360 case a_ALEF: /* exceptions */
361 tempc = a_s_ALEF;
362 break;
363 case a_TEH_MARBUTA: /* exceptions */
364 tempc = a_s_TEH_MARBUTA;
365 break;
366 case a_DAL: /* exceptions */
367 tempc = a_s_DAL;
368 break;
369 case a_THAL: /* exceptions */
370 tempc = a_s_THAL;
371 break;
372 case a_REH: /* exceptions */
373 tempc = a_s_REH;
374 break;
375 case a_ZAIN: /* exceptions */
376 tempc = a_s_ZAIN;
377 break;
378 case a_TATWEEL: /* exceptions */
379 tempc = cur_c;
380 break;
381 case a_WAW: /* exceptions */
382 tempc = a_s_WAW;
383 break;
384 case a_ALEF_MAKSURA: /* exceptions */
385 tempc = a_s_ALEF_MAKSURA;
386 break;
387 case a_BEH:
388 tempc = a_i_BEH;
389 break;
390 case a_TEH:
391 tempc = a_i_TEH;
392 break;
393 case a_THEH:
394 tempc = a_i_THEH;
395 break;
396 case a_JEEM:
397 tempc = a_i_JEEM;
398 break;
399 case a_HAH:
400 tempc = a_i_HAH;
401 break;
402 case a_KHAH:
403 tempc = a_i_KHAH;
404 break;
405 case a_SEEN:
406 tempc = a_i_SEEN;
407 break;
408 case a_SHEEN:
409 tempc = a_i_SHEEN;
410 break;
411 case a_SAD:
412 tempc = a_i_SAD;
413 break;
414 case a_DAD:
415 tempc = a_i_DAD;
416 break;
417 case a_TAH:
418 tempc = a_i_TAH;
419 break;
420 case a_ZAH:
421 tempc = a_i_ZAH;
422 break;
423 case a_AIN:
424 tempc = a_i_AIN;
425 break;
426 case a_GHAIN:
427 tempc = a_i_GHAIN;
428 break;
429 case a_FEH:
430 tempc = a_i_FEH;
431 break;
432 case a_QAF:
433 tempc = a_i_QAF;
434 break;
435 case a_KAF:
436 tempc = a_i_KAF;
437 break;
438 case a_LAM:
439 tempc = a_i_LAM;
440 break;
441 case a_MEEM:
442 tempc = a_i_MEEM;
443 break;
444 case a_NOON:
445 tempc = a_i_NOON;
446 break;
447 case a_HEH:
448 tempc = a_i_HEH;
449 break;
450 case a_YEH:
451 tempc = a_i_YEH;
452 break;
453 default:
454 tempc = 0;
457 return tempc;
462 * Change shape - from ISO-8859-6/Isolated to Medial
464 static int
465 chg_c_a2m(cur_c)
466 int cur_c;
468 int tempc;
470 switch (cur_c)
472 case a_HAMZA: /* exception */
473 tempc = a_s_HAMZA;
474 break;
475 case a_ALEF_MADDA: /* exception */
476 tempc = a_f_ALEF_MADDA;
477 break;
478 case a_ALEF_HAMZA_ABOVE: /* exception */
479 tempc = a_f_ALEF_HAMZA_ABOVE;
480 break;
481 case a_WAW_HAMZA: /* exception */
482 tempc = a_f_WAW_HAMZA;
483 break;
484 case a_ALEF_HAMZA_BELOW: /* exception */
485 tempc = a_f_ALEF_HAMZA_BELOW;
486 break;
487 case a_YEH_HAMZA:
488 tempc = a_m_YEH_HAMZA;
489 break;
490 case a_ALEF: /* exception */
491 tempc = a_f_ALEF;
492 break;
493 case a_BEH:
494 tempc = a_m_BEH;
495 break;
496 case a_TEH_MARBUTA: /* exception */
497 tempc = a_f_TEH_MARBUTA;
498 break;
499 case a_TEH:
500 tempc = a_m_TEH;
501 break;
502 case a_THEH:
503 tempc = a_m_THEH;
504 break;
505 case a_JEEM:
506 tempc = a_m_JEEM;
507 break;
508 case a_HAH:
509 tempc = a_m_HAH;
510 break;
511 case a_KHAH:
512 tempc = a_m_KHAH;
513 break;
514 case a_DAL: /* exception */
515 tempc = a_f_DAL;
516 break;
517 case a_THAL: /* exception */
518 tempc = a_f_THAL;
519 break;
520 case a_REH: /* exception */
521 tempc = a_f_REH;
522 break;
523 case a_ZAIN: /* exception */
524 tempc = a_f_ZAIN;
525 break;
526 case a_SEEN:
527 tempc = a_m_SEEN;
528 break;
529 case a_SHEEN:
530 tempc = a_m_SHEEN;
531 break;
532 case a_SAD:
533 tempc = a_m_SAD;
534 break;
535 case a_DAD:
536 tempc = a_m_DAD;
537 break;
538 case a_TAH:
539 tempc = a_m_TAH;
540 break;
541 case a_ZAH:
542 tempc = a_m_ZAH;
543 break;
544 case a_AIN:
545 tempc = a_m_AIN;
546 break;
547 case a_GHAIN:
548 tempc = a_m_GHAIN;
549 break;
550 case a_TATWEEL: /* exception */
551 tempc = cur_c;
552 break;
553 case a_FEH:
554 tempc = a_m_FEH;
555 break;
556 case a_QAF:
557 tempc = a_m_QAF;
558 break;
559 case a_KAF:
560 tempc = a_m_KAF;
561 break;
562 case a_LAM:
563 tempc = a_m_LAM;
564 break;
565 case a_MEEM:
566 tempc = a_m_MEEM;
567 break;
568 case a_NOON:
569 tempc = a_m_NOON;
570 break;
571 case a_HEH:
572 tempc = a_m_HEH;
573 break;
574 case a_WAW: /* exception */
575 tempc = a_f_WAW;
576 break;
577 case a_ALEF_MAKSURA: /* exception */
578 tempc = a_f_ALEF_MAKSURA;
579 break;
580 case a_YEH:
581 tempc = a_m_YEH;
582 break;
583 default:
584 tempc = 0;
587 return tempc;
592 * Change shape - from ISO-8859-6/Isolated to final
594 static int
595 chg_c_a2f(cur_c)
596 int cur_c;
598 int tempc;
600 /* NOTE: these encodings need to be accounted for
602 a_f_ALEF_MADDA;
603 a_f_ALEF_HAMZA_ABOVE;
604 a_f_ALEF_HAMZA_BELOW;
605 a_f_LAM_ALEF_MADDA_ABOVE;
606 a_f_LAM_ALEF_HAMZA_ABOVE;
607 a_f_LAM_ALEF_HAMZA_BELOW;
610 switch (cur_c)
612 case a_HAMZA: /* exception */
613 tempc = a_s_HAMZA;
614 break;
615 case a_ALEF_MADDA:
616 tempc = a_f_ALEF_MADDA;
617 break;
618 case a_ALEF_HAMZA_ABOVE:
619 tempc = a_f_ALEF_HAMZA_ABOVE;
620 break;
621 case a_WAW_HAMZA:
622 tempc = a_f_WAW_HAMZA;
623 break;
624 case a_ALEF_HAMZA_BELOW:
625 tempc = a_f_ALEF_HAMZA_BELOW;
626 break;
627 case a_YEH_HAMZA:
628 tempc = a_f_YEH_HAMZA;
629 break;
630 case a_ALEF:
631 tempc = a_f_ALEF;
632 break;
633 case a_BEH:
634 tempc = a_f_BEH;
635 break;
636 case a_TEH_MARBUTA:
637 tempc = a_f_TEH_MARBUTA;
638 break;
639 case a_TEH:
640 tempc = a_f_TEH;
641 break;
642 case a_THEH:
643 tempc = a_f_THEH;
644 break;
645 case a_JEEM:
646 tempc = a_f_JEEM;
647 break;
648 case a_HAH:
649 tempc = a_f_HAH;
650 break;
651 case a_KHAH:
652 tempc = a_f_KHAH;
653 break;
654 case a_DAL:
655 tempc = a_f_DAL;
656 break;
657 case a_THAL:
658 tempc = a_f_THAL;
659 break;
660 case a_REH:
661 tempc = a_f_REH;
662 break;
663 case a_ZAIN:
664 tempc = a_f_ZAIN;
665 break;
666 case a_SEEN:
667 tempc = a_f_SEEN;
668 break;
669 case a_SHEEN:
670 tempc = a_f_SHEEN;
671 break;
672 case a_SAD:
673 tempc = a_f_SAD;
674 break;
675 case a_DAD:
676 tempc = a_f_DAD;
677 break;
678 case a_TAH:
679 tempc = a_f_TAH;
680 break;
681 case a_ZAH:
682 tempc = a_f_ZAH;
683 break;
684 case a_AIN:
685 tempc = a_f_AIN;
686 break;
687 case a_GHAIN:
688 tempc = a_f_GHAIN;
689 break;
690 case a_TATWEEL: /* exception */
691 tempc = cur_c;
692 break;
693 case a_FEH:
694 tempc = a_f_FEH;
695 break;
696 case a_QAF:
697 tempc = a_f_QAF;
698 break;
699 case a_KAF:
700 tempc = a_f_KAF;
701 break;
702 case a_LAM:
703 tempc = a_f_LAM;
704 break;
705 case a_MEEM:
706 tempc = a_f_MEEM;
707 break;
708 case a_NOON:
709 tempc = a_f_NOON;
710 break;
711 case a_HEH:
712 tempc = a_f_HEH;
713 break;
714 case a_WAW:
715 tempc = a_f_WAW;
716 break;
717 case a_ALEF_MAKSURA:
718 tempc = a_f_ALEF_MAKSURA;
719 break;
720 case a_YEH:
721 tempc = a_f_YEH;
722 break;
723 default:
724 tempc = 0;
727 return tempc;
732 * Change shape - from Initial to Medial
734 static int
735 chg_c_i2m(cur_c)
736 int cur_c;
738 int tempc;
740 switch (cur_c)
742 case a_i_YEH_HAMZA:
743 tempc = a_m_YEH_HAMZA;
744 break;
745 case a_i_BEH:
746 tempc = a_m_BEH;
747 break;
748 case a_i_TEH:
749 tempc = a_m_TEH;
750 break;
751 case a_i_THEH:
752 tempc = a_m_THEH;
753 break;
754 case a_i_JEEM:
755 tempc = a_m_JEEM;
756 break;
757 case a_i_HAH:
758 tempc = a_m_HAH;
759 break;
760 case a_i_KHAH:
761 tempc = a_m_KHAH;
762 break;
763 case a_i_SEEN:
764 tempc = a_m_SEEN;
765 break;
766 case a_i_SHEEN:
767 tempc = a_m_SHEEN;
768 break;
769 case a_i_SAD:
770 tempc = a_m_SAD;
771 break;
772 case a_i_DAD:
773 tempc = a_m_DAD;
774 break;
775 case a_i_TAH:
776 tempc = a_m_TAH;
777 break;
778 case a_i_ZAH:
779 tempc = a_m_ZAH;
780 break;
781 case a_i_AIN:
782 tempc = a_m_AIN;
783 break;
784 case a_i_GHAIN:
785 tempc = a_m_GHAIN;
786 break;
787 case a_i_FEH:
788 tempc = a_m_FEH;
789 break;
790 case a_i_QAF:
791 tempc = a_m_QAF;
792 break;
793 case a_i_KAF:
794 tempc = a_m_KAF;
795 break;
796 case a_i_LAM:
797 tempc = a_m_LAM;
798 break;
799 case a_i_MEEM:
800 tempc = a_m_MEEM;
801 break;
802 case a_i_NOON:
803 tempc = a_m_NOON;
804 break;
805 case a_i_HEH:
806 tempc = a_m_HEH;
807 break;
808 case a_i_YEH:
809 tempc = a_m_YEH;
810 break;
811 default:
812 tempc = 0;
815 return tempc;
820 * Change shape - from Final to Medial
822 static int
823 chg_c_f2m(cur_c)
824 int cur_c;
826 int tempc;
828 switch (cur_c)
830 /* NOTE: these encodings are multi-positional, no ?
831 case a_f_ALEF_MADDA:
832 case a_f_ALEF_HAMZA_ABOVE:
833 case a_f_ALEF_HAMZA_BELOW:
835 case a_f_YEH_HAMZA:
836 tempc = a_m_YEH_HAMZA;
837 break;
838 case a_f_WAW_HAMZA: /* exceptions */
839 case a_f_ALEF:
840 case a_f_TEH_MARBUTA:
841 case a_f_DAL:
842 case a_f_THAL:
843 case a_f_REH:
844 case a_f_ZAIN:
845 case a_f_WAW:
846 case a_f_ALEF_MAKSURA:
847 tempc = cur_c;
848 break;
849 case a_f_BEH:
850 tempc = a_m_BEH;
851 break;
852 case a_f_TEH:
853 tempc = a_m_TEH;
854 break;
855 case a_f_THEH:
856 tempc = a_m_THEH;
857 break;
858 case a_f_JEEM:
859 tempc = a_m_JEEM;
860 break;
861 case a_f_HAH:
862 tempc = a_m_HAH;
863 break;
864 case a_f_KHAH:
865 tempc = a_m_KHAH;
866 break;
867 case a_f_SEEN:
868 tempc = a_m_SEEN;
869 break;
870 case a_f_SHEEN:
871 tempc = a_m_SHEEN;
872 break;
873 case a_f_SAD:
874 tempc = a_m_SAD;
875 break;
876 case a_f_DAD:
877 tempc = a_m_DAD;
878 break;
879 case a_f_TAH:
880 tempc = a_m_TAH;
881 break;
882 case a_f_ZAH:
883 tempc = a_m_ZAH;
884 break;
885 case a_f_AIN:
886 tempc = a_m_AIN;
887 break;
888 case a_f_GHAIN:
889 tempc = a_m_GHAIN;
890 break;
891 case a_f_FEH:
892 tempc = a_m_FEH;
893 break;
894 case a_f_QAF:
895 tempc = a_m_QAF;
896 break;
897 case a_f_KAF:
898 tempc = a_m_KAF;
899 break;
900 case a_f_LAM:
901 tempc = a_m_LAM;
902 break;
903 case a_f_MEEM:
904 tempc = a_m_MEEM;
905 break;
906 case a_f_NOON:
907 tempc = a_m_NOON;
908 break;
909 case a_f_HEH:
910 tempc = a_m_HEH;
911 break;
912 case a_f_YEH:
913 tempc = a_m_YEH;
914 break;
915 /* NOTE: these encodings are multi-positional, no ?
916 case a_f_LAM_ALEF_MADDA_ABOVE:
917 case a_f_LAM_ALEF_HAMZA_ABOVE:
918 case a_f_LAM_ALEF_HAMZA_BELOW:
919 case a_f_LAM_ALEF:
921 default:
922 tempc = 0;
925 return tempc;
930 * Change shape - from Combination (2 char) to an Isolated
932 static int
933 chg_c_laa2i(hid_c)
934 int hid_c;
936 int tempc;
938 switch (hid_c)
940 case a_ALEF_MADDA:
941 tempc = a_s_LAM_ALEF_MADDA_ABOVE;
942 break;
943 case a_ALEF_HAMZA_ABOVE:
944 tempc = a_s_LAM_ALEF_HAMZA_ABOVE;
945 break;
946 case a_ALEF_HAMZA_BELOW:
947 tempc = a_s_LAM_ALEF_HAMZA_BELOW;
948 break;
949 case a_ALEF:
950 tempc = a_s_LAM_ALEF;
951 break;
952 default:
953 tempc = 0;
956 return tempc;
961 * Change shape - from Combination-Isolated to Final
963 static int
964 chg_c_laa2f(hid_c)
965 int hid_c;
967 int tempc;
969 switch (hid_c)
971 case a_ALEF_MADDA:
972 tempc = a_f_LAM_ALEF_MADDA_ABOVE;
973 break;
974 case a_ALEF_HAMZA_ABOVE:
975 tempc = a_f_LAM_ALEF_HAMZA_ABOVE;
976 break;
977 case a_ALEF_HAMZA_BELOW:
978 tempc = a_f_LAM_ALEF_HAMZA_BELOW;
979 break;
980 case a_ALEF:
981 tempc = a_f_LAM_ALEF;
982 break;
983 default:
984 tempc = 0;
987 return tempc;
/*
 * Do "half-shaping" on character "c".
 *
 * An unshaped letter (A_is_a()) is converted with chg_c_a2i(); a valid
 * final form is converted with chg_c_f2m().  Returns zero if no shaping
 * applies.
 */
    static int
half_shape(c)
    int c;
{
    int shaped = 0;

    if (A_is_a(c))
        shaped = chg_c_a2i(c);
    else if (A_is_valid(c) && A_is_f(c))
        shaped = chg_c_f2m(c);

    return shaped;
}
1005 * Do Arabic shaping on character "c". Returns the shaped character.
1006 * out: "ccp" points to the first byte of the character to be shaped.
1007 * in/out: "c1p" points to the first composing char for "c".
1008 * in: "prev_c" is the previous character (not shaped)
1009 * in: "prev_c1" is the first composing char for the previous char
1010 * (not shaped)
1011 * in: "next_c" is the next character (not shaped).
1014 arabic_shape(c, ccp, c1p, prev_c, prev_c1, next_c)
1015 int c;
1016 int *ccp;
1017 int *c1p;
1018 int prev_c;
1019 int prev_c1;
1020 int next_c;
1022 int curr_c;
1023 int shape_c;
1024 int curr_laa;
1025 int prev_laa;
1027 /* Deal only with Arabic character, pass back all others */
1028 if (!A_is_ok(c))
1029 return c;
1031 /* half-shape current and previous character */
1032 shape_c = half_shape(prev_c);
1034 /* Save away current character */
1035 curr_c = c;
1037 curr_laa = A_firstc_laa(c, *c1p);
1038 prev_laa = A_firstc_laa(prev_c, prev_c1);
1040 if (curr_laa)
1042 if (A_is_valid(prev_c) && !A_is_f(shape_c)
1043 && !A_is_s(shape_c) && !prev_laa)
1044 curr_c = chg_c_laa2f(curr_laa);
1045 else
1046 curr_c = chg_c_laa2i(curr_laa);
1048 /* Remove the composing character */
1049 *c1p = 0;
1051 else if (!A_is_valid(prev_c) && A_is_valid(next_c))
1052 curr_c = chg_c_a2i(c);
1053 else if (!shape_c || A_is_f(shape_c) || A_is_s(shape_c) || prev_laa)
1054 curr_c = A_is_valid(next_c) ? chg_c_a2i(c) : chg_c_a2s(c);
1055 else if (A_is_valid(next_c))
1056 curr_c = A_is_iso(c) ? chg_c_a2m(c) : chg_c_i2m(c);
1057 else if (A_is_valid(prev_c))
1058 curr_c = chg_c_a2f(c);
1059 else
1060 curr_c = chg_c_a2s(c);
1062 /* Sanity check -- curr_c should, in the future, never be 0.
1063 * We should, in the future, insert a fatal error here. */
1064 if (curr_c == NUL)
1065 curr_c = c;
1067 if (curr_c != c && ccp != NULL)
1069 char_u buf[MB_MAXBYTES];
1071 /* Update the first byte of the character. */
1072 (*mb_char2bytes)(curr_c, buf);
1073 *ccp = buf[0];
1076 /* Return the shaped character */
1077 return curr_c;
1082 * A_firstc_laa returns first character of LAA combination if it exists
1084 static int
1085 A_firstc_laa(c, c1)
1086 int c; /* base character */
1087 int c1; /* first composing character */
1089 if (c1 != NUL && c == a_LAM && !A_is_harakat(c1))
1090 return c1;
1091 return 0;
1096 * A_is_harakat returns TRUE if 'c' is an Arabic Harakat character
1097 * (harakat/tanween)
1099 static int
1100 A_is_harakat(c)
1101 int c;
1103 return (c >= a_FATHATAN && c <= a_SUKUN);
1108 * A_is_iso returns TRUE if 'c' is an Arabic ISO-8859-6 character
1109 * (alphabet/number/punctuation)
1111 static int
1112 A_is_iso(c)
1113 int c;
1115 return ((c >= a_HAMZA && c <= a_GHAIN)
1116 || (c >= a_TATWEEL && c <= a_HAMZA_BELOW)
1117 || c == a_MINI_ALEF);
1122 * A_is_formb returns TRUE if 'c' is an Arabic 10646-1 FormB character
1123 * (alphabet/number/punctuation)
1125 static int
1126 A_is_formb(c)
1127 int c;
1129 return ((c >= a_s_FATHATAN && c <= a_s_DAMMATAN)
1130 || c == a_s_KASRATAN
1131 || (c >= a_s_FATHA && c <= a_f_LAM_ALEF)
1132 || c == a_BYTE_ORDER_MARK);
/*
 * A_is_ok returns non-zero if 'c' is an Arabic 10646 character, that is
 * when either A_is_iso() (8859-6) or A_is_formb() (Form-B) accepts it.
 */
    static int
A_is_ok(c)
    int c;
{
    int ok = (A_is_iso(c) != 0);

    /* Only consult the Form-B test when the ISO test failed. */
    if (!ok)
        ok = (A_is_formb(c) != 0);

    return ok;
}
/*
 * A_is_valid returns non-zero if 'c' is an Arabic 10646 (8859-6 or Form-B)
 * character, with some exceptions/exclusions: characters accepted by
 * A_is_special() are not valid.
 */
    static int
A_is_valid(c)
    int c;
{
    /* Non-Arabic characters are never valid. */
    if (!A_is_ok(c))
        return 0;

    return !A_is_special(c);
}
1160 * A_is_special returns TRUE if 'c' is not a special Arabic character.
1161 * Specials don't adhere to most of the rules.
1163 static int
1164 A_is_special(c)
1165 int c;
1167 return (c == a_HAMZA || c == a_s_HAMZA);