xref: /vim-8.2.3635/src/arabic.c (revision 2bf24176)
1 /* vi:set ts=8 sts=4 sw=4:
2  *
3  * VIM - Vi IMproved    by Bram Moolenaar
4  *
5  * Do ":help uganda"  in Vim to read copying and usage conditions.
6  * Do ":help credits" in Vim to see a list of people who contributed.
7  * See README.txt for an overview of the Vim source code.
8  */
9 
10 /*
11  * arabic.c: functions for Arabic language
12  *
13  * Included by main.c, when FEAT_ARABIC & FEAT_GUI is defined.
14  *
15  * --
16  *
17  * Author: Nadim Shaikli & Isam Bayazidi
18  *
19  */
20 
21 static int  A_is_a __ARGS((int cur_c));
22 static int  A_is_s __ARGS((int cur_c));
23 static int  A_is_f __ARGS((int cur_c));
24 static int  chg_c_a2s __ARGS((int cur_c));
25 static int  chg_c_a2i __ARGS((int cur_c));
26 static int  chg_c_a2m __ARGS((int cur_c));
27 static int  chg_c_a2f __ARGS((int cur_c));
28 static int  chg_c_i2m __ARGS((int cur_c));
29 static int  chg_c_f2m __ARGS((int cur_c));
30 static int  chg_c_laa2i __ARGS((int hid_c));
31 static int  chg_c_laa2f __ARGS((int hid_c));
32 static int  half_shape __ARGS((int c));
33 static int  A_firstc_laa __ARGS((int c1, int c));
34 static int  A_is_harakat __ARGS((int c));
35 static int  A_is_iso __ARGS((int c));
36 static int  A_is_formb __ARGS((int c));
37 static int  A_is_ok __ARGS((int c));
38 static int  A_is_valid __ARGS((int c));
39 static int  A_is_special __ARGS((int c));
40 
41 
42 /*
43  * Returns True if c is an ISO-8859-6 shaped ARABIC letter (user entered)
44  */
45     static int
46 A_is_a(cur_c)
47     int cur_c;
48 {
49     switch (cur_c)
50     {
51 	case a_HAMZA:
52 	case a_ALEF_MADDA:
53 	case a_ALEF_HAMZA_ABOVE:
54 	case a_WAW_HAMZA:
55 	case a_ALEF_HAMZA_BELOW:
56 	case a_YEH_HAMZA:
57 	case a_ALEF:
58 	case a_BEH:
59 	case a_TEH_MARBUTA:
60 	case a_TEH:
61 	case a_THEH:
62 	case a_JEEM:
63 	case a_HAH:
64 	case a_KHAH:
65 	case a_DAL:
66 	case a_THAL:
67 	case a_REH:
68 	case a_ZAIN:
69 	case a_SEEN:
70 	case a_SHEEN:
71 	case a_SAD:
72 	case a_DAD:
73 	case a_TAH:
74 	case a_ZAH:
75 	case a_AIN:
76 	case a_GHAIN:
77 	case a_TATWEEL:
78 	case a_FEH:
79 	case a_QAF:
80 	case a_KAF:
81 	case a_LAM:
82 	case a_MEEM:
83 	case a_NOON:
84 	case a_HEH:
85 	case a_WAW:
86 	case a_ALEF_MAKSURA:
87 	case a_YEH:
88 	    return TRUE;
89     }
90 
91     return FALSE;
92 }
93 
94 
95 /*
96  * Returns True if c is an Isolated Form-B ARABIC letter
97  */
98     static int
99 A_is_s(cur_c)
100     int cur_c;
101 {
102     switch (cur_c)
103     {
104 	case a_s_HAMZA:
105 	case a_s_ALEF_MADDA:
106 	case a_s_ALEF_HAMZA_ABOVE:
107 	case a_s_WAW_HAMZA:
108 	case a_s_ALEF_HAMZA_BELOW:
109 	case a_s_YEH_HAMZA:
110 	case a_s_ALEF:
111 	case a_s_BEH:
112 	case a_s_TEH_MARBUTA:
113 	case a_s_TEH:
114 	case a_s_THEH:
115 	case a_s_JEEM:
116 	case a_s_HAH:
117 	case a_s_KHAH:
118 	case a_s_DAL:
119 	case a_s_THAL:
120 	case a_s_REH:
121 	case a_s_ZAIN:
122 	case a_s_SEEN:
123 	case a_s_SHEEN:
124 	case a_s_SAD:
125 	case a_s_DAD:
126 	case a_s_TAH:
127 	case a_s_ZAH:
128 	case a_s_AIN:
129 	case a_s_GHAIN:
130 	case a_s_FEH:
131 	case a_s_QAF:
132 	case a_s_KAF:
133 	case a_s_LAM:
134 	case a_s_MEEM:
135 	case a_s_NOON:
136 	case a_s_HEH:
137 	case a_s_WAW:
138 	case a_s_ALEF_MAKSURA:
139 	case a_s_YEH:
140 	    return TRUE;
141     }
142 
143     return FALSE;
144 }
145 
146 
147 /*
148  * Returns True if c is a Final shape of an ARABIC letter
149  */
150     static int
151 A_is_f(cur_c)
152     int cur_c;
153 {
154     switch (cur_c)
155     {
156 	case a_f_ALEF_MADDA:
157 	case a_f_ALEF_HAMZA_ABOVE:
158 	case a_f_WAW_HAMZA:
159 	case a_f_ALEF_HAMZA_BELOW:
160 	case a_f_YEH_HAMZA:
161 	case a_f_ALEF:
162 	case a_f_BEH:
163 	case a_f_TEH_MARBUTA:
164 	case a_f_TEH:
165 	case a_f_THEH:
166 	case a_f_JEEM:
167 	case a_f_HAH:
168 	case a_f_KHAH:
169 	case a_f_DAL:
170 	case a_f_THAL:
171 	case a_f_REH:
172 	case a_f_ZAIN:
173 	case a_f_SEEN:
174 	case a_f_SHEEN:
175 	case a_f_SAD:
176 	case a_f_DAD:
177 	case a_f_TAH:
178 	case a_f_ZAH:
179 	case a_f_AIN:
180 	case a_f_GHAIN:
181 	case a_f_FEH:
182 	case a_f_QAF:
183 	case a_f_KAF:
184 	case a_f_LAM:
185 	case a_f_MEEM:
186 	case a_f_NOON:
187 	case a_f_HEH:
188 	case a_f_WAW:
189 	case a_f_ALEF_MAKSURA:
190 	case a_f_YEH:
191 	case a_f_LAM_ALEF_MADDA_ABOVE:
192 	case a_f_LAM_ALEF_HAMZA_ABOVE:
193 	case a_f_LAM_ALEF_HAMZA_BELOW:
194 	case a_f_LAM_ALEF:
195 	    return TRUE;
196     }
197     return FALSE;
198 }
199 
200 
201 /*
202  * Change shape - from ISO-8859-6/Isolated to Form-B Isolated
203  */
204     static int
205 chg_c_a2s(cur_c)
206     int cur_c;
207 {
208     int tempc;
209 
210     switch (cur_c)
211     {
212 	case a_HAMZA:
213 	    tempc = a_s_HAMZA;
214 	    break;
215 	case a_ALEF_MADDA:
216 	    tempc = a_s_ALEF_MADDA;
217 	    break;
218 	case a_ALEF_HAMZA_ABOVE:
219 	    tempc = a_s_ALEF_HAMZA_ABOVE;
220 	    break;
221 	case a_WAW_HAMZA:
222 	    tempc = a_s_WAW_HAMZA;
223 	    break;
224 	case a_ALEF_HAMZA_BELOW:
225 	    tempc = a_s_ALEF_HAMZA_BELOW;
226 	    break;
227 	case a_YEH_HAMZA:
228 	    tempc = a_s_YEH_HAMZA;
229 	    break;
230 	case a_ALEF:
231 	    tempc = a_s_ALEF;
232 	    break;
233 	case a_TEH_MARBUTA:
234 	    tempc = a_s_TEH_MARBUTA;
235 	    break;
236 	case a_DAL:
237 	    tempc = a_s_DAL;
238 	    break;
239 	case a_THAL:
240 	    tempc = a_s_THAL;
241 	    break;
242 	case a_REH:
243 	    tempc = a_s_REH;
244 	    break;
245 	case a_ZAIN:
246 	    tempc = a_s_ZAIN;
247 	    break;
248 	case a_TATWEEL:			/* exceptions */
249 	    tempc = cur_c;
250 	    break;
251 	case a_WAW:
252 	    tempc = a_s_WAW;
253 	    break;
254 	case a_ALEF_MAKSURA:
255 	    tempc = a_s_ALEF_MAKSURA;
256 	    break;
257 	case a_BEH:
258 	    tempc = a_s_BEH;
259 	    break;
260 	case a_TEH:
261 	    tempc = a_s_TEH;
262 	    break;
263 	case a_THEH:
264 	    tempc = a_s_THEH;
265 	    break;
266 	case a_JEEM:
267 	    tempc = a_s_JEEM;
268 	    break;
269 	case a_HAH:
270 	    tempc = a_s_HAH;
271 	    break;
272 	case a_KHAH:
273 	    tempc = a_s_KHAH;
274 	    break;
275 	case a_SEEN:
276 	    tempc = a_s_SEEN;
277 	    break;
278 	case a_SHEEN:
279 	    tempc = a_s_SHEEN;
280 	    break;
281 	case a_SAD:
282 	    tempc = a_s_SAD;
283 	    break;
284 	case a_DAD:
285 	    tempc = a_s_DAD;
286 	    break;
287 	case a_TAH:
288 	    tempc = a_s_TAH;
289 	    break;
290 	case a_ZAH:
291 	    tempc = a_s_ZAH;
292 	    break;
293 	case a_AIN:
294 	    tempc = a_s_AIN;
295 	    break;
296 	case a_GHAIN:
297 	    tempc = a_s_GHAIN;
298 	    break;
299 	case a_FEH:
300 	    tempc = a_s_FEH;
301 	    break;
302 	case a_QAF:
303 	    tempc = a_s_QAF;
304 	    break;
305 	case a_KAF:
306 	    tempc = a_s_KAF;
307 	    break;
308 	case a_LAM:
309 	    tempc = a_s_LAM;
310 	    break;
311 	case a_MEEM:
312 	    tempc = a_s_MEEM;
313 	    break;
314 	case a_NOON:
315 	    tempc = a_s_NOON;
316 	    break;
317 	case a_HEH:
318 	    tempc = a_s_HEH;
319 	    break;
320 	case a_YEH:
321 	    tempc = a_s_YEH;
322 	    break;
323 	default:
324 	    tempc = 0;
325     }
326 
327     return tempc;
328 }
329 
330 
331 /*
332  * Change shape - from ISO-8859-6/Isolated to Initial
333  */
334     static int
335 chg_c_a2i(cur_c)
336     int cur_c;
337 {
338     int tempc;
339 
340     switch (cur_c)
341     {
342 	case a_YEH_HAMZA:
343 	    tempc = a_i_YEH_HAMZA;
344 	    break;
345 	case a_HAMZA:			/* exceptions */
346 	    tempc = a_s_HAMZA;
347 	    break;
348 	case a_ALEF_MADDA:		/* exceptions */
349 	    tempc = a_s_ALEF_MADDA;
350 	    break;
351 	case a_ALEF_HAMZA_ABOVE:	/* exceptions */
352 	    tempc = a_s_ALEF_HAMZA_ABOVE;
353 	    break;
354 	case a_WAW_HAMZA:		/* exceptions */
355 	    tempc = a_s_WAW_HAMZA;
356 	    break;
357 	case a_ALEF_HAMZA_BELOW:	/* exceptions */
358 	    tempc = a_s_ALEF_HAMZA_BELOW;
359 	    break;
360 	case a_ALEF:			/* exceptions */
361 	    tempc = a_s_ALEF;
362 	    break;
363 	case a_TEH_MARBUTA:		/* exceptions */
364 	    tempc = a_s_TEH_MARBUTA;
365 	    break;
366 	case a_DAL:			/* exceptions */
367 	    tempc = a_s_DAL;
368 	    break;
369 	case a_THAL:			/* exceptions */
370 	    tempc = a_s_THAL;
371 	    break;
372 	case a_REH:			/* exceptions */
373 	    tempc = a_s_REH;
374 	    break;
375 	case a_ZAIN:			/* exceptions */
376 	    tempc = a_s_ZAIN;
377 	    break;
378 	case a_TATWEEL:			/* exceptions */
379 	    tempc = cur_c;
380 	    break;
381 	case a_WAW:			/* exceptions */
382 	    tempc = a_s_WAW;
383 	    break;
384 	case a_ALEF_MAKSURA:		/* exceptions */
385 	    tempc = a_s_ALEF_MAKSURA;
386 	    break;
387 	case a_BEH:
388 	    tempc = a_i_BEH;
389 	    break;
390 	case a_TEH:
391 	    tempc = a_i_TEH;
392 	    break;
393 	case a_THEH:
394 	    tempc = a_i_THEH;
395 	    break;
396 	case a_JEEM:
397 	    tempc = a_i_JEEM;
398 	    break;
399 	case a_HAH:
400 	    tempc = a_i_HAH;
401 	    break;
402 	case a_KHAH:
403 	    tempc = a_i_KHAH;
404 	    break;
405 	case a_SEEN:
406 	    tempc = a_i_SEEN;
407 	    break;
408 	case a_SHEEN:
409 	    tempc = a_i_SHEEN;
410 	    break;
411 	case a_SAD:
412 	    tempc = a_i_SAD;
413 	    break;
414 	case a_DAD:
415 	    tempc = a_i_DAD;
416 	    break;
417 	case a_TAH:
418 	    tempc = a_i_TAH;
419 	    break;
420 	case a_ZAH:
421 	    tempc = a_i_ZAH;
422 	    break;
423 	case a_AIN:
424 	    tempc = a_i_AIN;
425 	    break;
426 	case a_GHAIN:
427 	    tempc = a_i_GHAIN;
428 	    break;
429 	case a_FEH:
430 	    tempc = a_i_FEH;
431 	    break;
432 	case a_QAF:
433 	    tempc = a_i_QAF;
434 	    break;
435 	case a_KAF:
436 	    tempc = a_i_KAF;
437 	    break;
438 	case a_LAM:
439 	    tempc = a_i_LAM;
440 	    break;
441 	case a_MEEM:
442 	    tempc = a_i_MEEM;
443 	    break;
444 	case a_NOON:
445 	    tempc = a_i_NOON;
446 	    break;
447 	case a_HEH:
448 	    tempc = a_i_HEH;
449 	    break;
450 	case a_YEH:
451 	    tempc = a_i_YEH;
452 	    break;
453 	default:
454 	    tempc = 0;
455     }
456 
457     return tempc;
458 }
459 
460 
461 /*
462  * Change shape - from ISO-8859-6/Isolated to Medial
463  */
464     static int
465 chg_c_a2m(cur_c)
466     int cur_c;
467 {
468     int tempc;
469 
470     switch (cur_c)
471     {
472 	case a_HAMZA:			/* exception */
473 	    tempc = a_s_HAMZA;
474 	    break;
475 	case a_ALEF_MADDA:		/* exception */
476 	    tempc = a_f_ALEF_MADDA;
477 	    break;
478 	case a_ALEF_HAMZA_ABOVE:	/* exception */
479 	    tempc = a_f_ALEF_HAMZA_ABOVE;
480 	    break;
481 	case a_WAW_HAMZA:		/* exception */
482 	    tempc = a_f_WAW_HAMZA;
483 	    break;
484 	case a_ALEF_HAMZA_BELOW:	/* exception */
485 	    tempc = a_f_ALEF_HAMZA_BELOW;
486 	    break;
487 	case a_YEH_HAMZA:
488 	    tempc = a_m_YEH_HAMZA;
489 	    break;
490 	case a_ALEF:			/* exception */
491 	    tempc = a_f_ALEF;
492 	    break;
493 	case a_BEH:
494 	    tempc = a_m_BEH;
495 	    break;
496 	case a_TEH_MARBUTA:		/* exception */
497 	    tempc = a_f_TEH_MARBUTA;
498 	    break;
499 	case a_TEH:
500 	    tempc = a_m_TEH;
501 	    break;
502 	case a_THEH:
503 	    tempc = a_m_THEH;
504 	    break;
505 	case a_JEEM:
506 	    tempc = a_m_JEEM;
507 	    break;
508 	case a_HAH:
509 	    tempc = a_m_HAH;
510 	    break;
511 	case a_KHAH:
512 	    tempc = a_m_KHAH;
513 	    break;
514 	case a_DAL:			/* exception */
515 	    tempc = a_f_DAL;
516 	    break;
517 	case a_THAL:			/* exception */
518 	    tempc = a_f_THAL;
519 	    break;
520 	case a_REH:			/* exception */
521 	    tempc = a_f_REH;
522 	    break;
523 	case a_ZAIN:			/* exception */
524 	    tempc = a_f_ZAIN;
525 	    break;
526 	case a_SEEN:
527 	    tempc = a_m_SEEN;
528 	    break;
529 	case a_SHEEN:
530 	    tempc = a_m_SHEEN;
531 	    break;
532 	case a_SAD:
533 	    tempc = a_m_SAD;
534 	    break;
535 	case a_DAD:
536 	    tempc = a_m_DAD;
537 	    break;
538 	case a_TAH:
539 	    tempc = a_m_TAH;
540 	    break;
541 	case a_ZAH:
542 	    tempc = a_m_ZAH;
543 	    break;
544 	case a_AIN:
545 	    tempc = a_m_AIN;
546 	    break;
547 	case a_GHAIN:
548 	    tempc = a_m_GHAIN;
549 	    break;
550 	case a_TATWEEL:			/* exception */
551 	    tempc = cur_c;
552 	    break;
553 	case a_FEH:
554 	    tempc = a_m_FEH;
555 	    break;
556 	case a_QAF:
557 	    tempc = a_m_QAF;
558 	    break;
559 	case a_KAF:
560 	    tempc = a_m_KAF;
561 	    break;
562 	case a_LAM:
563 	    tempc = a_m_LAM;
564 	    break;
565 	case a_MEEM:
566 	    tempc = a_m_MEEM;
567 	    break;
568 	case a_NOON:
569 	    tempc = a_m_NOON;
570 	    break;
571 	case a_HEH:
572 	    tempc = a_m_HEH;
573 	    break;
574 	case a_WAW:			/* exception */
575 	    tempc = a_f_WAW;
576 	    break;
577 	case a_ALEF_MAKSURA:		/* exception */
578 	    tempc = a_f_ALEF_MAKSURA;
579 	    break;
580 	case a_YEH:
581 	    tempc = a_m_YEH;
582 	    break;
583 	default:
584 	    tempc = 0;
585     }
586 
587     return tempc;
588 }
589 
590 
591 /*
592  * Change shape - from ISO-8859-6/Isolated to final
593  */
594     static int
595 chg_c_a2f(cur_c)
596     int cur_c;
597 {
598     int tempc;
599 
600     /* NOTE: these encodings need to be accounted for
601 
602 	a_f_ALEF_MADDA;
603 	a_f_ALEF_HAMZA_ABOVE;
604 	a_f_ALEF_HAMZA_BELOW;
605 	a_f_LAM_ALEF_MADDA_ABOVE;
606 	a_f_LAM_ALEF_HAMZA_ABOVE;
607 	a_f_LAM_ALEF_HAMZA_BELOW;
608 	*/
609 
610     switch (cur_c)
611     {
612 	case a_HAMZA:			/* exception */
613 	    tempc = a_s_HAMZA;
614 	    break;
615 	case a_ALEF_MADDA:
616 	    tempc = a_f_ALEF_MADDA;
617 	    break;
618 	case a_ALEF_HAMZA_ABOVE:
619 	    tempc = a_f_ALEF_HAMZA_ABOVE;
620 	    break;
621 	case a_WAW_HAMZA:
622 	    tempc = a_f_WAW_HAMZA;
623 	    break;
624 	case a_ALEF_HAMZA_BELOW:
625 	    tempc = a_f_ALEF_HAMZA_BELOW;
626 	    break;
627 	case a_YEH_HAMZA:
628 	    tempc = a_f_YEH_HAMZA;
629 	    break;
630 	case a_ALEF:
631 	    tempc = a_f_ALEF;
632 	    break;
633 	case a_BEH:
634 	    tempc = a_f_BEH;
635 	    break;
636 	case a_TEH_MARBUTA:
637 	    tempc = a_f_TEH_MARBUTA;
638 	    break;
639 	case a_TEH:
640 	    tempc = a_f_TEH;
641 	    break;
642 	case a_THEH:
643 	    tempc = a_f_THEH;
644 	    break;
645 	case a_JEEM:
646 	    tempc = a_f_JEEM;
647 	    break;
648 	case a_HAH:
649 	    tempc = a_f_HAH;
650 	    break;
651 	case a_KHAH:
652 	    tempc = a_f_KHAH;
653 	    break;
654 	case a_DAL:
655 	    tempc = a_f_DAL;
656 	    break;
657 	case a_THAL:
658 	    tempc = a_f_THAL;
659 	    break;
660 	case a_REH:
661 	    tempc = a_f_REH;
662 	    break;
663 	case a_ZAIN:
664 	    tempc = a_f_ZAIN;
665 	    break;
666 	case a_SEEN:
667 	    tempc = a_f_SEEN;
668 	    break;
669 	case a_SHEEN:
670 	    tempc = a_f_SHEEN;
671 	    break;
672 	case a_SAD:
673 	    tempc = a_f_SAD;
674 	    break;
675 	case a_DAD:
676 	    tempc = a_f_DAD;
677 	    break;
678 	case a_TAH:
679 	    tempc = a_f_TAH;
680 	    break;
681 	case a_ZAH:
682 	    tempc = a_f_ZAH;
683 	    break;
684 	case a_AIN:
685 	    tempc = a_f_AIN;
686 	    break;
687 	case a_GHAIN:
688 	    tempc = a_f_GHAIN;
689 	    break;
690 	case a_TATWEEL:			/* exception */
691 	    tempc = cur_c;
692 	    break;
693 	case a_FEH:
694 	    tempc = a_f_FEH;
695 	    break;
696 	case a_QAF:
697 	    tempc = a_f_QAF;
698 	    break;
699 	case a_KAF:
700 	    tempc = a_f_KAF;
701 	    break;
702 	case a_LAM:
703 	    tempc = a_f_LAM;
704 	    break;
705 	case a_MEEM:
706 	    tempc = a_f_MEEM;
707 	    break;
708 	case a_NOON:
709 	    tempc = a_f_NOON;
710 	    break;
711 	case a_HEH:
712 	    tempc = a_f_HEH;
713 	    break;
714 	case a_WAW:
715 	    tempc = a_f_WAW;
716 	    break;
717 	case a_ALEF_MAKSURA:
718 	    tempc = a_f_ALEF_MAKSURA;
719 	    break;
720 	case a_YEH:
721 	    tempc = a_f_YEH;
722 	    break;
723 	default:
724 	    tempc = 0;
725     }
726 
727     return tempc;
728 }
729 
730 
731 /*
732  * Change shape - from Initial to Medial
733  */
734     static int
735 chg_c_i2m(cur_c)
736     int cur_c;
737 {
738     int tempc;
739 
740     switch (cur_c)
741     {
742 	case a_i_YEH_HAMZA:
743 	    tempc = a_m_YEH_HAMZA;
744 	    break;
745 	case a_i_BEH:
746 	    tempc = a_m_BEH;
747 	    break;
748 	case a_i_TEH:
749 	    tempc = a_m_TEH;
750 	    break;
751 	case a_i_THEH:
752 	    tempc = a_m_THEH;
753 	    break;
754 	case a_i_JEEM:
755 	    tempc = a_m_JEEM;
756 	    break;
757 	case a_i_HAH:
758 	    tempc = a_m_HAH;
759 	    break;
760 	case a_i_KHAH:
761 	    tempc = a_m_KHAH;
762 	    break;
763 	case a_i_SEEN:
764 	    tempc = a_m_SEEN;
765 	    break;
766 	case a_i_SHEEN:
767 	    tempc = a_m_SHEEN;
768 	    break;
769 	case a_i_SAD:
770 	    tempc = a_m_SAD;
771 	    break;
772 	case a_i_DAD:
773 	    tempc = a_m_DAD;
774 	    break;
775 	case a_i_TAH:
776 	    tempc = a_m_TAH;
777 	    break;
778 	case a_i_ZAH:
779 	    tempc = a_m_ZAH;
780 	    break;
781 	case a_i_AIN:
782 	    tempc = a_m_AIN;
783 	    break;
784 	case a_i_GHAIN:
785 	    tempc = a_m_GHAIN;
786 	    break;
787 	case a_i_FEH:
788 	    tempc = a_m_FEH;
789 	    break;
790 	case a_i_QAF:
791 	    tempc = a_m_QAF;
792 	    break;
793 	case a_i_KAF:
794 	    tempc = a_m_KAF;
795 	    break;
796 	case a_i_LAM:
797 	    tempc = a_m_LAM;
798 	    break;
799 	case a_i_MEEM:
800 	    tempc = a_m_MEEM;
801 	    break;
802 	case a_i_NOON:
803 	    tempc = a_m_NOON;
804 	    break;
805 	case a_i_HEH:
806 	    tempc = a_m_HEH;
807 	    break;
808 	case a_i_YEH:
809 	    tempc = a_m_YEH;
810 	    break;
811 	default:
812 	    tempc = 0;
813     }
814 
815     return tempc;
816 }
817 
818 
819 /*
820  * Change shape - from Final to Medial
821  */
822     static int
823 chg_c_f2m(cur_c)
824     int cur_c;
825 {
826     int tempc;
827 
828     switch (cur_c)
829     {
830 	/* NOTE: these encodings are multi-positional, no ?
831 	   case a_f_ALEF_MADDA:
832 	   case a_f_ALEF_HAMZA_ABOVE:
833 	   case a_f_ALEF_HAMZA_BELOW:
834 	   */
835 	case a_f_YEH_HAMZA:
836 	    tempc = a_m_YEH_HAMZA;
837 	    break;
838 	case a_f_WAW_HAMZA:		/* exceptions */
839 	case a_f_ALEF:
840 	case a_f_TEH_MARBUTA:
841 	case a_f_DAL:
842 	case a_f_THAL:
843 	case a_f_REH:
844 	case a_f_ZAIN:
845 	case a_f_WAW:
846 	case a_f_ALEF_MAKSURA:
847 	    tempc = cur_c;
848 	    break;
849 	case a_f_BEH:
850 	    tempc = a_m_BEH;
851 	    break;
852 	case a_f_TEH:
853 	    tempc = a_m_TEH;
854 	    break;
855 	case a_f_THEH:
856 	    tempc = a_m_THEH;
857 	    break;
858 	case a_f_JEEM:
859 	    tempc = a_m_JEEM;
860 	    break;
861 	case a_f_HAH:
862 	    tempc = a_m_HAH;
863 	    break;
864 	case a_f_KHAH:
865 	    tempc = a_m_KHAH;
866 	    break;
867 	case a_f_SEEN:
868 	    tempc = a_m_SEEN;
869 	    break;
870 	case a_f_SHEEN:
871 	    tempc = a_m_SHEEN;
872 	    break;
873 	case a_f_SAD:
874 	    tempc = a_m_SAD;
875 	    break;
876 	case a_f_DAD:
877 	    tempc = a_m_DAD;
878 	    break;
879 	case a_f_TAH:
880 	    tempc = a_m_TAH;
881 	    break;
882 	case a_f_ZAH:
883 	    tempc = a_m_ZAH;
884 	    break;
885 	case a_f_AIN:
886 	    tempc = a_m_AIN;
887 	    break;
888 	case a_f_GHAIN:
889 	    tempc = a_m_GHAIN;
890 	    break;
891 	case a_f_FEH:
892 	    tempc = a_m_FEH;
893 	    break;
894 	case a_f_QAF:
895 	    tempc = a_m_QAF;
896 	    break;
897 	case a_f_KAF:
898 	    tempc = a_m_KAF;
899 	    break;
900 	case a_f_LAM:
901 	    tempc = a_m_LAM;
902 	    break;
903 	case a_f_MEEM:
904 	    tempc = a_m_MEEM;
905 	    break;
906 	case a_f_NOON:
907 	    tempc = a_m_NOON;
908 	    break;
909 	case a_f_HEH:
910 	    tempc = a_m_HEH;
911 	    break;
912 	case a_f_YEH:
913 	    tempc = a_m_YEH;
914 	    break;
915 	    /* NOTE: these encodings are multi-positional, no ?
916 		case a_f_LAM_ALEF_MADDA_ABOVE:
917 		case a_f_LAM_ALEF_HAMZA_ABOVE:
918 		case a_f_LAM_ALEF_HAMZA_BELOW:
919 		case a_f_LAM_ALEF:
920 		*/
921 	default:
922 	    tempc = 0;
923     }
924 
925     return tempc;
926 }
927 
928 
929 /*
930  * Change shape - from Combination (2 char) to an Isolated
931  */
932     static int
933 chg_c_laa2i(hid_c)
934     int hid_c;
935 {
936     int tempc;
937 
938     switch (hid_c)
939     {
940 	case a_ALEF_MADDA:
941 	    tempc = a_s_LAM_ALEF_MADDA_ABOVE;
942 	    break;
943 	case a_ALEF_HAMZA_ABOVE:
944 	    tempc = a_s_LAM_ALEF_HAMZA_ABOVE;
945 	    break;
946 	case a_ALEF_HAMZA_BELOW:
947 	    tempc = a_s_LAM_ALEF_HAMZA_BELOW;
948 	    break;
949 	case a_ALEF:
950 	    tempc = a_s_LAM_ALEF;
951 	    break;
952 	default:
953 	    tempc = 0;
954     }
955 
956     return tempc;
957 }
958 
959 
960 /*
961  * Change shape - from Combination-Isolated to Final
962  */
963     static int
964 chg_c_laa2f(hid_c)
965     int hid_c;
966 {
967     int tempc;
968 
969     switch (hid_c)
970     {
971 	case a_ALEF_MADDA:
972 	    tempc = a_f_LAM_ALEF_MADDA_ABOVE;
973 	    break;
974 	case a_ALEF_HAMZA_ABOVE:
975 	    tempc = a_f_LAM_ALEF_HAMZA_ABOVE;
976 	    break;
977 	case a_ALEF_HAMZA_BELOW:
978 	    tempc = a_f_LAM_ALEF_HAMZA_BELOW;
979 	    break;
980 	case a_ALEF:
981 	    tempc = a_f_LAM_ALEF;
982 	    break;
983 	default:
984 	    tempc = 0;
985     }
986 
987     return tempc;
988 }
989 
/*
 * "Half-shape" character "c", i.e. shape it without looking at neighbours:
 * - a letter accepted by A_is_a() is converted with chg_c_a2i();
 * - otherwise a valid final shape is converted with chg_c_f2m().
 * Returns zero when neither rule applies (the conversion functions may
 * return zero themselves as well).
 */
    static int
half_shape(c)
    int		c;
{
    int		shaped = 0;

    if (A_is_a(c))
	shaped = chg_c_a2i(c);
    else if (A_is_valid(c) && A_is_f(c))
	shaped = chg_c_f2m(c);

    return shaped;
}
1003 
1004 /*
1005  * Do Arabic shaping on character "c".  Returns the shaped character.
1006  * out:    "ccp" points to the first byte of the character to be shaped.
1007  * in/out: "c1p" points to the first composing char for "c".
1008  * in:     "prev_c"  is the previous character (not shaped)
1009  * in:     "prev_c1" is the first composing char for the previous char
1010  *		     (not shaped)
1011  * in:     "next_c"  is the next character (not shaped).
1012  */
1013     int
1014 arabic_shape(c, ccp, c1p, prev_c, prev_c1, next_c)
1015     int		c;
1016     int		*ccp;
1017     int		*c1p;
1018     int		prev_c;
1019     int		prev_c1;
1020     int		next_c;
1021 {
1022     int		curr_c;
1023     int		shape_c;
1024     int		curr_laa;
1025     int		prev_laa;
1026 
1027     /* Deal only with Arabic character, pass back all others */
1028     if (!A_is_ok(c))
1029 	return c;
1030 
1031     /* half-shape current and previous character */
1032     shape_c = half_shape(prev_c);
1033 
1034     /* Save away current character */
1035     curr_c = c;
1036 
1037     curr_laa = A_firstc_laa(c, *c1p);
1038     prev_laa = A_firstc_laa(prev_c, prev_c1);
1039 
1040     if (curr_laa)
1041     {
1042 	if (A_is_valid(prev_c) && !A_is_f(shape_c)
1043 					 && !A_is_s(shape_c) && !prev_laa)
1044 	    curr_c = chg_c_laa2f(curr_laa);
1045 	else
1046 	    curr_c = chg_c_laa2i(curr_laa);
1047 
1048 	/* Remove the composing character */
1049 	*c1p = 0;
1050     }
1051     else if (!A_is_valid(prev_c) && A_is_valid(next_c))
1052 	curr_c = chg_c_a2i(c);
1053     else if (!shape_c || A_is_f(shape_c) || A_is_s(shape_c) || prev_laa)
1054 	curr_c = A_is_valid(next_c) ? chg_c_a2i(c) : chg_c_a2s(c);
1055     else if (A_is_valid(next_c))
1056 	curr_c = A_is_iso(c) ? chg_c_a2m(c) : chg_c_i2m(c);
1057     else if (A_is_valid(prev_c))
1058 	curr_c = chg_c_a2f(c);
1059     else
1060 	curr_c = chg_c_a2s(c);
1061 
1062     /* Sanity check -- curr_c should, in the future, never be 0.
1063      * We should, in the future, insert a fatal error here. */
1064     if (curr_c == NUL)
1065 	curr_c = c;
1066 
1067     if (curr_c != c && ccp != NULL)
1068     {
1069 	char_u buf[MB_MAXBYTES + 1];
1070 
1071 	/* Update the first byte of the character. */
1072 	(*mb_char2bytes)(curr_c, buf);
1073 	*ccp = buf[0];
1074     }
1075 
1076     /* Return the shaped character */
1077     return curr_c;
1078 }
1079 
1080 
1081 /*
1082  * A_firstc_laa returns first character of LAA combination if it exists
1083  */
1084     static int
1085 A_firstc_laa(c, c1)
1086     int c;	/* base character */
1087     int c1;	/* first composing character */
1088 {
1089     if (c1 != NUL && c == a_LAM && !A_is_harakat(c1))
1090 	return c1;
1091     return 0;
1092 }
1093 
1094 
1095 /*
1096  * A_is_harakat returns TRUE if 'c' is an Arabic Harakat character
1097  *		(harakat/tanween)
1098  */
1099     static int
1100 A_is_harakat(c)
1101     int c;
1102 {
1103     return (c >= a_FATHATAN && c <= a_SUKUN);
1104 }
1105 
1106 
1107 /*
1108  * A_is_iso returns TRUE if 'c' is an Arabic ISO-8859-6 character
1109  *		(alphabet/number/punctuation)
1110  */
1111     static int
1112 A_is_iso(c)
1113     int c;
1114 {
1115     return ((c >= a_HAMZA && c <= a_GHAIN)
1116 	    || (c >= a_TATWEEL && c <= a_HAMZA_BELOW)
1117 	    || c == a_MINI_ALEF);
1118 }
1119 
1120 
1121 /*
1122  * A_is_formb returns TRUE if 'c' is an Arabic 10646-1 FormB character
1123  *		(alphabet/number/punctuation)
1124  */
1125     static int
1126 A_is_formb(c)
1127     int c;
1128 {
1129     return ((c >= a_s_FATHATAN && c <= a_s_DAMMATAN)
1130 	    || c == a_s_KASRATAN
1131 	    || (c >= a_s_FATHA && c <= a_f_LAM_ALEF)
1132 	    || c == a_BYTE_ORDER_MARK);
1133 }
1134 
1135 
/*
 * A_is_ok returns non-zero when 'c' is an Arabic 10646 character, which
 *		means either A_is_iso() (8859-6) or A_is_formb() (Form-B)
 *		accepts it.
 */
    static int
A_is_ok(c)
    int c;
{
    int	    iso = A_is_iso(c);

    return (iso || A_is_formb(c));
}
1145 
1146 
/*
 * A_is_valid returns non-zero when 'c' is an Arabic 10646 character
 *		(8859-6 or Form-B, see A_is_ok()) that is not excluded by
 *		A_is_special().
 */
    static int
A_is_valid(c)
    int c;
{
    int	    ok = A_is_ok(c);
    int	    special = A_is_special(c);

    return (ok && !special);
}
1157 
1158 
1159 /*
1160  * A_is_special returns TRUE if 'c' is not a special Arabic character.
1161  *		Specials don't adhere to most of the rules.
1162  */
1163     static int
1164 A_is_special(c)
1165     int c;
1166 {
1167     return (c == a_HAMZA || c == a_s_HAMZA);
1168 }
1169