convert to unix line endings.
[Rockbox.git] / firmware / bidi.c
blobeb3af22e9cf9e74fe59464853f2f0fb77fd324d5
1 /***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
10 * Copyright (C) 2005 by Gadi Cohen
12 * Largely based on php_hebrev by Zeev Suraski <zeev@php.net>
13 * Heavily modified by Gadi Cohen aka Kinslayer <dragon@wastelands.net>
15 * All files in this archive are subject to the GNU General Public License.
16 * See the file COPYING in the source tree root for full license agreement.
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
21 ****************************************************************************/
22 #include <stdio.h>
23 #include <string.h>
24 #include <ctype.h>
25 #include "file.h"
26 #include "lcd.h"
27 #include "rbunicode.h"
28 #include "arabjoin.h"
29 #include "scroll_engine.h"
30 #include "bidi.h"
32 /* #define _HEB_BUFFER_LENGTH (MAX_PATH + LCD_WIDTH/2 + 3 + 2 + 2) * 2 */
/* Block types and text orientations compared against each other in
 * bidi_l2v(); the numeric values of matching type/orientation are equal. */
#define _HEB_BLOCK_TYPE_ENG 1
#define _HEB_BLOCK_TYPE_HEB 0
#define _HEB_ORIENTATION_LTR 1
#define _HEB_ORIENTATION_RTL 0

/* 1 if c lies in (0x0589, 0x0700) or [0xfb50, 0xfefc], else 0.
 * These are the code points bidi_l2v() puts into a HEB-type block.
 * Every argument is fully parenthesized so that expression arguments
 * (e.g. "x & 0xff" or "a || b") are evaluated as a unit.
 * Note: the argument is still evaluated more than once. */
#define ischar(c) ((((c) > 0x0589 && (c) < 0x0700) || \
                    ((c) >= 0xfb50 && (c) <= 0xfefc)) ? 1 : 0)
/* 1 if c is a space or a tab, else 0. */
#define _isblank(c) (((c) == ' ' || (c) == '\t') ? 1 : 0)
/* 1 if c is a line feed or a carriage return, else 0. */
#define _isnewline(c) (((c) == '\n' || (c) == '\r') ? 1 : 0)
/* Logical exclusive-or: non-zero iff exactly one of a, b is non-zero. */
#define XOR(a,b) (((a) || (b)) && !((a) && (b)))
44 static const arab_t * arab_lookup(unsigned short uchar)
46 if (uchar >= 0x621 && uchar <= 0x63a)
47 return &(jointable[uchar - 0x621]);
48 if (uchar >= 0x640 && uchar <= 0x64a)
49 return &(jointable[uchar - 0x621 - 5]);
50 if (uchar >= 0x671 && uchar <= 0x6d5)
51 return &(jointable[uchar - 0x621 - 5 - 38]);
52 if (uchar == 0x200D) /* Support for the zero-width joiner */
53 return &zwj;
54 return 0;
57 static void arabjoin(unsigned short * stringprt, int length)
59 bool connected = false;
60 unsigned short * writeprt = stringprt;
62 const arab_t * prev = 0;
63 const arab_t * cur;
64 const arab_t * ligature = 0;
65 short uchar;
67 int i;
68 for (i = 0; i <= length; i++) {
69 cur = arab_lookup(uchar = *stringprt++);
71 /* Skip non-arabic chars */
72 if (cur == 0) {
73 if (prev) {
74 /* Finish the last char */
75 if (connected) {
76 *writeprt++ = prev->final;
77 connected = false;
78 } else
79 *writeprt++ = prev->isolated;
80 prev = 0;
81 *writeprt++ = uchar;
82 } else {
83 *writeprt++ = uchar;
85 continue;
88 /* nothing to do for arabic char if the previous was non-arabic */
89 if (prev == 0) {
90 prev = cur;
91 continue;
94 /* if it's LAM, check for LAM+ALEPH ligatures */
95 if (prev->isolated == 0xfedd) {
96 switch (cur->isolated) {
97 case 0xfe8d:
98 ligature = &(lamaleph[0]);
99 break;
100 case 0xfe87:
101 ligature = &(lamaleph[1]);
102 break;
103 case 0xfe83:
104 ligature = &(lamaleph[2]);
105 break;
106 case 0xfe81:
107 ligature = &(lamaleph[3]);
111 if (ligature) { /* replace the 2 glyphs by their ligature */
112 prev = ligature;
113 ligature = 0;
114 } else {
115 if (connected) { /* previous char has something connected to it */
116 if (prev->medial && cur->final) /* Can we connect to it? */
117 *writeprt++ = prev->medial;
118 else {
119 *writeprt++ = prev->final;
120 connected = false;
122 } else {
123 if (prev->initial && cur->final) { /* Can we connect to it? */
124 *writeprt++ = prev->initial;
125 connected = true;
126 } else
127 *writeprt++ = prev->isolated;
129 prev = cur;
134 unsigned short *bidi_l2v(const unsigned char *str, int orientation)
136 int length = utf8length(str);
137 static unsigned short utf16_buf[SCROLL_LINE_SIZE];
138 static unsigned short bidi_buf[SCROLL_LINE_SIZE];
139 unsigned short *heb_str, *target, *tmp; /* *broken_str */
140 int block_start, block_end, block_type, block_length, i;
142 long max_chars=0;
143 int begin, end, char_count, orig_begin;
145 tmp = str;
147 target = tmp = utf16_buf;
148 while (*str)
149 str = utf8decode(str, target++);
150 *target = 0;
152 if (target == utf16_buf) /* empty string */
153 return target;
155 /* properly join any arabic chars */
156 arabjoin(utf16_buf, length);
158 block_start=block_end=block_length=0;
160 heb_str = bidi_buf;
161 if (orientation) {
162 target = heb_str;
163 } else {
164 target = heb_str + length;
165 *target = 0;
166 target--;
169 if (ischar(*tmp))
170 block_type = _HEB_BLOCK_TYPE_HEB;
171 else
172 block_type = _HEB_BLOCK_TYPE_ENG;
174 do {
175 while((XOR(ischar(*(tmp+1)),block_type)
176 || _isblank(*(tmp+1)) || ispunct((int)*(tmp+1))
177 || *(tmp+1)=='\n')
178 && block_end < length-1) {
179 tmp++;
180 block_end++;
181 block_length++;
184 if (block_type != orientation) {
185 while ((_isblank(*tmp) || ispunct((int)*tmp))
186 && *tmp!='/' && *tmp!='-' && block_end>block_start) {
187 tmp--;
188 block_end--;
192 for (i=block_start; i<=block_end; i++) {
193 *target = (block_type == orientation) ?
194 *(utf16_buf+i) : *(utf16_buf+block_end-i+block_start);
195 if (block_type!=orientation) {
196 switch (*target) {
197 case '(':
198 *target = ')';
199 break;
200 case ')':
201 *target = '(';
202 break;
203 default:
204 break;
207 target += orientation ? 1 : -1;
209 block_type = !block_type;
210 block_start=block_end+1;
211 } while(block_end<length-1);
213 *target = 0;
215 #if 0 /* Is this code really necessary? */
216 broken_str = utf16_buf;
217 begin=end=length-1;
218 target = broken_str;
220 while (1) {
221 char_count=0;
222 while ((!max_chars || char_count<max_chars) && begin>0) {
223 char_count++;
224 begin--;
225 if (begin<=0 || _isnewline(heb_str[begin])) {
226 while(begin>0 && _isnewline(heb_str[begin-1])) {
227 begin--;
228 char_count++;
230 break;
233 if (char_count==max_chars) { /* try to avoid breaking words */
234 int new_char_count = char_count;
235 int new_begin = begin;
237 while (new_char_count>0) {
238 if (_isblank(heb_str[new_begin]) ||
239 _isnewline(heb_str[new_begin])) {
240 break;
242 new_begin++;
243 new_char_count--;
245 if (new_char_count>0) {
246 char_count=new_char_count;
247 begin=new_begin;
250 orig_begin=begin;
252 /* if (_isblank(heb_str[begin])) {
253 heb_str[begin]='\n';
254 } */
256 /* skip leading newlines */
257 while (begin<=end && _isnewline(heb_str[begin])) {
258 begin++;
261 /* copy content */
262 for (i=begin; i<=end; i++) {
263 *target = heb_str[i];
264 target++;
267 for (i=orig_begin; i<=end && _isnewline(heb_str[i]); i++) {
268 *target = heb_str[i];
269 target++;
271 begin=orig_begin;
273 if (begin<=0) {
274 *target = 0;
275 break;
277 begin--;
278 end=begin;
280 return broken_str;
281 #endif
282 return heb_str;