Merged in first part of ffmpeg accuracy improvements. No change on my samples, but...
[kugel-rb.git] / firmware / bidi.c
blobb5851f298e832cdb46e81c27035295fe3a87381d
1 /***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
10 * Copyright (C) 2005 by Gadi Cohen
12 * Largely based on php_hebrev by Zeev Suraski <zeev@php.net>
13 * Heavily modified by Gadi Cohen aka Kinslayer <dragon@wastelands.net>
15 * All files in this archive are subject to the GNU General Public License.
16 * See the file COPYING in the source tree root for full license agreement.
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
21 ****************************************************************************/
22 #include <stdio.h>
23 #include <string.h>
24 #include <ctype.h>
25 #include "file.h"
26 #include "lcd.h"
27 #include "rbunicode.h"
28 #include "arabjoin.h"
//#define _HEB_BUFFER_LENGTH (MAX_PATH + LCD_WIDTH/2 + 3 + 2 + 2) * 2

/* Type of a run of characters: _HEB_BLOCK_TYPE_HEB for characters accepted
 * by ischar(), _HEB_BLOCK_TYPE_ENG for everything else. */
#define _HEB_BLOCK_TYPE_ENG 1
#define _HEB_BLOCK_TYPE_HEB 0
/* Values of the 'orientation' argument of bidi_l2v(). They deliberately
 * share the numbering of the block types so the two can be compared. */
#define _HEB_ORIENTATION_LTR 1
#define _HEB_ORIENTATION_RTL 0

/* 1 if c lies in (0x0589, 0x0700) or [0xfb50, 0xfefc], else 0.
 * NOTE(review): presumably the Hebrew/Arabic blocks and the Arabic
 * presentation forms -- confirm against the Unicode charts.
 *
 * All macros below evaluate their argument more than once, so do not pass
 * expressions with side effects. Arguments are fully parenthesized so that
 * compound expressions (e.g. "a || b") are treated as a single operand. */
#define ischar(c) ((((c) > 0x0589 && (c) < 0x0700) || \
                    ((c) >= 0xfb50 && (c) <= 0xfefc)) ? 1 : 0)
/* 1 if c is a space or a horizontal tab, else 0. */
#define _isblank(c) ((((c) == ' ') || ((c) == '\t')) ? 1 : 0)
/* 1 if c is a line feed or a carriage return, else 0. */
#define _isnewline(c) ((((c) == '\n') || ((c) == '\r')) ? 1 : 0)
/* Logical exclusive or: true when exactly one of a and b is non-zero. */
#define XOR(a,b) (((a) || (b)) && !((a) && (b)))
41 const arab_t * arab_lookup(unsigned short uchar)
43 if (uchar >= 0x621 && uchar <= 0x63a)
44 return &(jointable[uchar - 0x621]);
45 if (uchar >= 0x640 && uchar <= 0x64a)
46 return &(jointable[uchar - 0x621 - 5]);
47 if (uchar >= 0x671 && uchar <= 0x6d5)
48 return &(jointable[uchar - 0x621 - 5 - 38]);
49 if (uchar == 0x200D) /* Support for the zero-width joiner */
50 return &zwj;
51 return 0;
54 void arabjoin(unsigned short * stringprt, int length){
56 bool connected = false;
57 unsigned short * writeprt = stringprt;
59 const arab_t * prev = 0;
60 const arab_t * cur;
61 const arab_t * ligature = 0;
62 short uchar;
64 int i;
65 for (i = 0; i <= length; i++) {
66 cur = arab_lookup(uchar = *stringprt++);
68 /* Skip non-arabic chars */
69 if (cur == 0) {
70 if (prev) {
71 /* Finish the last char */
72 if (connected) {
73 *writeprt++ = prev->final;
74 connected = false;
75 } else
76 *writeprt++ = prev->isolated;
77 prev = 0;
78 *writeprt++ = uchar;
79 } else {
80 *writeprt++ = uchar;
82 continue;
85 /* nothing to do for arabic char if the previous was non-arabic */
86 if (prev == 0) {
87 prev = cur;
88 continue;
91 /* if it's LAM, check for LAM+ALEPH ligatures */
92 if (prev->isolated == 0xfedd) {
93 switch (cur->isolated) {
94 case 0xfe8d:
95 ligature = &(lamaleph[0]);
96 break;
97 case 0xfe87:
98 ligature = &(lamaleph[1]);
99 break;
100 case 0xfe83:
101 ligature = &(lamaleph[2]);
102 break;
103 case 0xfe81:
104 ligature = &(lamaleph[3]);
108 if (ligature) { /* replace the 2 glyphs by their ligature */
109 prev = ligature;
110 ligature = 0;
111 } else {
112 if (connected) { /* previous char has something connected to it */
113 if (prev->medial && cur->final) /* Can we connect to it? */
114 *writeprt++ = prev->medial;
115 else {
116 *writeprt++ = prev->final;
117 connected = false;
119 } else {
120 if (prev->initial && cur->final) { /* Can we connect to it? */
121 *writeprt++ = prev->initial;
122 connected = true;
123 } else
124 *writeprt++ = prev->isolated;
126 prev = cur;
131 unsigned short *bidi_l2v(const unsigned char *str, int orientation)
133 int length = utf8length(str);
134 static unsigned short utf16_buf[SCROLL_LINE_SIZE];
135 static unsigned short bidi_buf[SCROLL_LINE_SIZE];
136 unsigned short *heb_str, *target, *tmp; // *broken_str
137 int block_start, block_end, block_type, block_length, i;
138 //long max_chars=0;
139 //int begin, end, char_count, orig_begin;
141 //tmp = str;
142 target = tmp = utf16_buf;
143 while (*str)
144 str = utf8decode(str, target++);
145 *target = 0;
147 if (target == utf16_buf) /* empty string */
148 return target;
150 /* properly join any arabic chars */
151 arabjoin(utf16_buf, length);
153 block_start=block_end=block_length=0;
155 heb_str = bidi_buf;
156 if (orientation) {
157 target = heb_str;
158 } else {
159 target = heb_str + length;
160 *target = 0;
161 target--;
164 if (ischar(*tmp))
165 block_type = _HEB_BLOCK_TYPE_HEB;
166 else
167 block_type = _HEB_BLOCK_TYPE_ENG;
169 do {
170 while((XOR(ischar(*(tmp+1)),block_type)
171 || _isblank(*(tmp+1)) || ispunct((int)*(tmp+1))
172 || *(tmp+1)=='\n')
173 && block_end < length-1) {
174 tmp++;
175 block_end++;
176 block_length++;
179 if (block_type != orientation) {
180 while ((_isblank(*tmp) || ispunct((int)*tmp))
181 && *tmp!='/' && *tmp!='-' && block_end>block_start) {
182 tmp--;
183 block_end--;
187 for (i=block_start; i<=block_end; i++) {
188 *target = (block_type == orientation) ? *(utf16_buf+i) : *(utf16_buf+block_end-i+block_start);
189 if (block_type!=orientation) {
190 switch (*target) {
191 case '(':
192 *target = ')';
193 break;
194 case ')':
195 *target = '(';
196 break;
197 default:
198 break;
201 target += orientation ? 1 : -1;
203 block_type = !block_type;
204 block_start=block_end+1;
205 } while(block_end<length-1);
207 *target = 0;
209 #if 0 /* Is this code really necessary? */
210 broken_str = utf16_buf;
211 begin=end=length-1;
212 target = broken_str;
214 while (1) {
215 char_count=0;
216 while ((!max_chars || char_count<max_chars) && begin>0) {
217 char_count++;
218 begin--;
219 if (begin<=0 || _isnewline(heb_str[begin])) {
220 while(begin>0 && _isnewline(heb_str[begin-1])) {
221 begin--;
222 char_count++;
224 break;
227 if (char_count==max_chars) { /* try to avoid breaking words */
228 int new_char_count = char_count;
229 int new_begin = begin;
231 while (new_char_count>0) {
232 if (_isblank(heb_str[new_begin]) ||
233 _isnewline(heb_str[new_begin])) {
234 break;
236 new_begin++;
237 new_char_count--;
239 if (new_char_count>0) {
240 char_count=new_char_count;
241 begin=new_begin;
244 orig_begin=begin;
246 /* if (_isblank(heb_str[begin])) {
247 heb_str[begin]='\n';
248 } */
250 /* skip leading newlines */
251 while (begin<=end && _isnewline(heb_str[begin])) {
252 begin++;
255 /* copy content */
256 for (i=begin; i<=end; i++) {
257 *target = heb_str[i];
258 target++;
261 for (i=orig_begin; i<=end && _isnewline(heb_str[i]); i++) {
262 *target = heb_str[i];
263 target++;
265 begin=orig_begin;
267 if (begin<=0) {
268 *target = 0;
269 break;
271 begin--;
272 end=begin;
274 return broken_str;
275 #endif
276 return heb_str;