firmware/bidi.c

   1 /***************************************************************************
   2  *             __________               __   ___.
   3  *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
   4  *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
   5  *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
   6  *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
   7  *                     \/            \/     \/    \/            \/
   8  * $Id$
   9  *
  10  * Copyright (C) 2005 by Gadi Cohen
  11  *
  12  * Largely based on php_hebrev by Zeev Suraski <zeev@php.net>
  13  * Heavily modified by Gadi Cohen aka Kinslayer <dragon@wastelands.net>
  14  *
  15  * All files in this archive are subject to the GNU General Public License.
  16  * See the file COPYING in the source tree root for full license agreement.
  17  *
  18  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
  19  * KIND, either express or implied.
  20  *
  21  ****************************************************************************/
  22 #include <stdio.h>
  23 #include <string.h>
  24 #include <ctype.h>
  25 #include "file.h"
  26 #include "lcd.h"
  27 #include "rbunicode.h"
  28 #include "arabjoin.h"
  29 #include "scroll_engine.h"
  30
  31 /* #define _HEB_BUFFER_LENGTH (MAX_PATH + LCD_WIDTH/2 + 3 + 2 + 2) * 2 */
  32 #define _HEB_BLOCK_TYPE_ENG 1
  33 #define _HEB_BLOCK_TYPE_HEB 0
  34 #define _HEB_ORIENTATION_LTR 1
  35 #define _HEB_ORIENTATION_RTL 0
  36
  37 #define ischar(c) ((c > 0x0589 && c < 0x0700) || \
  38                    (c >= 0xfb50 && c <= 0xfefc) ? 1 : 0)
  39 #define _isblank(c) ((c==' ' || c=='\t') ? 1 : 0)
  40 #define _isnewline(c) ((c=='\n' || c=='\r') ? 1 : 0)
  41 #define XOR(a,b) ((a||b) && !(a&&b))
  42
  43 const arab_t * arab_lookup(unsigned short uchar)
  44 {
  45     if (uchar >= 0x621 && uchar <= 0x63a)
  46         return &(jointable[uchar - 0x621]);
  47     if (uchar >= 0x640 && uchar <= 0x64a)
  48         return &(jointable[uchar - 0x621 - 5]);
  49     if (uchar >= 0x671 && uchar <= 0x6d5)
  50         return &(jointable[uchar - 0x621 - 5 - 38]);
  51     if (uchar == 0x200D) /* Support for the zero-width joiner */
  52         return &zwj;
  53     return 0;
  54 }
  55
  56 void arabjoin(unsigned short * stringprt, int length){
  57
  58     bool connected = false;
  59     unsigned short * writeprt = stringprt;
  60
  61     const arab_t * prev = 0;
  62     const arab_t * cur;
  63     const arab_t * ligature = 0;
  64     short uchar;
  65
  66     int i;
  67     for (i = 0; i <= length; i++) {
  68         cur = arab_lookup(uchar = *stringprt++);
  69
  70         /* Skip non-arabic chars */
  71         if (cur == 0) {
  72             if (prev) {
  73                 /* Finish the last char */
  74                 if (connected) {
  75                     *writeprt++ = prev->final;
  76                     connected = false;
  77                 } else
  78                     *writeprt++ = prev->isolated;
  79                 prev = 0;
  80                 *writeprt++ = uchar;
  81             } else {
  82                 *writeprt++ = uchar;
  83             }
  84             continue;
  85         }
  86
  87         /* nothing to do for arabic char if the previous was non-arabic */
  88         if (prev == 0) {
  89             prev = cur;
  90             continue;
  91         }
  92
  93         /* if it's LAM, check for LAM+ALEPH ligatures */
  94         if (prev->isolated == 0xfedd) {
  95             switch (cur->isolated) {
  96                 case 0xfe8d:
  97                     ligature = &(lamaleph[0]);
  98                     break;
  99                 case 0xfe87:
 100                     ligature = &(lamaleph[1]);
 101                     break;
 102                 case 0xfe83:
 103                     ligature = &(lamaleph[2]);
 104                     break;
 105                 case 0xfe81:
 106                     ligature = &(lamaleph[3]);
 107             }
 108         }
 109
 110         if (ligature) { /* replace the 2 glyphs by their ligature */
 111             prev = ligature;
 112             ligature = 0;
 113         } else {
 114             if (connected) { /* previous char has something connected to it */
 115                 if (prev->medial && cur->final) /* Can we connect to it? */
 116                     *writeprt++ = prev->medial;
 117                 else {
 118                     *writeprt++ = prev->final;
 119                     connected = false;
 120                 }
 121             } else {
 122                 if (prev->initial && cur->final) { /* Can we connect to it? */
 123                     *writeprt++ = prev->initial;
 124                     connected = true;
 125                 } else
 126                     *writeprt++ = prev->isolated;
 127             }
 128             prev = cur;
 129         }
 130     }
 131 }
 132
 133 unsigned short *bidi_l2v(const unsigned char *str, int orientation)
 134 {
 135     int length = utf8length(str);
 136     static unsigned short  utf16_buf[SCROLL_LINE_SIZE];
 137     static unsigned short  bidi_buf[SCROLL_LINE_SIZE];
 138     unsigned short *heb_str, *target, *tmp; /* *broken_str */
 139     int block_start, block_end, block_type, block_length, i;
 140     /*
 141     long max_chars=0;
 142     int begin, end, char_count, orig_begin;
 143
 144     tmp = str;
 145     */
 146     target = tmp = utf16_buf;
 147     while (*str)
 148         str = utf8decode(str, target++);
 149     *target = 0;
 150
 151     if (target == utf16_buf) /* empty string */
 152         return target;
 153
 154     /* properly join any arabic chars */
 155     arabjoin(utf16_buf, length);
 156
 157     block_start=block_end=block_length=0;
 158
 159     heb_str = bidi_buf;
 160     if (orientation) {
 161         target = heb_str;
 162     } else {
 163         target = heb_str + length;
 164         *target = 0;
 165         target--;
 166     }
 167
 168     if (ischar(*tmp))
 169         block_type = _HEB_BLOCK_TYPE_HEB;
 170     else
 171         block_type = _HEB_BLOCK_TYPE_ENG;
 172
 173     do {
 174         while((XOR(ischar(*(tmp+1)),block_type)
 175                || _isblank(*(tmp+1)) || ispunct((int)*(tmp+1))
 176                || *(tmp+1)=='\n')
 177               && block_end < length-1) {
 178                 tmp++;
 179                 block_end++;
 180                 block_length++;
 181         }
 182
 183         if (block_type != orientation) {
 184             while ((_isblank(*tmp) || ispunct((int)*tmp))
 185                    && *tmp!='/' && *tmp!='-' && block_end>block_start) {
 186                 tmp--;
 187                 block_end--;
 188             }
 189         }
 190
 191         for (i=block_start; i<=block_end; i++) {
 192             *target = (block_type == orientation) ?
 193                       *(utf16_buf+i) : *(utf16_buf+block_end-i+block_start);
 194             if (block_type!=orientation) {
 195                 switch (*target) {
 196                 case '(':
 197                     *target = ')';
 198                     break;
 199                 case ')':
 200                     *target = '(';
 201                     break;
 202                 default:
 203                     break;
 204                 }
 205             }
 206             target += orientation ? 1 : -1;
 207         }
 208         block_type = !block_type;
 209         block_start=block_end+1;
 210     } while(block_end<length-1);
 211
 212     *target = 0;
 213
 214 #if 0 /* Is this code really necessary? */
 215     broken_str = utf16_buf;
 216     begin=end=length-1;
 217     target = broken_str;
 218
 219     while (1) {
 220         char_count=0;
 221         while ((!max_chars || char_count<max_chars) && begin>0) {
 222             char_count++;
 223             begin--;
 224             if (begin<=0 || _isnewline(heb_str[begin])) {
 225                 while(begin>0 && _isnewline(heb_str[begin-1])) {
 226                     begin--;
 227                     char_count++;
 228                 }
 229                 break;
 230             }
 231         }
 232         if (char_count==max_chars) { /* try to avoid breaking words */
 233             int new_char_count = char_count;
 234             int new_begin = begin;
 235
 236             while (new_char_count>0) {
 237                 if (_isblank(heb_str[new_begin]) ||
 238                     _isnewline(heb_str[new_begin])) {
 239                     break;
 240                 }
 241                 new_begin++;
 242                 new_char_count--;
 243             }
 244             if (new_char_count>0) {
 245                 char_count=new_char_count;
 246                 begin=new_begin;
 247             }
 248         }
 249         orig_begin=begin;
 250
 251         /* if (_isblank(heb_str[begin])) {
 252             heb_str[begin]='\n';
 253         } */
 254
 255         /* skip leading newlines */
 256         while (begin<=end && _isnewline(heb_str[begin])) {
 257             begin++;
 258         }
 259
 260         /* copy content */
 261         for (i=begin; i<=end; i++) {
 262             *target = heb_str[i];
 263             target++;
 264         }
 265
 266         for (i=orig_begin; i<=end && _isnewline(heb_str[i]); i++) {
 267             *target = heb_str[i];
 268             target++;
 269         }
 270         begin=orig_begin;
 271
 272         if (begin<=0) {
 273             *target = 0;
 274             break;
 275         }
 276         begin--;
 277         end=begin;
 278     }
 279     return broken_str;
 280 #endif
 281     return heb_str;
 282 }
 283