FS#12076 - DB stats resurrection: If the filename was changed, require
[kugel-rb.git] / apps / plugins / text_viewer / tv_text_processor.c
blobedb2ad0483716b9caf9afb0cc0592ed614ee3e60
1 /***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
10 * Copyright (C) 2002 Gilles Roux
11 * 2003 Garrett Derner
12 * 2010 Yoshihisa Uchida
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version 2
17 * of the License, or (at your option) any later version.
19 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
20 * KIND, either express or implied.
22 ****************************************************************************/
23 #include "plugin.h"
24 #include "ctype.h"
25 #include "tv_preferences.h"
26 #include "tv_text_processor.h"
/* line-ending conventions that tv_get_ucs() can detect for the loaded text */
enum {
    TV_TEXT_UNKNOWN = 0,  /* nothing detected yet */
    TV_TEXT_MAC,          /* a lone '\r' ends a line */
    TV_TEXT_UNIX,         /* a lone '\n' ends a line */
    TV_TEXT_WIN,          /* the pair "\r\n" ends a line */
};
35 /* the max characters of each blocks */
36 #ifdef HAVE_LCD_BITMAP
37 #define TV_MAX_CHARS_PER_BLOCK (LCD_WIDTH / 2 + 1)
38 #else
39 #define TV_MAX_CHARS_PER_BLOCK (LCD_WIDTH + 1)
40 #endif
42 #define TV_MAX_BLOCKS 5
44 static unsigned text_type = TV_TEXT_UNKNOWN;
46 static const unsigned char *end_ptr;
48 static unsigned short ucsbuf[TV_MAX_BLOCKS][TV_MAX_CHARS_PER_BLOCK];
49 static unsigned char utf8buf[TV_MAX_CHARS_PER_BLOCK * (2 * 3)];
50 static unsigned char *outbuf;
52 static int block_count;
53 static int block_width;
55 /* if this value is true, then tv_create_line_text returns a blank line. */
56 static bool expand_extra_line = false;
58 /* when a line is divided, this value sets true. */
59 static bool is_break_line = false;
/* characters after which a line may be broken (read-only lookup table) */
static const unsigned short break_chars[] =
{
    /* halfwidth characters */
    '\t', '\n', 0x0b, 0x0c, ' ', '!', ',', '-', '.', ':', ';', '?', 0xb7,
    /* fullwidth characters */
    0x2010, /* hyphen */
    0x3000, /* fullwidth space */
    0x3001, /* ideographic comma */
    0x3002, /* ideographic full stop */
    0x30fb, /* katakana middle dot */
    0x30fc, /* katakana-hiragana prolonged sound mark */
    0xff01, /* fullwidth exclamation mark */
    0xff0c, /* fullwidth comma */
    0xff0d, /* fullwidth hyphen-minus */
    0xff0e, /* fullwidth full stop */
    0xff1a, /* fullwidth colon */
    0xff1b, /* fullwidth semicolon */
    0xff1f, /* fullwidth question mark */
};
/*
 * characters that count as whitespace although isspace() does not
 * report them: NUL and the fullwidth (ideographic) space.
 */
static const unsigned short extra_spaces[] = { 0, 0x3000 };
/*
 * Return the display width of the character ch.
 *
 * A line feed is always 0 wide and NUL is measured as a blank.  On bitmap
 * displays diacritics are 0 wide and everything else is measured with the
 * preferred font; on other displays every character is 1 wide.
 */
static int tv_glyph_width(int ch)
{
    int glyph;

    if (ch == '\n')
        return 0;

    glyph = (ch != 0) ? ch : ' ';

#ifdef HAVE_LCD_BITMAP
    /* the width of a diacritic character is 0 */
    if (rb->is_diacritic(glyph, NULL))
        return 0;

    return rb->font_get_width(preferences->font, glyph);
#else
    (void)glyph;
    return 1;
#endif
}
104 static unsigned char *tv_get_ucs(const unsigned char *str, unsigned short *ch)
106 int count = 1;
107 unsigned char utf8_tmp[3];
109 /* distinguish the text_type */
110 if (*str == '\r')
112 if (text_type == TV_TEXT_WIN || text_type == TV_TEXT_UNKNOWN)
114 if (str + 1 < end_ptr && *(str+1) == '\n')
116 if (text_type == TV_TEXT_UNKNOWN)
117 text_type = TV_TEXT_WIN;
119 *ch = '\n';
120 return (unsigned char *)str + 2;
123 if (text_type == TV_TEXT_UNKNOWN)
124 text_type = TV_TEXT_MAC;
126 *ch = (text_type == TV_TEXT_MAC)? '\n' : ' ';
127 return (unsigned char *)str + 1;
129 else if (*str == '\n')
131 if (text_type == TV_TEXT_UNKNOWN)
132 text_type = TV_TEXT_UNIX;
134 *ch = (text_type == TV_TEXT_UNIX)? '\n' : ' ';
135 return (unsigned char *)str + 1;
138 if (preferences->encoding == UTF_8)
139 return (unsigned char*)rb->utf8decode(str, ch);
141 #ifdef HAVE_LCD_BITMAP
142 if ((*str >= 0x80) &&
143 ((preferences->encoding > SJIS) ||
144 (preferences->encoding == SJIS && (*str <= 0xa0 || *str >= 0xe0))))
146 if (str + 1 >= end_ptr)
148 end_ptr = str;
149 *ch = 0;
150 return (unsigned char *)str;
152 count = 2;
154 #endif
155 rb->iso_decode(str, utf8_tmp, preferences->encoding, count);
156 rb->utf8decode(utf8_tmp, ch);
157 return (unsigned char *)str + count;
160 static void tv_decode2utf8(const unsigned short *ucs, int count)
162 int i;
164 for (i = 0; i < count; i++)
165 outbuf = rb->utf8encode(ucs[i], outbuf);
167 *outbuf = '\0';
170 static bool tv_is_line_break_char(unsigned short ch)
172 size_t i;
174 /* when the word mode is CHOP, all characters does not break line. */
175 if (preferences->word_mode == WM_CHOP)
176 return false;
178 for (i = 0; i < sizeof(break_chars); i++)
180 if (break_chars[i] == ch)
181 return true;
183 return false;
186 static bool tv_isspace(unsigned short ch)
188 size_t i;
190 if (ch < 128 && isspace(ch))
191 return true;
193 for (i = 0; i < sizeof(extra_spaces); i++)
195 if (extra_spaces[i] == ch)
196 return true;
198 return false;
/*
 * Used in JOIN line mode to decide whether a newline really ends the line:
 * returns true when the character that starts at next_str is whitespace.
 *
 * NOTE(review): next_str is decoded without being compared against
 * end_ptr first - confirm callers never pass the end of the buffer.
 */
static bool tv_is_break_line_join_mode(const unsigned char *next_str)
{
    unsigned short first;

    (void)tv_get_ucs(next_str, &first);
    return tv_isspace(first);
}
209 static int tv_form_reflow_line(unsigned short *ucs, int chars)
211 unsigned short new_ucs[TV_MAX_CHARS_PER_BLOCK];
212 unsigned short *p = new_ucs;
213 unsigned short ch;
214 int i;
215 int k;
216 int expand_spaces;
217 int indent_chars = 0;
218 int nonspace_chars = 0;
219 int nonspace_width = 0;
220 int remain_spaces;
221 int spaces = 0;
222 int words_spaces;
224 if (preferences->alignment == AL_LEFT)
226 while (chars > 0 && ucs[chars-1] == ' ')
227 chars--;
230 if (chars == 0)
231 return 0;
233 while (ucs[indent_chars] == ' ')
234 indent_chars++;
236 for (i = indent_chars; i < chars; i++)
238 ch = ucs[i];
239 if (ch == ' ')
240 spaces++;
241 else
243 nonspace_chars++;
244 nonspace_width += tv_glyph_width(ch);
248 if (spaces == 0)
249 return chars;
251 expand_spaces = (block_width - nonspace_width) / tv_glyph_width(' ') - indent_chars;
252 if (indent_chars + nonspace_chars + expand_spaces > TV_MAX_CHARS_PER_BLOCK)
253 expand_spaces = TV_MAX_CHARS_PER_BLOCK - indent_chars - nonspace_chars;
255 words_spaces = expand_spaces / spaces;
256 remain_spaces = expand_spaces - words_spaces * spaces;
258 for (i = 0; i < indent_chars; i++)
259 *p++ = ' ';
261 for ( ; i < chars; i++)
263 ch = ucs[i];
264 *p++ = ch;
265 if (ch == ' ')
267 for (k = ((remain_spaces > 0)? 0 : 1); k < words_spaces; k++)
268 *p++ = ch;
270 remain_spaces--;
274 rb->memcpy(ucs, new_ucs, sizeof(unsigned short) * TV_MAX_CHARS_PER_BLOCK);
275 return indent_chars + nonspace_chars + expand_spaces;
278 static void tv_align_right(int *block_chars)
280 unsigned short *cur_text;
281 unsigned short *prev_text;
282 unsigned short ch;
283 int cur_block = block_count - 1;
284 int prev_block;
285 int cur_chars;
286 int prev_chars;
287 int idx;
288 int break_pos;
289 int break_width = 0;
290 int append_width;
291 int width;
293 while (cur_block > 0)
295 cur_text = ucsbuf[cur_block];
296 cur_chars = block_chars[cur_block];
297 idx = cur_chars;
298 width = 0;
299 while(--idx >= 0)
300 width += tv_glyph_width(cur_text[idx]);
302 width = block_width - width;
303 prev_block = cur_block - 1;
305 do {
306 prev_text = ucsbuf[prev_block];
307 prev_chars = block_chars[prev_block];
309 idx = prev_chars;
310 append_width = 0;
311 break_pos = prev_chars;
312 while (append_width < width && idx > 0)
314 ch = prev_text[--idx];
315 if (tv_is_line_break_char(ch))
317 break_pos = idx + 1;
318 break_width = append_width;
320 append_width += tv_glyph_width(ch);
322 if (append_width > width)
323 idx++;
325 if (idx == 0)
327 break_pos = 0;
328 break_width = append_width;
331 if (break_pos < prev_chars)
332 append_width = break_width;
333 /* the case of
334 * (1) when the first character of the cur_text concatenates
335 * the last character of the prev_text.
336 * (2) the length of ucsbuf[block] is short (< 0.75 * block width)
338 else if (((!tv_isspace(*cur_text) && !tv_isspace(prev_text[prev_chars - 1])) ||
339 (4 * width >= 3 * block_width)))
341 break_pos = idx;
344 if (break_pos < prev_chars)
346 rb->memmove(cur_text + prev_chars - break_pos,
347 cur_text, block_chars[cur_block] * sizeof(unsigned short));
348 rb->memcpy(cur_text, prev_text + break_pos,
349 (prev_chars - break_pos) * sizeof(unsigned short));
351 block_chars[prev_block] = break_pos;
352 block_chars[cur_block ] += prev_chars - break_pos;
354 } while ((width -= append_width) > 0 && --prev_block >= 0);
355 cur_block--;
359 static int tv_parse_text(const unsigned char *src, unsigned short *ucs,
360 int *ucs_chars, bool is_indent)
362 const unsigned char *cur = src;
363 const unsigned char *next = src;
364 const unsigned char *line_break_ptr = NULL;
365 const unsigned char *line_end_ptr = NULL;
366 unsigned short ch = 0;
367 unsigned short prev_ch;
368 int chars = 0;
369 int gw;
370 int line_break_width = 0;
371 int line_end_chars = 0;
372 int width = 0;
373 bool is_space = false;
375 while (true) {
376 cur = next;
377 if (cur >= end_ptr)
379 line_end_ptr = cur;
380 line_end_chars = chars;
381 is_break_line = true;
382 break;
385 prev_ch = ch;
386 next = tv_get_ucs(cur, &ch);
387 if (ch == '\n')
389 if (preferences->line_mode != LM_JOIN || tv_is_break_line_join_mode(next))
391 line_end_ptr = next;
392 line_end_chars = chars;
393 is_break_line = false;
394 break;
397 if (preferences->word_mode == WM_CHOP || tv_isspace(prev_ch))
398 continue;
401 * when the line mode is JOIN and the word mode is WRAP,
402 * the next character does not concatenate with the
403 * previous character.
405 ch = ' ';
407 else if ((is_space = tv_isspace(ch)) == true)
410 * when the line mode is REFLOW:
411 * (1) spacelike character convert to ' '
412 * (2) plural spaces are collected to one
414 if (preferences->line_mode == LM_REFLOW)
416 ch = ' ';
417 if (prev_ch == ch)
418 continue;
421 /* when the alignment is RIGHT, ignores indent spaces. */
422 if (preferences->alignment == AL_RIGHT && is_indent)
423 continue;
425 else
426 is_indent = false;
428 if (preferences->line_mode == LM_REFLOW && is_indent)
429 gw = tv_glyph_width(ch) * preferences->indent_spaces;
430 else
431 gw = tv_glyph_width(ch);
433 width += gw;
434 if (width > block_width)
436 width -= gw;
437 if (is_space)
439 line_end_ptr = cur;
440 line_end_chars = chars;
442 is_break_line = true;
443 break;
446 if (preferences->line_mode != LM_REFLOW || !is_indent)
447 ucs[chars++] = ch;
448 else
450 unsigned char i;
451 for (i = 0; i < preferences->indent_spaces; i++)
452 ucs[chars++] = ch;
455 if (tv_is_line_break_char(ch))
457 line_break_ptr = next;
458 line_break_width = width;
459 line_end_chars = chars;
461 if (chars >= TV_MAX_CHARS_PER_BLOCK)
463 is_break_line = true;
464 break;
468 /* set the end position and character count */
469 if (line_end_ptr == NULL)
472 * when the last line break position is too short (line length < 0.75 * block width),
473 * the line is cut off at the position where it is closest to the displayed width.
475 if ((preferences->line_mode == LM_REFLOW && line_break_ptr == NULL) ||
476 (4 * line_break_width < 3 * block_width))
478 line_end_ptr = cur;
479 line_end_chars = chars;
481 else
482 line_end_ptr = line_break_ptr;
485 *ucs_chars = line_end_chars;
486 return line_end_ptr - src;
489 int tv_create_formed_text(const unsigned char *src, ssize_t bufsize,
490 int block, bool is_multi, const unsigned char **dst)
492 unsigned short ch;
493 int chars[block_count];
494 int i;
495 int size = 0;
496 bool is_indent;
498 outbuf = utf8buf;
499 *outbuf = '\0';
501 for (i = 0; i < block_count; i++)
502 chars[i] = 0;
504 if (dst != NULL)
505 *dst = utf8buf;
507 if (preferences->line_mode == LM_EXPAND && (expand_extra_line = !expand_extra_line) == true)
508 return 0;
510 end_ptr = src + bufsize;
512 tv_get_ucs(src, &ch);
513 is_indent = (tv_isspace(ch) && !is_break_line);
515 if (is_indent && preferences->line_mode == LM_REFLOW && preferences->indent_spaces == 0
516 && (expand_extra_line = !expand_extra_line) == true)
517 return 0;
519 for (i = 0; i < block_count; i++)
521 size += tv_parse_text(src + size, ucsbuf[i], &chars[i], is_indent);
522 if (!is_break_line)
523 break;
525 is_indent = false;
528 if (dst != NULL)
530 if (preferences->alignment == AL_RIGHT)
531 tv_align_right(chars);
533 for (i = 0; i < block_count; i++)
535 if (i == block || (is_multi && i == block + 1))
537 if (is_break_line && preferences->line_mode == LM_REFLOW)
538 chars[i] = tv_form_reflow_line(ucsbuf[i], chars[i]);
540 tv_decode2utf8(ucsbuf[i], chars[i]);
545 return size;
548 bool tv_init_text_processor(unsigned char **buf, size_t *size)
550 /* unused : no need for dynamic buffer yet */
551 (void)buf;
552 (void)size;
554 text_type = TV_TEXT_UNKNOWN;
555 expand_extra_line = false;
556 is_break_line = false;
557 return true;
560 void tv_set_creation_conditions(int blocks, int width)
562 block_count = blocks;
563 block_width = width;