Reimplemented the way glyphs are rendered in YUVA mode.
[vlc/solaris.git] / modules / demux / dvb-text.h
blobbe7b207fb1a67e7e06b9cf106fdf3df00d6aac86
1 /*****************************************************************************
2 * dvb-text.h:
3 *****************************************************************************
4 * Copyright (C) 2007-2011 the VideoLAN team
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
19 *****************************************************************************/
/**
 * Converts a DVB SI text item to UTF-8.
 *
 * The leading byte(s) of the item select the character table, as described
 * in ETSI EN 300 468 annex A:
 *  - first byte >= 0x20: no selector, the whole item uses the default table
 *    (ISO 6937);
 *  - 0x01-0x07, 0x09-0x0B: ISO 8859-(selector + 4);
 *  - 0x10 0x00 N: ISO 8859-N, for N in 1-11 and 13-15;
 *  - 0x11: UCS-2 big endian, 0x12: EUC-KR, 0x13: GB2312, 0x14: Big5,
 *    0x15: UTF-8.
 * Any other selector is rejected.
 *
 * After conversion, the DVB "CR/LF" control code (U+008A, and its private
 * use twin U+E08A) is replaced in place by carriage return / line feed.
 *
 * @param buf start of the text item, selector byte(s) included
 * @param length size of the text item in bytes
 * @return a heap-allocated nul-terminated UTF-8 string or NULL on error
 *         (empty item, unsupported selector or allocation failure).
 */
static char *vlc_from_EIT (const void *buf, size_t length)
{
    if (unlikely(length == 0))
        return NULL;

    char encbuf[12]; /* room for "ISO_8859-", two digits and the nul */
    const char *encoding = encbuf;

    const char *in = buf;
    size_t offset = 1; /* number of selector bytes to skip */
    unsigned char c = *in;

    if (c >= 0x20)
    {   /* No selector byte: the text starts right away */
        offset = 0;
        encoding = "ISO_6937";
    }
    else if ((1u << c) & 0x0EFEu) /* 1-7, 9-11 -> ISO 8859-(c+4) */
    {
        snprintf (encbuf, sizeof (encbuf), "ISO_8859-%u", 4u + c);
    }
    else switch (c)
    {
        case 0x10: /* two more bytes */
            offset = 3;
            if (length < 3 || in[1] != 0x00)
                return NULL;

            c = in[2];
            /* c is an arbitrary byte from the stream: it must be bounded
             * before being used as a shift count, otherwise the shift is
             * undefined and bogus table numbers could be accepted. */
            if (c >= 16 || !((1u << c) & 0xEFFEu))
                return NULL;
            /* 1-11, 13-15 -> ISO 8859-(c) */
            snprintf (encbuf, sizeof (encbuf), "ISO_8859-%u", (unsigned)c);
            break;
        case 0x11: /* the BMP */
            encoding = "UCS-2BE";
            break;
        case 0x12:
            /* DVB has no clue about Korean. KS X 1001 (a.k.a. KS C 5601) is a
             * character set, not a character encoding... So we assume EUC-KR.
             * It is an encoding of KS X 1001. In practice, I guess nobody uses
             * this in any real DVB system. */
            encoding = "EUC-KR";
            break;
        case 0x13: /* GB-2312-1980 */
            encoding = "GB2312";
            break;
        case 0x14: /* Big5 subset of the BMP */
            encoding = "BIG-5";
            break;
        case 0x15:
            encoding = "UTF-8";
            break;
#if 0
        case 0x1F: /* operator-specific(?) */
            offset = 2;
#endif
        default:
            return NULL;
    }

    /* Skip the selector; what remains is the encoded text proper */
    in += offset;
    length -= offset;

    char *out = FromCharset (encoding, in, length);
    if (out == NULL)
    {   /* Fallback: keep the raw bytes and sanitize them with EnsureUTF8() */
        out = strndup (in, length);
        if (unlikely(out == NULL))
            return NULL;
        EnsureUTF8 (out);
    }

    /* Convert control codes */
    for (char *p = strchr (out, '\xC2'); p; p = strchr (p + 1, '\xC2'))
    {
        /* We have valid UTF-8, so 0xC2 is followed by a continuation byte. */
        /* 0x80-0x85,0x88-0x89 are reserved.
         * 0x86-0x87 are identical to Unicode and Latin-1.
         * 0x8A is CR/LF.
         * 0x8B-0x9F are unspecified. */
        if (p[1] == '\x8A')
            memcpy (p, "\r\n", 2);
    }

    /* Private use area */
    for (char *p = strchr (out, '\xEE'); p; p = strchr (p + 1, '\xEE'))
    {
        /* Within UTF-8, 0xEE is followed by two continuation bytes. */
        if (p[1] != '\x82')
            continue;
        if (p[2] == '\x8A')
            memcpy (p, "\r\r\n", 3); /* we need three bytes, so two CRs ;) */
    }

    return out;
}