beta-0.89.2
[luatex.git] / source / texk / kpathsea / knj.c
blob571201ce85c80f57a1efa961f9fa1386a354513a
1 /* knj.c: check for 2-Byte Kanji (CP 932, SJIS) codes.
3 Copyright 2010, 2014 Akira Kakuto.
4 Copyright 2013, 2014 TANAKA Takuji.
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public License
17 along with this library; if not, see <http://www.gnu.org/licenses/>. */
19 #include <kpathsea/config.h>
20 #include <kpathsea/debug.h>
21 #include <wchar.h>
/* is_cp932_system: code page number that selects the double-byte rules
   applied by isknj() and isknj2() (932, 936 and 950 are recognised, any
   other value disables detection).
   file_system_codepage: code page compared against CP_UTF8 by the
   console wrappers in this file.  */
int is_cp932_system, file_system_codepage;

/* Return nonzero when the low eight bits of C fall in the lead-byte
   range of a two-byte character for the code page held in
   is_cp932_system, and 0 otherwise.  */
int isknj(int c)
{
  const int byte = c & 0xff;
  const int codepage = is_cp932_system;

  if (codepage == 932)
    return (byte >= 0x81 && byte <= 0x9f) || (byte >= 0xe0 && byte <= 0xfc);
  if (codepage == 936)
    return byte >= 0x81 && byte <= 0xfe;
  if (codepage == 950)
    return (byte >= 0xa1 && byte <= 0xc6) || (byte >= 0xc9 && byte <= 0xf9);
  return 0;
}
40 int isknj2(int c)
42 c &= 0xff;
43 switch (is_cp932_system) {
44 case 932:
45 return(c>=0x40 && c<=0xfc && c!=0x7f);
46 case 936:
47 return(c>=0x40 && c<=0xfe && c!=0x7f);
48 case 950:
49 return((c>=0x40 && c<=0x7e) || (c>=0xa1 && c<=0xfe));
50 default:
51 return(0);
/*
  Get wide string from multibyte string.

  MBSTR is a NUL-terminated string in code page CP.  When WSTR is NULL
  the result is stored in a buffer obtained from xmalloc; otherwise it
  is written to WSTR, whose capacity is not checked here.  Returns the
  destination buffer.  A failed conversion is reported through FATAL.
*/
wchar_t *
get_wstring_from_mbstring(int cp, const char *mbstr, wchar_t *wstr)
{
  wchar_t *dest = wstr;
  int needed;

  /* First pass with an output size of 0 only asks for the length.  */
  needed = MultiByteToWideChar(cp, 0, mbstr, -1, dest, 0);
  if (needed == 0) {
    FATAL("cannot convert string to wide string");
  }

  if (dest == NULL) {
    dest = xmalloc(sizeof(wchar_t) * (needed + 1));
  }

  if (MultiByteToWideChar(cp, 0, mbstr, -1, dest, needed + 1) == 0) {
    FATAL("cannot convert multibyte string to wide string");
  }
  return dest;
}
/*
  Get multibyte string from wide string.

  WSTR is a NUL-terminated wide string to be converted to code page CP.
  When MBSTR is NULL the result is stored in a buffer obtained from
  xmalloc; otherwise it is written to MBSTR, whose capacity is not
  checked here.  Returns the destination buffer.  A failed conversion
  is reported through FATAL.
*/
char *
get_mbstring_from_wstring(int cp, const wchar_t *wstr, char *mbstr)
{
  char *dest = mbstr;
  int needed;

  /* First pass with an output size of 0 only asks for the length.  */
  needed = WideCharToMultiByte(cp, 0, wstr, -1, dest, 0, NULL, NULL);
  if (needed == 0) {
    FATAL("cannot convert string to multibyte string");
  }

  if (dest == NULL) {
    dest = xmalloc(needed + 1);
  }

  if (WideCharToMultiByte(cp, 0, wstr, -1, dest, needed + 1, NULL, NULL) == 0) {
    FATAL("cannot convert wide string to multibyte string");
  }
  return dest;
}
/*
  xfopen by file system codepage

  Convert FILENAME with get_wstring_from_fsyscp and open it through
  _wfopen using the stdio mode string MODE (ASCII only).  A failed open
  is reported with FATAL_PERROR (expected not to return), so callers
  need not test the result.  With KPSE_COMPAT_API and fopen debugging
  enabled, the call is traced on stderr.
*/
FILE *
fsyscp_xfopen (const char *filename, const char *mode)
{
    FILE *f;
    wchar_t *fnamew = NULL;
    wchar_t *modew;
    size_t i, modelen;
#if defined (KPSE_COMPAT_API)
    kpathsea kpse;
#endif
    assert(filename && mode);

    fnamew = get_wstring_from_fsyscp(filename, fnamew);

    /* Size the wide mode string from MODE itself; a fixed wchar_t[4]
       overflows as soon as MODE is longer than three characters.  */
    modelen = strlen(mode);
    modew = xmalloc(sizeof(wchar_t) * (modelen + 1));
    for (i = 0; i <= modelen; i++)
        modew[i] = (wchar_t)mode[i]; /* mode[i] must be ASCII */

    f = _wfopen(fnamew, modew);
    if (f == NULL) {
        /* Report before freeing anything so errno is still intact.  */
        FATAL_PERROR(filename);
    }
    free(modew);
#if defined (KPSE_COMPAT_API)
    kpse = kpse_def;
    if (KPATHSEA_DEBUG_P (KPSE_DEBUG_FOPEN)) {
        DEBUGF_START ();
        fprintf (stderr, "fsyscp_xfopen(%s [", filename);
        WriteConsoleW( GetStdHandle( STD_ERROR_HANDLE ), fnamew, wcslen( fnamew ), NULL, NULL );
#if defined(_WIN64)
        fprintf (stderr, "], %s) => 0x%I64x\n", mode, (unsigned __int64) f);
#else
        fprintf (stderr, "], %s) => 0x%lx\n", mode, (unsigned long) f);
#endif
        DEBUGF_END ();
    }
#endif
    free(fnamew);

    return f;
}
/*
  fopen by file system codepage

  Convert FILENAME with get_wstring_from_fsyscp and open it through
  _wfopen using the stdio mode string MODE (ASCII only).  Returns the
  stream, or NULL when the open fails.  With KPSE_COMPAT_API and fopen
  debugging enabled, a successful open is traced on stderr.
*/
FILE *
fsyscp_fopen (const char *filename, const char *mode)
{
    FILE *f;
    wchar_t *fnamew = NULL;
    wchar_t *modew;
    size_t i, modelen;
#if defined (KPSE_COMPAT_API)
    kpathsea kpse;
#endif
    assert(filename && mode);

    fnamew = get_wstring_from_fsyscp(filename, fnamew);

    /* Size the wide mode string from MODE itself; a fixed wchar_t[4]
       overflows as soon as MODE is longer than three characters.  */
    modelen = strlen(mode);
    modew = xmalloc(sizeof(wchar_t) * (modelen + 1));
    for (i = 0; i <= modelen; i++)
        modew[i] = (wchar_t)mode[i]; /* mode[i] must be ASCII */

    f = _wfopen(fnamew, modew);
#if defined (KPSE_COMPAT_API)
    if (f != NULL) {
        kpse = kpse_def;
        if (KPATHSEA_DEBUG_P (KPSE_DEBUG_FOPEN)) {
            DEBUGF_START ();
            fprintf (stderr, "fsyscp_fopen(%s [", filename);
            WriteConsoleW( GetStdHandle( STD_ERROR_HANDLE ), fnamew, wcslen( fnamew ), NULL, NULL );
#if defined(_WIN64)
            fprintf (stderr, "], %s) => 0x%I64x\n", mode, (unsigned __int64) f);
#else
            fprintf (stderr, "], %s) => 0x%lx\n", mode, (unsigned long) f);
#endif
            DEBUGF_END ();
        }
    }
#endif
    free(modew);
    free(fnamew);

    return f;
}
/*
  popen by file system codepage
*/
/* Return 1 if S contains at least one space or tab character, else 0.  */
static int
is_include_space(const char *s)
{
    return strpbrk(s, " \t") != NULL;
}
189 FILE *
190 fsyscp_popen (const char *command, const char *mode)
192 FILE *f;
193 wchar_t *commandw, modew[4];
194 int i;
195 #if defined (KPSE_COMPAT_API)
196 kpathsea kpse;
197 #endif
198 assert(command && mode);
200 if (is_include_space (command)) {
201 const char *p;
202 char *command2, *q;
203 command2 = xmalloc (strlen (command) + 3);
204 p = command;
205 q = command2;
206 *q++ = '\"';
207 while (*p)
208 *q++ = *p++;
209 *q++ = '\"';
210 *q = '\0';
211 commandw = get_wstring_from_fsyscp(command2, commandw=NULL);
212 free (command2);
213 } else {
214 commandw = get_wstring_from_fsyscp(command, commandw=NULL);
216 for(i=0; (modew[i]=(wchar_t)mode[i]); i++) {} /* mode[i] must be ASCII */
217 f = _wpopen(commandw, modew);
218 #if defined (KPSE_COMPAT_API)
219 if (f != NULL) {
220 kpse = kpse_def;
221 if (KPATHSEA_DEBUG_P (KPSE_DEBUG_FOPEN)) {
222 DEBUGF_START ();
223 fprintf (stderr, "fsyscp_popen(%s [", command);
224 WriteConsoleW( GetStdHandle( STD_ERROR_HANDLE ), commandw, wcslen( commandw ), NULL, NULL );
225 #if defined(_WIN64)
226 fprintf (stderr, "], %s) => 0x%I64x\n", mode, (unsigned __int64) f);
227 #else
228 fprintf (stderr, "], %s) => 0x%lx\n", mode, (unsigned long) f);
229 #endif
230 DEBUGF_END ();
233 #endif
234 free (commandw);
235 /* We use always binary mode on Windows */
236 if(f) _setmode (fileno (f), _O_BINARY);
238 return f;
242 get_command_line_args_utf8 (const_string enc, int *p_ac, char ***p_av)
244 int argc;
245 string *argv;
247 if (!enc || !strncmp(enc,"",1)) return 0;
249 #ifdef DEBUG
250 fprintf(stderr, "command_line_encoding (%s)\n", enc);
251 #endif /* DEBUG */
252 if (!(strncmp(enc,"utf8",5) && strncmp(enc,"utf-8",6))) {
253 LPWSTR *argvw;
254 INT argcw, i;
255 string s;
256 #ifdef DEBUG
257 DWORD ret;
258 HANDLE hStderr;
259 hStderr = GetStdHandle( STD_ERROR_HANDLE );
260 #endif /* DEBUG */
261 file_system_codepage = CP_UTF8;
262 is_cp932_system = 0;
263 argvw = CommandLineToArgvW(GetCommandLineW(), &argcw);
264 argc = argcw;
265 argv = xmalloc(sizeof(char *)*(argcw+1));
266 for (i=0; i<argcw; i++) {
267 s = get_utf8_from_wstring(argvw[i], s=NULL);
268 argv[i] = s;
269 #ifdef DEBUG
270 fprintf(stderr, "Commandline arguments %d:(%s) [", i, argv[i]);
271 WriteConsoleW( hStderr, argvw[i], wcslen(argvw[i]), &ret, NULL);
272 fprintf(stderr, "]\n");
273 #endif /* DEBUG */
275 argv[argcw] = NULL;
276 *p_ac = argc;
277 *p_av = argv;
278 return file_system_codepage;
279 } else {
280 WARNING1("kpathsea: Ignoring unknown encoding `%s'", enc);
281 return 0;
/*
  spawnvp by file system codepage

  Convert COMMAND and every entry of the NULL-terminated vector ARGV
  with get_wstring_from_fsyscp, run them through _wspawnvp with MODE,
  release the converted copies, and return the _wspawnvp result.
*/
int
fsyscp_spawnvp (int mode, const char *command, const char* const *argv)
{
    int status;
    int count, k;
    wchar_t *wcommand = NULL;
    wchar_t **wargv;

    assert(command && argv);

    count = 0;
    while (argv[count] != NULL)
        count++;

    wargv = xcalloc (count + 3, sizeof (wchar_t *));
    wcommand = get_wstring_from_fsyscp(command, wcommand);
    for (k = 0; k < count; k++) {
        wargv[k] = NULL;
        wargv[k] = get_wstring_from_fsyscp(argv[k], wargv[k]);
    }
    wargv[count] = NULL;

    status = _wspawnvp (mode, (const wchar_t *)wcommand, (const wchar_t* const*) wargv);

    free (wcommand);
    /* Release the converted arguments up to the terminating NULL.  */
    for (k = 0; wargv[k] != NULL; k++)
        free (wargv[k]);
    free (wargv);

    return status;
}
324 system by file system codepage
327 fsyscp_system (const char *cmd)
329 const char *p;
330 char *q;
331 char *av[4];
332 int len, ret;
333 int spacep = 0;
335 if (cmd == NULL)
336 return 1;
338 av[0] = xstrdup ("cmd.exe");
339 av[1] = xstrdup ("/c");
341 len = strlen (cmd) + 3;
342 spacep = is_include_space (cmd);
343 av[2] = xmalloc (len);
344 q = av[2];
345 if (spacep)
346 *q++ = '"';
347 for (p = cmd; *p; p++, q++) {
348 if (*p == '\'')
349 *q = '"';
350 else
351 *q = *p;
353 if (spacep)
354 *q++ = '"';
355 *q = '\0';
356 av[3] = NULL;
357 ret = fsyscp_spawnvp (_P_WAIT, av[0], (const char* const*) av);
358 free (av[0]);
359 free (av[1]);
360 free (av[2]);
361 return ret;
364 static int getc_len;
365 static int getc_buff[4];
367 int win32_getc(FILE *fp)
369 const int fd = fileno(fp);
370 HANDLE hStdin;
371 DWORD ret;
372 wchar_t wc[3];
373 char mbc[5];
374 int j;
375 static wchar_t wcbuf = L'\0';
377 if (!(fd == fileno(stdin) && _isatty(fd) && file_system_codepage == CP_UTF8))
378 return getc(fp);
380 if (getc_len == 0)
382 hStdin = GetStdHandle(STD_INPUT_HANDLE);
383 if (wcbuf) {
384 wc[0] = wcbuf;
385 wcbuf = L'\0';
387 else if (ReadConsoleW(hStdin, wc, 1, &ret, NULL) == 0)
388 return EOF;
389 if (0xd800<=wc[0] && wc[0]<0xdc00) {
390 if (ReadConsoleW(hStdin, wc+1, 1, &ret, NULL) == 0)
391 return EOF;
392 if (0xdc00<=wc[1] && wc[1]<0xe000) {
393 wc[2]=L'\0';
394 } else {
395 wcbuf=wc[1];
396 wc[0]=0xfffd; /* illegal surrogate pair */
397 wc[1]=L'\0';
399 } else if (0xdc00<=wc[0] && wc[0]<0xe000) {
400 wc[0]=0xfffd; /* illegal surrogate pair */
401 wc[1]=L'\0';
402 } else {
403 wc[1]=L'\0';
405 get_utf8_from_wstring(wc,mbc);
406 j=strlen(mbc)-1;
407 while(j>=0) {
408 getc_buff[getc_len++]=(int)mbc[j--];
411 return getc_buff[--getc_len];
414 int win32_ungetc(int c, FILE *fp)
416 const int fd = fileno(fp);
418 if (!(fd == fileno(stdin) && _isatty(fd) && file_system_codepage == CP_UTF8))
419 return ungetc(c, fp);
421 assert(getc_len < 4);
422 return getc_buff[getc_len++] = c;
425 static int __win32_fputs(const char *str, HANDLE hStdout)
427 DWORD ret;
428 wchar_t *wstr;
430 wstr = get_wstring_from_utf8(str, wstr=NULL);
432 if (WriteConsoleW(hStdout, wstr, wcslen(wstr), &ret, NULL) == 0) {
433 free(wstr);
434 return EOF;
437 free(wstr);
438 return ret;
441 int win32_fputs(const char *str, FILE *fp)
443 const int fd = fileno(fp);
444 HANDLE hStdout;
446 if (!((fd == fileno(stdout) || fd == fileno(stderr)) && _isatty(fd)
447 && file_system_codepage == CP_UTF8))
448 return fputs(str, fp);
450 hStdout = (fd == fileno(stdout)) ?
451 GetStdHandle(STD_OUTPUT_HANDLE) : GetStdHandle(STD_ERROR_HANDLE);
453 return __win32_fputs(str, hStdout);
456 #define MAX_PROMPT_STR_SIZE 8192
458 int win32_vfprintf(FILE *fp, const char *format, va_list argp)
460 const int fd = fileno(fp);
461 HANDLE hStdout;
462 char buff[MAX_PROMPT_STR_SIZE];
463 int ret;
465 if (!((fd == fileno(stdout) || fd == fileno(stderr)) && _isatty(fd)
466 && file_system_codepage == CP_UTF8))
467 return vfprintf(fp, format, argp);
469 hStdout = (fd == fileno(stdout)) ?
470 GetStdHandle(STD_OUTPUT_HANDLE) : GetStdHandle(STD_ERROR_HANDLE);
472 ret = _vsnprintf(buff, sizeof(buff), format, argp);
473 if (__win32_fputs(buff, hStdout)==EOF) {
474 return EOF;
476 return ret;
/* puts replacement: write STR to stdout with win32_fputs, then let
   puts("") emit the line terminator.  Returns EOF if win32_fputs
   fails, otherwise the result of puts("").  */
int win32_puts(const char *str)
{
    const int status = win32_fputs(str, stdout);

    return (status == EOF) ? EOF : puts("");
}
487 int win32_putc(int c, FILE *fp)
489 const int fd = fileno(fp);
490 HANDLE hStdout;
491 DWORD ret;
492 wchar_t wstr[3];
493 static int len = 0;
494 static char buff[5], *str;
496 if (!((fd == fileno(stdout) || fd == fileno(stderr)) && _isatty(fd)
497 && file_system_codepage == CP_UTF8))
498 return putc(c, fp);
500 hStdout = (fd == fileno(stdout)) ?
501 GetStdHandle(STD_OUTPUT_HANDLE) : GetStdHandle(STD_ERROR_HANDLE);
503 c &= 0xff;
505 if (c < 0x80) {
506 str = buff;
507 len = 1;
509 if (c < 0xc0) { /* ASCII or trailer */
510 *str++ = c;
511 len--;
512 if (len == 0) {
513 *str = '\0';
514 get_wstring_from_utf8(buff, wstr);
515 if (WriteConsoleW(hStdout, wstr, wcslen(wstr), &ret, NULL) == 0) {
516 len = 0;
517 return EOF;
520 else if (len < 0) return EOF;
521 return c;
523 else if (c < 0xc2) { len = 0; return EOF; } /* illegal */
524 else if (c < 0xe0) len = 2;
525 else if (c < 0xf0) len = 3;
526 else if (c < 0xf5) len = 4;
527 else { len = 0; return EOF; }
529 str = buff;
530 *str++ = c;
531 len--;
532 return c;