1 /* knj.c: check for 2-Byte Kanji (CP 932, SJIS) codes.
3 Copyright 2010, 2014 Akira Kakuto.
4 Copyright 2013, 2014 TANAKA Takuji.
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public License
17 along with this library; if not, see <http://www.gnu.org/licenses/>. */
19 #include <kpathsea/config.h>
20 #include <kpathsea/debug.h>
23 int is_cp932_system
, file_system_codepage
;
28 switch (is_cp932_system
) {
30 return((c
>=0x81 && c
<=0x9f) || (c
>=0xe0 && c
<=0xfc));
32 return(c
>=0x81 && c
<=0xfe);
34 return((c
>=0xa1 && c
<=0xc6) || (c
>=0xc9 && c
<=0xf9));
43 switch (is_cp932_system
) {
45 return(c
>=0x40 && c
<=0xfc && c
!=0x7f);
47 return(c
>=0x40 && c
<=0xfe && c
!=0x7f);
49 return((c
>=0x40 && c
<=0x7e) || (c
>=0xa1 && c
<=0xfe));
56 Get wide string from multibyte string.
59 get_wstring_from_mbstring(int cp
, const char *mbstr
, wchar_t *wstr
)
63 len
= MultiByteToWideChar(cp
, 0, mbstr
, -1, wstr
, 0);
65 FATAL("cannot convert string to wide string");
68 wstr
= xmalloc(sizeof(wchar_t)*(len
+1));
70 len
= MultiByteToWideChar(cp
, 0, mbstr
, -1, wstr
, len
+1);
72 FATAL("cannot convert multibyte string to wide string");
78 Get multibyte string from wide string.
81 get_mbstring_from_wstring(int cp
, const wchar_t *wstr
, char *mbstr
)
85 len
= WideCharToMultiByte(cp
, 0, wstr
, -1, mbstr
, 0, NULL
, NULL
);
87 FATAL("cannot convert string to multibyte string");
90 mbstr
= xmalloc(len
+1);
92 len
= WideCharToMultiByte(cp
, 0, wstr
, -1, mbstr
, len
+1, NULL
, NULL
);
94 FATAL("cannot convert wide string to multibyte string");
100 xfopen by file system codepage
103 fsyscp_xfopen (const char *filename
, const char *mode
)
106 wchar_t *fnamew
, modew
[4];
108 #if defined (KPSE_COMPAT_API)
111 assert(filename
&& mode
);
113 fnamew
= get_wstring_from_fsyscp(filename
, fnamew
=NULL
);
114 for(i
=0; (modew
[i
]=(wchar_t)mode
[i
]); i
++) {} /* mode[i] must be ASCII */
115 f
= _wfopen(fnamew
, modew
);
117 FATAL_PERROR(filename
);
118 #if defined (KPSE_COMPAT_API)
120 if (KPATHSEA_DEBUG_P (KPSE_DEBUG_FOPEN
)) {
122 fprintf (stderr
, "fsyscp_xfopen(%s [", filename
);
123 WriteConsoleW( GetStdHandle( STD_ERROR_HANDLE
), fnamew
, wcslen( fnamew
), NULL
, NULL
);
125 fprintf (stderr
, "], %s) => 0x%I64x\n", mode
, (unsigned __int64
) f
);
127 fprintf (stderr
, "], %s) => 0x%lx\n", mode
, (unsigned long) f
);
138 fopen by file system codepage
141 fsyscp_fopen (const char *filename
, const char *mode
)
144 wchar_t *fnamew
, modew
[4];
146 #if defined (KPSE_COMPAT_API)
149 assert(filename
&& mode
);
151 fnamew
= get_wstring_from_fsyscp(filename
, fnamew
=NULL
);
152 for(i
=0; (modew
[i
]=(wchar_t)mode
[i
]); i
++) {} /* mode[i] must be ASCII */
153 f
= _wfopen(fnamew
, modew
);
154 #if defined (KPSE_COMPAT_API)
157 if (KPATHSEA_DEBUG_P (KPSE_DEBUG_FOPEN
)) {
159 fprintf (stderr
, "fsyscp_fopen(%s [", filename
);
160 WriteConsoleW( GetStdHandle( STD_ERROR_HANDLE
), fnamew
, wcslen( fnamew
), NULL
, NULL
);
162 fprintf (stderr
, "], %s) => 0x%I64x\n", mode
, (unsigned __int64
) f
);
164 fprintf (stderr
, "], %s) => 0x%lx\n", mode
, (unsigned long) f
);
176 popen by file system codepage
179 is_include_space(const char *s
)
190 fsyscp_popen (const char *command
, const char *mode
)
193 wchar_t *commandw
, modew
[4];
195 #if defined (KPSE_COMPAT_API)
198 assert(command
&& mode
);
200 if (is_include_space (command
)) {
203 command2
= xmalloc (strlen (command
) + 3);
211 commandw
= get_wstring_from_fsyscp(command2
, commandw
=NULL
);
214 commandw
= get_wstring_from_fsyscp(command
, commandw
=NULL
);
216 for(i
=0; (modew
[i
]=(wchar_t)mode
[i
]); i
++) {} /* mode[i] must be ASCII */
217 f
= _wpopen(commandw
, modew
);
218 #if defined (KPSE_COMPAT_API)
221 if (KPATHSEA_DEBUG_P (KPSE_DEBUG_FOPEN
)) {
223 fprintf (stderr
, "fsyscp_popen(%s [", command
);
224 WriteConsoleW( GetStdHandle( STD_ERROR_HANDLE
), commandw
, wcslen( commandw
), NULL
, NULL
);
226 fprintf (stderr
, "], %s) => 0x%I64x\n", mode
, (unsigned __int64
) f
);
228 fprintf (stderr
, "], %s) => 0x%lx\n", mode
, (unsigned long) f
);
235 /* We use always binary mode on Windows */
236 if(f
) _setmode (fileno (f
), _O_BINARY
);
242 get_command_line_args_utf8 (const_string enc
, int *p_ac
, char ***p_av
)
247 if (!enc
|| !strncmp(enc
,"",1)) return 0;
250 fprintf(stderr
, "command_line_encoding (%s)\n", enc
);
252 if (!(strncmp(enc
,"utf8",5) && strncmp(enc
,"utf-8",6))) {
259 hStderr
= GetStdHandle( STD_ERROR_HANDLE
);
261 file_system_codepage
= CP_UTF8
;
263 argvw
= CommandLineToArgvW(GetCommandLineW(), &argcw
);
265 argv
= xmalloc(sizeof(char *)*(argcw
+1));
266 for (i
=0; i
<argcw
; i
++) {
267 s
= get_utf8_from_wstring(argvw
[i
], s
=NULL
);
270 fprintf(stderr
, "Commandline arguments %d:(%s) [", i
, argv
[i
]);
271 WriteConsoleW( hStderr
, argvw
[i
], wcslen(argvw
[i
]), &ret
, NULL
);
272 fprintf(stderr
, "]\n");
278 return file_system_codepage
;
280 WARNING1("kpathsea: Ignoring unknown encoding `%s'", enc
);
286 spawnvp by file system codepage
289 fsyscp_spawnvp (int mode
, const char *command
, const char* const *argv
)
292 wchar_t *commandw
, **argvw
, **pw
;
294 const char* const *p
;
296 assert(command
&& argv
);
297 for (i
= 0, p
= argv
; *p
; p
++)
299 argvw
= xcalloc (i
+ 3, sizeof (wchar_t *));
300 commandw
= get_wstring_from_fsyscp(command
, commandw
=NULL
);
304 *pw
= get_wstring_from_fsyscp(*p
, *pw
=NULL
);
309 ret
= _wspawnvp (mode
, (const wchar_t *)commandw
, (const wchar_t* const*) argvw
);
310 if(commandw
) free(commandw
);
324 system by file system codepage
327 fsyscp_system (const char *cmd
)
338 av
[0] = xstrdup ("cmd.exe");
339 av
[1] = xstrdup ("/c");
341 len
= strlen (cmd
) + 3;
342 spacep
= is_include_space (cmd
);
343 av
[2] = xmalloc (len
);
347 for (p
= cmd
; *p
; p
++, q
++) {
357 ret
= fsyscp_spawnvp (_P_WAIT
, av
[0], (const char* const*) av
);
365 static int getc_buff
[4];
367 int win32_getc(FILE *fp
)
369 const int fd
= fileno(fp
);
375 static wchar_t wcbuf
= L
'\0';
377 if (!(fd
== fileno(stdin
) && _isatty(fd
) && file_system_codepage
== CP_UTF8
))
382 hStdin
= GetStdHandle(STD_INPUT_HANDLE
);
387 else if (ReadConsoleW(hStdin
, wc
, 1, &ret
, NULL
) == 0)
389 if (0xd800<=wc
[0] && wc
[0]<0xdc00) {
390 if (ReadConsoleW(hStdin
, wc
+1, 1, &ret
, NULL
) == 0)
392 if (0xdc00<=wc
[1] && wc
[1]<0xe000) {
396 wc
[0]=0xfffd; /* illegal surrogate pair */
399 } else if (0xdc00<=wc
[0] && wc
[0]<0xe000) {
400 wc
[0]=0xfffd; /* illegal surrogate pair */
405 get_utf8_from_wstring(wc
,mbc
);
408 getc_buff
[getc_len
++]=(int)mbc
[j
--];
411 return getc_buff
[--getc_len
];
414 int win32_ungetc(int c
, FILE *fp
)
416 const int fd
= fileno(fp
);
418 if (!(fd
== fileno(stdin
) && _isatty(fd
) && file_system_codepage
== CP_UTF8
))
419 return ungetc(c
, fp
);
421 assert(getc_len
< 4);
422 return getc_buff
[getc_len
++] = c
;
425 static int __win32_fputs(const char *str
, HANDLE hStdout
)
430 wstr
= get_wstring_from_utf8(str
, wstr
=NULL
);
432 if (WriteConsoleW(hStdout
, wstr
, wcslen(wstr
), &ret
, NULL
) == 0) {
441 int win32_fputs(const char *str
, FILE *fp
)
443 const int fd
= fileno(fp
);
446 if (!((fd
== fileno(stdout
) || fd
== fileno(stderr
)) && _isatty(fd
)
447 && file_system_codepage
== CP_UTF8
))
448 return fputs(str
, fp
);
450 hStdout
= (fd
== fileno(stdout
)) ?
451 GetStdHandle(STD_OUTPUT_HANDLE
) : GetStdHandle(STD_ERROR_HANDLE
);
453 return __win32_fputs(str
, hStdout
);
456 #define MAX_PROMPT_STR_SIZE 8192
458 int win32_vfprintf(FILE *fp
, const char *format
, va_list argp
)
460 const int fd
= fileno(fp
);
462 char buff
[MAX_PROMPT_STR_SIZE
];
465 if (!((fd
== fileno(stdout
) || fd
== fileno(stderr
)) && _isatty(fd
)
466 && file_system_codepage
== CP_UTF8
))
467 return vfprintf(fp
, format
, argp
);
469 hStdout
= (fd
== fileno(stdout
)) ?
470 GetStdHandle(STD_OUTPUT_HANDLE
) : GetStdHandle(STD_ERROR_HANDLE
);
472 ret
= _vsnprintf(buff
, sizeof(buff
), format
, argp
);
473 if (__win32_fputs(buff
, hStdout
)==EOF
) {
479 int win32_puts(const char *str
)
481 if (win32_fputs(str
, stdout
)==EOF
) {
487 int win32_putc(int c
, FILE *fp
)
489 const int fd
= fileno(fp
);
494 static char buff
[5], *str
;
496 if (!((fd
== fileno(stdout
) || fd
== fileno(stderr
)) && _isatty(fd
)
497 && file_system_codepage
== CP_UTF8
))
500 hStdout
= (fd
== fileno(stdout
)) ?
501 GetStdHandle(STD_OUTPUT_HANDLE
) : GetStdHandle(STD_ERROR_HANDLE
);
509 if (c
< 0xc0) { /* ASCII or trailer */
514 get_wstring_from_utf8(buff
, wstr
);
515 if (WriteConsoleW(hStdout
, wstr
, wcslen(wstr
), &ret
, NULL
) == 0) {
520 else if (len
< 0) return EOF
;
523 else if (c
< 0xc2) { len
= 0; return EOF
; } /* illegal */
524 else if (c
< 0xe0) len
= 2;
525 else if (c
< 0xf0) len
= 3;
526 else if (c
< 0xf5) len
= 4;
527 else { len
= 0; return EOF
; }