* Update to version 2.19.5
[alpine.git] / pith / charconv / filesys.c
blob3be45a5f77a863c86609b9268f4edadc2fe7d35a
#if !(defined(lint) || defined(DOS))
/* RCS "$Id$" keyword string for this file; left out for lint and DOS builds. */
static char rcsid[] = "$Id: filesys.c 770 2007-10-24 00:23:09Z hubert@u.washington.edu $";
#endif
/*
 * ========================================================================
 * Copyright 2006-2007 University of Washington
 * Copyright 2013-2014 Eduardo Chappa
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * ========================================================================
 */
19 /* includable WITHOUT dependency on c-client */
20 #include "../../c-client/mail.h"
21 #include "../../c-client/utf8.h"
23 #ifdef _WINDOWS
24 /* wingdi.h uses ERROR (!) and we aren't using the c-client ERROR so... */
25 #undef ERROR
26 #endif
28 #include <system.h>
29 #include <general.h>
31 #include "../../c-client/fs.h"
33 /* includable WITHOUT dependency on pico */
34 #include "../../pico/keydefs.h"
35 #ifdef _WINDOWS
36 #include "../../pico/osdep/mswin.h"
37 #endif
39 #include "filesys.h"
40 #include "utf8.h"
/* Stand-in character used in this file when a character cannot be converted. */
#define	bad_char	((UCS) '?')
/*
 * Convenience wrapper around convert_to_locale() for file and directory
 * names.
 *
 * The result is kept in a single function-static buffer that is reused
 * (and enlarged when too small) on every call, so only one converted
 * name is valid at a time; copy it before calling again. The buffer is
 * never freed.
 *
 * When convert_to_locale() returns NULL the name itself is copied; when
 * the name is NULL as well, the result is the empty string.
 *
 * Returns the static buffer (always NUL-terminated).
 */
char *
fname_to_locale(char *fname)
{
    static char *fname_locale_buf = NULL;
    static size_t fname_locale_len = 0;
    char *allocated, *src;
    size_t needed;

    allocated = convert_to_locale(fname);
    src = allocated ? allocated : fname;

    if(src == NULL){
        /* nothing to copy, hand back an empty string */
        if(fname_locale_len == 0){
            fname_locale_len = 1;
            fname_locale_buf = (char *) fs_get(fname_locale_len * sizeof(char));
        }

        fname_locale_buf[0] = '\0';
    }
    else{
        needed = strlen(src) + 1;

        /* grow only; a larger buffer from an earlier call is reused */
        if(needed > fname_locale_len){
            if(fname_locale_buf)
                fs_give((void **) &fname_locale_buf);

            fname_locale_len = needed;
            fname_locale_buf = (char *) fs_get(fname_locale_len * sizeof(char));
        }

        strncpy(fname_locale_buf, src, fname_locale_len);
        fname_locale_buf[fname_locale_len - 1] = '\0';
    }

    /* the conversion result, if any, is ours to release */
    if(allocated)
        fs_give((void **) &allocated);

    return fname_locale_buf;
}
/*
 * Convenience wrapper around convert_to_utf8() for file and directory
 * names.
 *
 * The result is kept in a single function-static buffer that is reused
 * (and enlarged when too small) on every call, so only one converted
 * name is valid at a time; copy it before calling again. The buffer is
 * never freed.
 *
 * When convert_to_utf8() returns NULL the name itself is copied; when
 * the name is NULL as well, the result is the empty string.
 *
 * Returns the static buffer (always NUL-terminated).
 */
char *
fname_to_utf8(char *fname)
{
    static char *fname_utf8_buf = NULL;
    static size_t fname_utf8_len = 0;
    char *allocated, *src;
    size_t needed;

    allocated = convert_to_utf8(fname, NULL, 0);
    src = allocated ? allocated : fname;

    if(src == NULL){
        /* nothing to copy, hand back an empty string */
        if(fname_utf8_len == 0){
            fname_utf8_len = 1;
            fname_utf8_buf = (char *) fs_get(fname_utf8_len * sizeof(char));
        }

        fname_utf8_buf[0] = '\0';
    }
    else{
        needed = strlen(src) + 1;

        /* grow only; a larger buffer from an earlier call is reused */
        if(needed > fname_utf8_len){
            if(fname_utf8_buf)
                fs_give((void **) &fname_utf8_buf);

            fname_utf8_len = needed;
            fname_utf8_buf = (char *) fs_get(fname_utf8_len * sizeof(char));
        }

        strncpy(fname_utf8_buf, src, fname_utf8_len);
        fname_utf8_buf[fname_utf8_len - 1] = '\0';
    }

    /* the conversion result, if any, is ours to release */
    if(allocated)
        fs_give((void **) &allocated);

    return fname_utf8_buf;
}
141 * The fp file pointer is open for read on a file which has contents
142 * that are encoded in the user's locale charset. That multibyte stream
143 * of characters is converted to wide characters and returned one at
144 * a time.
146 * Not sure what to do if an uninterpretable character happens. Returning
147 * the bad character now.
150 read_a_wide_char(FILE *fp,
151 void *input_cs) /* input_cs ignored in Windows */
153 #ifdef _WINDOWS
154 _TINT val;
156 val = _fgettc(fp);
157 if(val == _TEOF)
158 return(CCONV_EOF);
160 return((UCS) val);
161 #else /* UNIX */
162 unsigned long octets_so_far, remaining_octets;
163 unsigned char *inputp;
164 unsigned char inputbuf[20];
165 int c;
166 UCS ucs;
168 c = fgetc(fp);
169 if(c == EOF)
170 return(CCONV_EOF);
173 * Read enough bytes to make up a character and convert it to UCS-4.
175 memset(inputbuf, 0, sizeof(inputbuf));
176 inputbuf[0] = (unsigned char) c;
177 octets_so_far = 1;
178 for(;;){
179 remaining_octets = octets_so_far;
180 inputp = inputbuf;
181 ucs = mbtow(input_cs, &inputp, &remaining_octets);
182 switch(ucs){
183 case CCONV_BADCHAR:
184 return(bad_char);
186 case CCONV_NEEDMORE:
187 if(octets_so_far >= sizeof(inputbuf))
188 return(bad_char);
190 c = fgetc(fp);
191 if(c == EOF)
192 return(CCONV_EOF);
194 inputbuf[octets_so_far++] = (unsigned char) c;
195 break;
197 default:
198 /* got a good UCS-4 character */
199 return(ucs);
203 return(bad_char);
204 #endif /* UNIX */
209 write_a_wide_char(UCS ucs, FILE *fp)
211 #ifdef _WINDOWS
212 int rv = 1;
213 TCHAR w;
215 w = (TCHAR) ucs;
216 if(_fputtc(w, fp) == _TEOF)
217 rv = EOF;
219 return(rv);
220 #else /* UNIX */
221 int rv = 1;
222 int i, outchars;
223 unsigned char obuf[MAX(MB_LEN_MAX,32)];
225 if(ucs < 0x80){
226 obuf[0] = (unsigned char) ucs;
227 outchars = 1;
229 else{
230 outchars = wtomb((char *) obuf, ucs);
231 if(outchars < 0){
232 outchars = 1;
233 obuf[0] = bad_char; /* ??? */
237 for(i = 0; i < outchars; i++)
238 if(fputc(obuf[i], fp) == EOF){
239 rv = EOF;
240 break;
243 return(rv);
244 #endif /* UNIX */
/*
 * stat() for a filename given in the program's internal encoding.
 *
 * Windows: the name is converted with utf8_to_lptstr() and passed to
 * _tstat(); on success the struct _stat members are copied one by one
 * into the caller's struct stat. If the conversion or _tstat() fails,
 * *sbuf is left untouched.
 * UNIX: the name is run through fname_to_locale() and given to stat().
 *
 * Returns what the underlying call returns, or -1 if the Windows name
 * conversion fails.
 */
int
our_stat(char *filename, struct stat *sbuf)
{
#ifdef _WINDOWS
    LPTSTR f = NULL;
    int    ret = -1;
    struct _stat s;

    f = utf8_to_lptstr((LPSTR) filename);
    if(f){
        ret = _tstat(f, &s);

        /* s holds meaningful values only when _tstat succeeded */
        if(ret == 0){
            sbuf->st_dev = s.st_dev;
            sbuf->st_ino = s.st_ino;
            sbuf->st_mode = s.st_mode;
            sbuf->st_nlink = s.st_nlink;
            sbuf->st_uid = s.st_uid;
            sbuf->st_gid = s.st_gid;
            sbuf->st_rdev = s.st_rdev;
            sbuf->st_size = s.st_size;
            sbuf->st_atime = (time_t) s.st_atime;
            sbuf->st_mtime = (time_t) s.st_mtime;
            sbuf->st_ctime = (time_t) s.st_ctime;
        }

        fs_give((void **) &f);
    }

    return ret;
#else /* UNIX */
    return(stat(fname_to_locale(filename), sbuf));
#endif /* UNIX */
}
/*
 * lstat() for a filename that is first passed through fname_to_locale().
 * Not used on Windows, where it asserts and returns -1.
 */
int
our_lstat(char *filename, struct stat *sbuf)
{
#ifdef _WINDOWS
    assert(0);                          /* lstat not used in Windows */
    return(-1);
#else /* UNIX */
    char *native = fname_to_locale(filename);

    return lstat(native, sbuf);
#endif /* UNIX */
}
/*
 * fopen() for a path given in the program's internal encoding.
 *
 * UNIX: the path is run through fname_to_locale() and given to fopen().
 *
 * Windows: path and mode are converted with utf8_to_lptstr() and given
 * to _tfopen(). Unless the mode contains 'b', a ccs= suffix is appended
 * to it first:
 *   - modes starting with 'r' or 'a' get ", ccs=UNICODE"
 *   - modes starting with 'w'        get ", ccs=UTF-8"
 *
 * Returns the open stream, or NULL on failure (including a NULL or
 * unrecognized mode, or a failed conversion, on Windows).
 */
FILE *
our_fopen(char *path, char *mode)
{
#ifdef _WINDOWS
    LPTSTR p = NULL, m = NULL;
    FILE  *ret = NULL;
    char  *mode_with_ccs = NULL;
    char  *ccs_suffix = NULL;
    char   buf[500];
    size_t len;

    if(mode && (*mode == 'r' || *mode == 'a')){
        /*
         * The docs seem to say that we don't need the ccs parameter and
         * if the file has a BOM at the beginning it will notice that and
         * use it. However, we're not seeing that. Instead, what we see is
         * that giving a parameter of UNICODE causes the desired behavior.
         * This causes it to check for a BOM and if it finds one it uses it.
         * If it doesn't find one, it treats the file as ANSI, which is what
         * we want.
         */
        ccs_suffix = ", ccs=UNICODE";
    }
    else if(mode && *mode == 'w')
        ccs_suffix = ", ccs=UTF-8";

    if(ccs_suffix){
        if(strchr(mode, 'b'))
            mode_with_ccs = mode;       /* binary: no translation wanted */
        else{
            len = strlen(mode) + strlen(ccs_suffix);
            if(len < sizeof(buf)){
                len = sizeof(buf)-1;
                mode_with_ccs = buf;
            }
            else
                mode_with_ccs = (char *) MemAlloc((len+1) * sizeof(char));

            if(mode_with_ccs)
                snprintf(mode_with_ccs, len+1, "%s%s", mode, ccs_suffix);
            else
                mode_with_ccs = mode;   /* can't happen */
        }
    }

    /* with no usable mode string there is nothing to open */
    if(mode_with_ccs){
        p = utf8_to_lptstr((LPSTR) path);

        if(p){
            m = utf8_to_lptstr((LPSTR) mode_with_ccs);
            if(m){
                ret = _tfopen(p, m);
                /* release the same way as every other utf8_to_lptstr result */
                fs_give((void **) &m);
            }

            fs_give((void **) &p);
        }
    }

    /* only a MemAlloc'd mode string belongs to us */
    if(mode_with_ccs && mode_with_ccs != buf && mode_with_ccs != mode)
        MemFree((void *) mode_with_ccs);

    return ret;
#else /* UNIX */
    return(fopen(fname_to_locale(path), mode));
#endif /* UNIX */
}
/*
 * open() for a path given in the program's internal encoding.
 *
 * Windows: the path is converted with utf8_to_lptstr() and passed to
 * _topen(); -1 is returned if the conversion fails.
 * UNIX: the path is run through fname_to_locale() and given to open().
 */
int
our_open(char *path, int flags, mode_t mode)
{
#ifdef _WINDOWS
    LPTSTR wpath;
    int    fd;

    /*
     * Setting the _O_WTEXT flag when opening a file for reading
     * will cause us to read the first few bytes to check for
     * a BOM and to translate from that encoding if we find it.
     * This only works with stream I/O, not low-level read/write.
     *
     * When opening for writing the flag _O_U8TEXT will cause
     * us to put a UTF-8 BOM at the start of the file.
     *
     * O_TEXT will cause LF -> CRLF on output, opposite on input
     * O_BINARY suppresses that.
     * _O_U8TEXT implies O_TEXT.
     */

    wpath = utf8_to_lptstr((LPSTR) path);
    if(!wpath)
        return -1;

    fd = _topen(wpath, flags, mode);
    fs_give((void **) &wpath);

    return fd;
#else /* UNIX */
    char *native = fname_to_locale(path);

    return open(native, flags, mode);
#endif /* UNIX */
}
/*
 * creat() for a path given in the program's internal encoding.
 *
 * Windows: the path is converted with utf8_to_lptstr() and passed to
 * _tcreat(); -1 is returned if the conversion fails.
 * UNIX: the path is run through fname_to_locale() and given to creat().
 */
int
our_creat(char *path, mode_t mode)
{
#ifdef _WINDOWS
    LPTSTR wpath;
    int    fd;

    wpath = utf8_to_lptstr((LPSTR) path);
    if(!wpath)
        return -1;

    fd = _tcreat(wpath, mode);
    fs_give((void **) &wpath);

    return fd;
#else /* UNIX */
    char *native = fname_to_locale(path);

    return creat(native, mode);
#endif /* UNIX */
}
/*
 * mkdir() for a path given in the program's internal encoding.
 *
 * Windows: the path is converted with utf8_to_lptstr() and passed to
 * _tmkdir(); mode is ignored and -1 is returned if the conversion fails.
 * UNIX: the path is run through fname_to_locale() and given to mkdir().
 */
int
our_mkdir(char *path, mode_t mode)
{
#ifdef _WINDOWS
    /* mode is a noop for _WINDOWS */
    LPTSTR wpath;
    int    rc;

    wpath = utf8_to_lptstr((LPSTR) path);
    if(!wpath)
        return -1;

    rc = _tmkdir(wpath);
    fs_give((void **) &wpath);

    return rc;
#else /* UNIX */
    char *native = fname_to_locale(path);

    return mkdir(native, mode);
#endif /* UNIX */
}
/*
 * rename() for paths given in the program's internal encoding.
 *
 * Windows: both paths are converted with utf8_to_lptstr() and passed to
 * _trename(); -1 is returned if either conversion fails.
 * UNIX: both paths go through fname_to_locale(). The converted old path
 * is copied first because the second fname_to_locale() call is made
 * while the first result is still needed.
 */
int
our_rename(char *oldpath, char *newpath)
{
#ifdef _WINDOWS
    LPTSTR wold, wnew;
    int    rc = -1;

    wold = utf8_to_lptstr((LPSTR) oldpath);
    wnew = utf8_to_lptstr((LPSTR) newpath);

    if(wold && wnew)
        rc = _trename(wold, wnew);

    if(wold)
        fs_give((void **) &wold);

    if(wnew)
        fs_give((void **) &wnew);

    return rc;
#else /* UNIX */
    char  *converted, *old_copy;
    size_t n;
    int    rc;

    converted = fname_to_locale(oldpath);
    if(!converted)
        return -1;

    /* private copy of the old name, terminator included */
    n = strlen(converted);
    old_copy = (char *) fs_get((n+1) * sizeof(char));
    memcpy(old_copy, converted, n+1);

    rc = rename(old_copy, fname_to_locale(newpath));
    fs_give((void **) &old_copy);

    return rc;
#endif /* UNIX */
}
/*
 * unlink() for a path given in the program's internal encoding.
 *
 * Windows: the path is converted with utf8_to_lptstr() and passed to
 * _tunlink(); -1 is returned if the conversion fails.
 * UNIX: the path is run through fname_to_locale() and given to unlink().
 */
int
our_unlink(char *path)
{
#ifdef _WINDOWS
    LPTSTR wpath;
    int    rc;

    wpath = utf8_to_lptstr((LPSTR) path);
    if(!wpath)
        return -1;

    rc = _tunlink(wpath);
    fs_give((void **) &wpath);

    return rc;
#else /* UNIX */
    char *native = fname_to_locale(path);

    return unlink(native);
#endif /* UNIX */
}
/*
 * link() for paths that are first passed through fname_to_locale().
 * Not used on Windows, where it asserts and returns -1.
 *
 * The converted old path is copied before the new path is converted,
 * because the second fname_to_locale() call is made while the first
 * result is still needed.
 */
int
our_link(char *oldpath, char *newpath)
{
#ifdef _WINDOWS
    assert(0);                          /* link not used in Windows */
    return(-1);
#else /* UNIX */
    char  *converted, *old_copy;
    size_t n;
    int    rc;

    converted = fname_to_locale(oldpath);
    if(!converted)
        return -1;

    /* private copy of the old name, terminator included */
    n = strlen(converted);
    old_copy = (char *) fs_get((n+1) * sizeof(char));
    memcpy(old_copy, converted, n+1);

    rc = link(old_copy, fname_to_locale(newpath));
    fs_give((void **) &old_copy);

    return rc;
#endif /* UNIX */
}
/*
 * Set the length of the file named by path to size.
 *
 * Windows: the file is opened through our_open() (O_CREAT is given, so
 * it is created if missing), resized with chsize() and closed again.
 * UNIX with HAVE_TRUNCATE: truncate() on the fname_to_locale() name.
 * UNIX without HAVE_TRUNCATE: open through our_open(), chsize(), close;
 * a failing close() also counts as failure.
 *
 * Returns 0 on success, -1 on failure.
 */
int
our_truncate(char *path, off_t size)
{
    int ret = -1;
#if	defined(_WINDOWS) || !defined(HAVE_TRUNCATE)
    int fdes;
#endif

#ifdef _WINDOWS
    /*
     * S_IREAD | S_IWRITE are permission bits: they go in the mode
     * argument, not in the open flags where they would be read as
     * unrelated _O_* flags. The value is the same as the 0600 that was
     * passed as the mode before.
     */
    if((fdes = our_open(path, O_RDWR | O_CREAT | _O_U8TEXT,
                        S_IREAD | S_IWRITE)) != -1){
        if(chsize(fdes, size) == 0)
            ret = 0;

        close(fdes);
    }

#else /* UNIX */

#ifdef	HAVE_TRUNCATE
    ret = truncate(fname_to_locale(path), size);
#else /* !HAVE_TRUNCATE */

    if((fdes = our_open(path, O_RDWR, 0600)) != -1){
        ret = chsize(fdes, size) ;

        if(close(fdes))
            ret = -1;
    }
#endif /* !HAVE_TRUNCATE */
#endif /* UNIX */

    return ret;
}
/*
 * chmod() for a path given in the program's internal encoding.
 *
 * Windows: the path is converted with utf8_to_lptstr() and passed to
 * _tchmod(); -1 is returned if the conversion fails.
 * UNIX: the path is run through fname_to_locale() and given to chmod().
 */
int
our_chmod(char *path, mode_t mode)
{
#ifdef _WINDOWS
    LPTSTR wpath;
    int    rc;

    wpath = utf8_to_lptstr((LPSTR) path);
    if(!wpath)
        return -1;

    rc = _tchmod(wpath, mode);
    fs_give((void **) &wpath);

    return rc;
#else /* UNIX */
    char *native = fname_to_locale(path);

    return chmod(native, mode);
#endif /* UNIX */
}
/*
 * chown() for a path that is first passed through fname_to_locale().
 * On Windows nothing is done and 0 is returned.
 */
int
our_chown(char *path, uid_t owner, gid_t group)
{
#ifdef _WINDOWS
    return 0;
#else /* UNIX */
    char *native = fname_to_locale(path);

    return chown(native, owner, group);
#endif /* UNIX */
}
/*
 * utime() for a path given in the program's internal encoding.
 *
 * Windows: the path is converted with utf8_to_lptstr() and passed to
 * _tutime(); -1 is returned if the conversion fails.
 * UNIX: the path is run through fname_to_locale() and given to utime().
 */
int
our_utime(char *path, struct utimbuf *buf)
{
#ifdef _WINDOWS
    LPTSTR wpath;
    int    rc;

    wpath = utf8_to_lptstr((LPSTR) path);
    if(!wpath)
        return -1;

    rc = _tutime(wpath, buf);
    fs_give((void **) &wpath);

    return rc;
#else /* UNIX */
    char *native = fname_to_locale(path);

    return utime(native, buf);
#endif /* UNIX */
}
/*
 * Return a newly allocated, utf8-encoded copy of the value of the given
 * environment variable; the caller owns the result. The env_variable
 * argument is assumed not to be UTF-8. Returns NULL if there is no such
 * environment variable.
 *
 * We'll pretty much swap out getenv's where convenient. Windows pretty
 * much doesn't want to do getenv once we do unicode.
 *
 * Windows: the name is widened character by character (at most MAXPATH
 * of them), looked up with _tgetenv() and the value is handed to
 * lptstr_to_utf8().
 * UNIX: the getenv() value is passed through fname_to_utf8() and then
 * copied into fs_get() storage.
 */
char *
our_getenv(char *env_variable)
{
#ifdef _WINDOWS
    TCHAR wname[MAXPATH+1], *wvalue;
    int   k = 0;

    while(env_variable[k] && k < MAXPATH){
        wname[k] = env_variable[k];
        k++;
    }

    wname[k] = '\0';

    wvalue = _tgetenv(wname);
    if(!wvalue)
        return(NULL);

    return(lptstr_to_utf8(wvalue));
#else /* !_WINDOWS */
    char  *raw, *as_utf8, *copy;
    size_t n;

    raw = getenv(env_variable);
    if(raw == NULL)
        return(NULL);

    /* all this when what we want is a cpystr */
    as_utf8 = fname_to_utf8(raw);
    n = strlen(as_utf8);
    copy = (char *) fs_get((n+1) * sizeof(char));
    memcpy(copy, as_utf8, n+1);         /* terminator included */

    return(copy);
#endif /* !_WINDOWS */
}
/*
 * access() for a path given in the program's internal encoding.
 *
 * Windows: the path is converted with utf8_to_lptstr() and passed to
 * _taccess(); -1 is returned if the conversion fails.
 * UNIX: the path is run through fname_to_locale() and given to access().
 */
int
our_access(char *path, int mode)
{
#ifdef _WINDOWS
    LPTSTR wpath;
    int    rc;

    wpath = utf8_to_lptstr((LPSTR) path);
    if(!wpath)
        return -1;

    rc = _taccess(wpath, mode);
    fs_give((void **) &wpath);

    return rc;
#else /* UNIX */
    char *native = fname_to_locale(path);

    return access(native, mode);
#endif /* UNIX */
}
/*
 * Fgets that doesn't do any character encoding translation or any
 * of that Windows stuff.
 *
 * Reads at most size-1 bytes from fp into s, stopping after a newline,
 * and NUL-terminates the result.
 *
 * Returns s, or NULL when s is NULL, when size is not positive, or when
 * the stream is at end of file (or fails) before any byte was read. The
 * last case lets a "read until NULL" loop end on Windows as it does on
 * UNIX, where plain fgets() is used.
 */
char *
fgets_binary(char *s, int size, FILE *fp)
{
#ifdef _WINDOWS
    char  *p;
    char   c;
    size_t r = 0;

    /*
     * Use fread low-level input instead of fgets.
     * Maybe if we understood better we wouldn't need this.
     */
    if(!s || size <= 0)
        return NULL;                    /* no room even for the terminator */

    p = s;
    while(p-s < size-1 && (r = fread(&c, sizeof(c), (size_t) 1, fp)) == 1 && c != '\n')
        *p++ = c;

    /* a read was attempted, failed, and nothing was stored: EOF or error */
    if(p == s && size > 1 && r != 1)
        return NULL;

    if(p-s < size-1 && r == 1){
        /* must have gotten to end of line */
        *p++ = '\n';
    }

    *p = '\0';
    return(s);

#else /* UNIX */
    return(fgets(s, size, fp));
#endif /* UNIX */
}