* New version 2.26
[alpine.git] / pith / charconv / filesys.c
blob29c99110e828ff938e0842975c79216b8a319383
1 /*
2 * ========================================================================
3 * Copyright 2013-2022 Eduardo Chappa
4 * Copyright 2006-2007 University of Washington
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * ========================================================================
15 /* includable WITHOUT dependency on c-client */
16 #include "../../c-client/mail.h"
17 #include "../../c-client/utf8.h"
19 #ifdef _WINDOWS
20 /* wingdi.h uses ERROR (!) and we aren't using the c-client ERROR so... */
21 #undef ERROR
22 #endif
24 #include <system.h>
25 #include <general.h>
27 #include "../../c-client/fs.h"
29 /* includable WITHOUT dependency on pico */
30 #include "../../pico/keydefs.h"
31 #ifdef _WINDOWS
32 #include "../../pico/osdep/mswin.h"
33 #endif
35 #include "filesys.h"
36 #include "utf8.h"
39 #define bad_char ((UCS) '?')
/*
 * Convenience wrapper around convert_to_locale() for file and
 * directory names.
 *
 * The result lives in a single static buffer owned by this function,
 * so only one converted name is valid at a time: the next call
 * overwrites it. Callers that need two names at once must copy the
 * first one before asking for the second.
 *
 * Args: fname -- name to convert, or NULL as a special request to
 *                release the static buffer
 *
 * Returns: pointer to the static buffer holding the converted name
 *          (or a copy of fname when convert_to_locale() returns NULL),
 *          or NULL after the special free call.
 */
char *
fname_to_locale(char *fname)
{
    static char   *fname_locale_buf = NULL;
    static size_t  fname_locale_len = 0;
    char          *p, *converted_fname;
    size_t         needed;

    if(fname == NULL){			/* special call to free memory */
        if(fname_locale_buf)
          fs_give((void **) &fname_locale_buf);

        fname_locale_len = 0;
        return NULL;
    }

    /* fall back to the name as given when no converted copy comes back */
    p = convert_to_locale(fname);
    converted_fname = p ? p : fname;

    /* the buffer only ever grows; it is reused when already big enough */
    needed = strlen(converted_fname) + 1;
    if(needed > fname_locale_len){
        if(fname_locale_buf)
          fs_give((void **) &fname_locale_buf);

        fname_locale_len = needed;
        fname_locale_buf = (char *) fs_get(fname_locale_len * sizeof(char));
    }

    /* needed counts the terminating NUL, so the copy is always terminated */
    memcpy(fname_locale_buf, converted_fname, needed);

    if(p)
      fs_give((void **) &p);

    return(fname_locale_buf);
}
/*
 * Convenience wrapper around convert_to_utf8() for file and
 * directory names.
 *
 * The result lives in a single static buffer owned by this function,
 * so only one converted name is valid at a time: the next call
 * overwrites it. Callers that need to keep the result must copy it.
 *
 * Args: fname -- name to convert, or NULL as a special request to
 *                release the static buffer
 *
 * Returns: pointer to the static buffer holding the converted name
 *          (or a copy of fname when convert_to_utf8() returns NULL),
 *          or NULL after the special free call.
 */
char *
fname_to_utf8(char *fname)
{
    static char   *fname_utf8_buf = NULL;
    static size_t  fname_utf8_len = 0;
    char          *p, *converted_fname;
    size_t         needed;

    if(fname == NULL){			/* special call to free memory */
        if(fname_utf8_buf)
          fs_give((void **) &fname_utf8_buf);

        fname_utf8_len = 0;
        return NULL;
    }

    /* fall back to the name as given when no converted copy comes back */
    p = convert_to_utf8(fname, NULL, 0);
    converted_fname = p ? p : fname;

    /* the buffer only ever grows; it is reused when already big enough */
    needed = strlen(converted_fname) + 1;
    if(needed > fname_utf8_len){
        if(fname_utf8_buf)
          fs_give((void **) &fname_utf8_buf);

        fname_utf8_len = needed;
        fname_utf8_buf = (char *) fs_get(fname_utf8_len * sizeof(char));
    }

    /* needed counts the terminating NUL, so the copy is always terminated */
    memcpy(fname_utf8_buf, converted_fname, needed);

    if(p)
      fs_give((void **) &p);

    return(fname_utf8_buf);
}
149 * The fp file pointer is open for read on a file which has contents
150 * that are encoded in the user's locale charset. That multibyte stream
151 * of characters is converted to wide characters and returned one at
152 * a time.
154 * Not sure what to do if an uninterpretable character happens. Returning
155 * the bad character now.
158 read_a_wide_char(FILE *fp,
159 void *input_cs) /* input_cs ignored in Windows */
161 #ifdef _WINDOWS
162 _TINT val;
164 val = _fgettc(fp);
165 if(val == _TEOF)
166 return(CCONV_EOF);
168 return((UCS) val);
169 #else /* UNIX */
170 unsigned long octets_so_far, remaining_octets;
171 unsigned char *inputp;
172 unsigned char inputbuf[20];
173 int c;
174 UCS ucs;
176 c = fgetc(fp);
177 if(c == EOF)
178 return(CCONV_EOF);
181 * Read enough bytes to make up a character and convert it to UCS-4.
183 memset(inputbuf, 0, sizeof(inputbuf));
184 inputbuf[0] = (unsigned char) c;
185 octets_so_far = 1;
186 for(;;){
187 remaining_octets = octets_so_far;
188 inputp = inputbuf;
189 ucs = mbtow(input_cs, &inputp, &remaining_octets);
190 switch(ucs){
191 case CCONV_BADCHAR:
192 return(bad_char);
194 case CCONV_NEEDMORE:
195 if(octets_so_far >= sizeof(inputbuf))
196 return(bad_char);
198 c = fgetc(fp);
199 if(c == EOF)
200 return(CCONV_EOF);
202 inputbuf[octets_so_far++] = (unsigned char) c;
203 break;
205 default:
206 /* got a good UCS-4 character */
207 return(ucs);
211 return(bad_char);
212 #endif /* UNIX */
217 write_a_wide_char(UCS ucs, FILE *fp)
219 #ifdef _WINDOWS
220 int rv = 1;
221 TCHAR w;
223 w = (TCHAR) ucs;
224 if(_fputtc(w, fp) == _TEOF)
225 rv = EOF;
227 return(rv);
228 #else /* UNIX */
229 int rv = 1;
230 int i, outchars;
231 unsigned char obuf[MAX(MB_LEN_MAX,32)];
233 if(ucs < 0x80){
234 obuf[0] = (unsigned char) ucs;
235 outchars = 1;
237 else{
238 outchars = wtomb((char *) obuf, ucs);
239 if(outchars < 0){
240 outchars = 1;
241 obuf[0] = bad_char; /* ??? */
245 for(i = 0; i < outchars; i++)
246 if(fputc(obuf[i], fp) == EOF){
247 rv = EOF;
248 break;
251 return(rv);
252 #endif /* UNIX */
/*
 * stat() replacement that takes care of file name conversion.
 *
 * On Windows the name is converted with utf8_to_lptstr() and handed
 * to _tstat(); the result is copied field by field into the caller's
 * struct stat. On UNIX the name goes through fname_to_locale() and
 * then to stat().
 *
 * Returns: whatever the underlying stat call returns, or -1 if the
 *          Windows name conversion fails. On Windows sbuf is only
 *          filled in when the call succeeds.
 */
int
our_stat(char *filename, struct stat *sbuf)
{
#ifdef _WINDOWS
    LPTSTR f = NULL;
    int    ret = -1;
    struct _stat s;

    f = utf8_to_lptstr((LPSTR) filename);
    if(f){
        ret = _tstat(f, &s);

        /*
         * s is only meaningful when _tstat succeeded; don't copy
         * uninitialized stack contents into the caller's buffer.
         */
        if(ret == 0){
            sbuf->st_dev   = s.st_dev;
            sbuf->st_ino   = s.st_ino;
            sbuf->st_mode  = s.st_mode;
            sbuf->st_nlink = s.st_nlink;
            sbuf->st_uid   = s.st_uid;
            sbuf->st_gid   = s.st_gid;
            sbuf->st_rdev  = s.st_rdev;
            sbuf->st_size  = s.st_size;
            sbuf->st_atime = (time_t) s.st_atime;
            sbuf->st_mtime = (time_t) s.st_mtime;
            sbuf->st_ctime = (time_t) s.st_ctime;
        }

        fs_give((void **) &f);
    }

    return ret;
#else /* UNIX */
    return(stat(fname_to_locale(filename), sbuf));
#endif /* UNIX */
}
/*
 * lstat() replacement. On UNIX the name is run through
 * fname_to_locale() before lstat() is called. Not available on
 * Windows, where it asserts and returns -1.
 */
int
our_lstat(char *filename, struct stat *sbuf)
{
#ifdef _WINDOWS
    assert(0);		/* lstat not used in Windows */
    return(-1);
#else /* UNIX */
    char *locale_name = fname_to_locale(filename);

    return(lstat(locale_name, sbuf));
#endif /* UNIX */
}
/*
 * fopen() replacement that takes care of file name conversion.
 *
 * On UNIX the path goes through fname_to_locale() and the mode is
 * passed along untouched.
 *
 * On Windows both path and mode are converted with utf8_to_lptstr()
 * and given to _tfopen(). For text modes (no 'b' in mode) a ccs
 * parameter is appended to the mode string first:
 *
 *    "r..." / "a..."  ->  ", ccs=UNICODE"
 *    "w..."           ->  ", ccs=UTF-8"
 *
 * Returns: the open stream, or NULL on failure.
 */
FILE *
our_fopen(char *path, char *mode)
{
#ifdef _WINDOWS
    LPTSTR  p = NULL, m = NULL;
    FILE   *ret = NULL;
    char   *mode_with_ccs = NULL;
    char   *ccs = NULL;
    char    buf[500];
    size_t  len;

    if(mode){
        /*
         * The docs seem to say that we don't need the ccs parameter and
         * if the file has a BOM at the beginning it will notice that and
         * use it. However, we're not seeing that. Instead, what we see is
         * that giving a parameter of UNICODE causes the desired behavior.
         * This causes it to check for a BOM and if it finds one it uses it.
         * If it doesn't find one, it treats the file as ANSI, which is what
         * we want.
         */
        if(*mode == 'r' || *mode == 'a')
          ccs = ", ccs=UNICODE";
        else if(*mode == 'w')
          ccs = ", ccs=UTF-8";
    }

    if(ccs){
        if(strchr(mode, 'b'))
          mode_with_ccs = mode;		/* binary mode, leave it alone */
        else{
            /* use the stack buffer when it is big enough, else allocate */
            len = strlen(mode) + strlen(ccs);
            if(len < sizeof(buf))
              mode_with_ccs = buf;
            else
              mode_with_ccs = (char *) MemAlloc((len+1) * sizeof(char));

            if(mode_with_ccs)
              snprintf(mode_with_ccs, len+1, "%s%s", mode, ccs);
            else
              mode_with_ccs = mode;	/* can't happen */
        }
    }

    p = utf8_to_lptstr((LPSTR) path);

    if(p){
        m = utf8_to_lptstr((LPSTR) mode_with_ccs);
        if(m){
            ret = _tfopen(p, m);
            /* release it the same way as every other utf8_to_lptstr result */
            fs_give((void **) &m);
        }

        fs_give((void **) &p);
    }

    /* only free the mode string if it was allocated above */
    if(mode_with_ccs && mode_with_ccs != buf && mode_with_ccs != mode)
      MemFree((void *) mode_with_ccs);

    return ret;
#else /* UNIX */
    return(fopen(fname_to_locale(path), mode));
#endif /* UNIX */
}
/*
 * open() replacement that takes care of file name conversion.
 *
 * Returns: the descriptor from the underlying open call, or -1 if the
 *          Windows name conversion fails.
 */
int
our_open(char *path, int flags, mode_t mode)
{
#ifdef _WINDOWS
    LPTSTR wide_path;
    int    fd;

    /*
     * Setting the _O_WTEXT flag when opening a file for reading
     * will cause us to read the first few bytes to check for
     * a BOM and to translate from that encoding if we find it.
     * This only works with stream I/O, not low-level read/write.
     *
     * When opening for writing the flag _O_U8TEXT will cause
     * us to put a UTF-8 BOM at the start of the file.
     *
     * O_TEXT will cause LF -> CRLF on output, opposite on input
     * O_BINARY suppresses that.
     * _O_U8TEXT implies O_TEXT.
     */

    wide_path = utf8_to_lptstr((LPSTR) path);
    if(!wide_path)
      return -1;

    fd = _topen(wide_path, flags, mode);
    fs_give((void **) &wide_path);

    return fd;
#else /* UNIX */
    char *locale_path = fname_to_locale(path);

    return(open(locale_path, flags, mode));
#endif /* UNIX */
}
/*
 * creat() replacement that takes care of file name conversion.
 *
 * Returns: the result of the underlying creat call, or -1 if the
 *          Windows name conversion fails.
 */
int
our_creat(char *path, mode_t mode)
{
#ifdef _WINDOWS
    LPTSTR wide_path;
    int    fd;

    wide_path = utf8_to_lptstr((LPSTR) path);
    if(!wide_path)
      return -1;

    fd = _tcreat(wide_path, mode);
    fs_give((void **) &wide_path);

    return fd;
#else /* UNIX */
    char *locale_path = fname_to_locale(path);

    return(creat(locale_path, mode));
#endif /* UNIX */
}
/*
 * mkdir() replacement that takes care of directory name conversion.
 * The mode argument is only used on UNIX.
 *
 * Returns: the result of the underlying mkdir call, or -1 if the
 *          Windows name conversion fails.
 */
int
our_mkdir(char *path, mode_t mode)
{
#ifdef _WINDOWS
    /* mode is a noop for _WINDOWS */
    LPTSTR wide_path;
    int    rv;

    wide_path = utf8_to_lptstr((LPSTR) path);
    if(!wide_path)
      return -1;

    rv = _tmkdir(wide_path);
    fs_give((void **) &wide_path);

    return rv;
#else /* UNIX */
    char *locale_path = fname_to_locale(path);

    return(mkdir(locale_path, mode));
#endif /* UNIX */
}
/*
 * rename() replacement that takes care of file name conversion for
 * both names.
 *
 * Returns: the result of the underlying rename call, or -1 if a name
 *          could not be converted.
 */
int
our_rename(char *oldpath, char *newpath)
{
#ifdef _WINDOWS
    LPTSTR wide_old, wide_new;
    int    rv = -1;

    wide_old = utf8_to_lptstr((LPSTR) oldpath);
    wide_new = utf8_to_lptstr((LPSTR) newpath);

    /* only attempt the rename when both conversions worked */
    if(wide_old && wide_new)
      rv = _trename(wide_old, wide_new);

    if(wide_old)
      fs_give((void **) &wide_old);

    if(wide_new)
      fs_give((void **) &wide_new);

    return rv;
#else /* UNIX */
    char   *converted, *old_copy;
    size_t  nbytes;
    int     rv;

    converted = fname_to_locale(oldpath);
    if(!converted)
      return -1;

    /*
     * fname_to_locale() has a single result buffer, so the old name
     * has to be copied out before the new name is converted.
     */
    nbytes   = strlen(converted) + 1;
    old_copy = (char *) fs_get(nbytes * sizeof(char));
    memcpy(old_copy, converted, nbytes);

    rv = rename(old_copy, fname_to_locale(newpath));
    fs_give((void **) &old_copy);

    return rv;
#endif /* UNIX */
}
/*
 * rmdir() replacement that takes care of directory name conversion.
 *
 * Returns: the result of the underlying rmdir call, or -1 if the
 *          Windows name conversion fails.
 */
int
our_rmdir(char *path)
{
#ifdef _WINDOWS
    LPTSTR wide_path;
    int    rv;

    wide_path = utf8_to_lptstr((LPSTR) path);
    if(!wide_path)
      return -1;

    rv = _trmdir(wide_path);
    fs_give((void **) &wide_path);

    return rv;
#else /* UNIX */
    char *locale_path = fname_to_locale(path);

    return(rmdir(locale_path));
#endif /* UNIX */
}
/*
 * unlink() replacement that takes care of file name conversion.
 *
 * Returns: the result of the underlying unlink call, or -1 if the
 *          Windows name conversion fails.
 */
int
our_unlink(char *path)
{
#ifdef _WINDOWS
    LPTSTR wide_path;
    int    rv;

    wide_path = utf8_to_lptstr((LPSTR) path);
    if(!wide_path)
      return -1;

    rv = _tunlink(wide_path);
    fs_give((void **) &wide_path);

    return rv;
#else /* UNIX */
    char *locale_path = fname_to_locale(path);

    return(unlink(locale_path));
#endif /* UNIX */
}
/*
 * link() replacement that takes care of file name conversion for
 * both names. Not available on Windows, where it asserts and
 * returns -1.
 *
 * Returns: the result of link(), or -1 if the old name could not be
 *          converted.
 */
int
our_link(char *oldpath, char *newpath)
{
#ifdef _WINDOWS
    assert(0);		/* link not used in Windows */
    return(-1);
#else /* UNIX */
    char   *converted, *old_copy;
    size_t  nbytes;
    int     rv;

    converted = fname_to_locale(oldpath);
    if(!converted)
      return -1;

    /*
     * fname_to_locale() has a single result buffer, so the old name
     * has to be copied out before the new name is converted.
     */
    nbytes   = strlen(converted) + 1;
    old_copy = (char *) fs_get(nbytes * sizeof(char));
    memcpy(old_copy, converted, nbytes);

    rv = link(old_copy, fname_to_locale(newpath));
    fs_give((void **) &old_copy);

    return rv;
#endif /* UNIX */
}
/*
 * truncate() replacement that takes care of file name conversion.
 *
 * Where truncate() is not available (Windows, or UNIX without
 * HAVE_TRUNCATE) the file is opened with our_open(), resized with
 * chsize() and closed again.
 *
 * Returns: 0 on success, -1 on failure.
 */
int
our_truncate(char *path, off_t size)
{
    int ret = -1;
#if	defined(_WINDOWS) || !defined(HAVE_TRUNCATE)
    int fdes;
#endif

#ifdef _WINDOWS
    /*
     * S_IREAD/S_IWRITE are permission bits, so they are passed as the
     * mode argument instead of being OR'd into the open flags.
     */
    if((fdes = our_open(path, O_RDWR | O_CREAT | _O_U8TEXT, S_IREAD | S_IWRITE)) != -1){
        if(chsize(fdes, size) == 0)
          ret = 0;

        close(fdes);
    }

#else /* UNIX */

#ifdef	HAVE_TRUNCATE
    ret = truncate(fname_to_locale(path), size);
#else	/* !HAVE_TRUNCATE */

    if((fdes = our_open(path, O_RDWR, 0600)) != -1){
        ret = chsize(fdes, size);

        /* a failed close turns the whole operation into a failure */
        if(close(fdes))
          ret = -1;
    }
#endif /* !HAVE_TRUNCATE */
#endif /* UNIX */

    return ret;
}
/*
 * chmod() replacement that takes care of file name conversion.
 *
 * Returns: the result of the underlying chmod call, or -1 if the
 *          Windows name conversion fails.
 */
int
our_chmod(char *path, mode_t mode)
{
#ifdef _WINDOWS
    LPTSTR wide_path;
    int    rv;

    wide_path = utf8_to_lptstr((LPSTR) path);
    if(!wide_path)
      return -1;

    rv = _tchmod(wide_path, mode);
    fs_give((void **) &wide_path);

    return rv;
#else /* UNIX */
    char *locale_path = fname_to_locale(path);

    return(chmod(locale_path, mode));
#endif /* UNIX */
}
/*
 * chown() replacement that takes care of file name conversion.
 * On Windows nothing is done and 0 is returned.
 */
int
our_chown(char *path, uid_t owner, gid_t group)
{
#ifdef _WINDOWS
    return 0;
#else /* UNIX */
    char *locale_path = fname_to_locale(path);

    return(chown(locale_path, owner, group));
#endif /* UNIX */
}
/*
 * utime() replacement that takes care of file name conversion.
 *
 * Returns: the result of the underlying utime call, or -1 if the
 *          Windows name conversion fails.
 */
int
our_utime(char *path, struct utimbuf *buf)
{
#ifdef _WINDOWS
    LPTSTR wide_path;
    int    rv;

    wide_path = utf8_to_lptstr((LPSTR) path);
    if(!wide_path)
      return -1;

    rv = _tutime(wide_path, buf);
    fs_give((void **) &wide_path);

    return rv;
#else /* UNIX */
    char *locale_path = fname_to_locale(path);

    return(utime(locale_path, buf));
#endif /* UNIX */
}
/*
 * Look up an environment variable and return its value as a freshly
 * allocated UTF-8 string which the caller owns. The env_variable
 * argument itself is assumed not to be UTF-8.
 *
 * We'll pretty much swap out getenv's where convenient. Windows pretty
 * much doesn't want to do getenv once we do unicode.
 *
 * Returns: allocated copy of the value, or NULL if there is no such
 *          environment variable.
 */
char *
our_getenv(char *env_variable)
{
#ifdef _WINDOWS
    TCHAR  wide_name[MAXPATH+1];
    TCHAR *wide_value;
    int    k;

    /* widen the name one character at a time, at most MAXPATH of them */
    for(k = 0; env_variable[k] && k < MAXPATH; k++)
      wide_name[k] = env_variable[k];

    wide_name[k] = '\0';

    wide_value = _tgetenv(wide_name);
    if(!wide_value)
      return(NULL);

    return(lptstr_to_utf8(wide_value));
#else /* !_WINDOWS */
    char   *value, *utf8_value, *copy;
    size_t  nbytes;

    value = getenv(env_variable);
    if(value == NULL)
      return(NULL);

    /*
     * all this when what we want is a cpystr: fname_to_utf8() hands
     * back its own static buffer, so the result is duplicated here
     */
    utf8_value = fname_to_utf8(value);
    nbytes     = strlen(utf8_value) + 1;
    copy       = (char *) fs_get(nbytes * sizeof(char));
    memcpy(copy, utf8_value, nbytes);

    return(copy);
#endif /* !_WINDOWS */
}
/*
 * access() replacement that takes care of file name conversion.
 *
 * Returns: the result of the underlying access call, or -1 if the
 *          Windows name conversion fails.
 */
int
our_access(char *path, int mode)
{
#ifdef _WINDOWS
    LPTSTR wide_path;
    int    rv;

    wide_path = utf8_to_lptstr((LPSTR) path);
    if(!wide_path)
      return -1;

    rv = _taccess(wide_path, mode);
    fs_give((void **) &wide_path);

    return rv;
#else /* UNIX */
    char *locale_path = fname_to_locale(path);

    return(access(locale_path, mode));
#endif /* UNIX */
}
/*
 * Fgets that doesn't do any character encoding translation or any
 * of that Windows stuff.
 *
 * Reads at most size-1 bytes from fp into s, stopping after a newline
 * (which is kept), and NUL-terminates the result.
 *
 * Returns: s, or NULL when s is NULL or when nothing at all could be
 *          read (end of file or read error), so that callers can
 *          detect end of input the same way they do with fgets().
 */
char *
fgets_binary(char *s, int size, FILE *fp)
{
#ifdef _WINDOWS
    char   *p;
    char    c;
    size_t  r = 1;

    /*
     * Use fread low-level input instead of fgets.
     * Maybe if we understood better we wouldn't need this.
     */
    if(!s || size < 1)		/* no room even for the terminator */
      return NULL;

    p = s;
    while(p-s < size-1 && (r = fread(&c, sizeof(c), (size_t) 1, fp)) == 1){
        *p++ = c;
        if(c == '\n')		/* end of line, newline is kept */
          break;
    }

    /* hit EOF or an error before reading a single byte */
    if(p == s && r != 1)
      return NULL;

    *p = '\0';
    return(s);

#else /* UNIX */
    return(fgets(s, size, fp));
#endif /* UNIX */
}