FTP: Test and fix handling of symbolic link name containing spaces
[elinks.git] / src / protocol / ftp / parse.c
blob8048406a8702e582c5c9b134db0623963c28d8b6
1 /* Parsing of FTP `ls' directory output. */
3 /* Parts of this file were part of GNU Wget
4 * Copyright (C) 1995, 1996, 1997, 2000, 2001 Free Software Foundation, Inc. */
6 #ifdef HAVE_CONFIG_H
7 #include "config.h"
8 #endif
10 #include <errno.h>
11 #include <stdio.h>
12 #include <stdlib.h>
13 #ifdef HAVE_STRING_H
14 # include <string.h>
15 #else
16 # include <strings.h>
17 #endif
18 #ifdef HAVE_UNISTD_H
19 # include <unistd.h>
20 #endif
21 #include <sys/stat.h>
22 #include <sys/types.h>
24 #include "elinks.h"
26 #include "osdep/ascii.h"
27 #include "osdep/stat.h"
28 #include "protocol/date.h"
29 #include "protocol/ftp/parse.h"
30 #include "util/conv.h"
31 #include "util/string.h"
32 #include "util/time.h"
35 /* Examples of what the FTP parser is supposed to handle (and not handle) can
36 * be found in the test-ftp-parser file. */
/* Advance @src over a run of space characters without moving past @end.
 * Both arguments are evaluated several times, so they must be free of side
 * effects; @src must be a modifiable pointer. */
#define skip_space_end(src, end) \
	do { \
		for (; (src) < (end) && *(src) == ' '; (src)++) \
			; \
	} while (0)

/* Advance @src over a run of non-space characters without moving past @end.
 * Both arguments are evaluated several times, so they must be free of side
 * effects; @src must be a modifiable pointer. */
#define skip_nonspace_end(src, end) \
	do { \
		for (; (src) < (end) && *(src) != ' '; (src)++) \
			; \
	} while (0)
/* Parse an unsigned decimal number located at *@src, reading no byte at or
 * beyond @end.
 *
 * *@src is always advanced past every consecutive digit, even when the value
 * is rejected. When no digit is present the value is taken to be 0.
 *
 * Returns the parsed value if it lies within [@from, @to], otherwise -1.
 * Digit runs that are too long to fit in the range are rejected without ever
 * overflowing the accumulator. */
static off_t
parse_ftp_number(unsigned char **src, unsigned char *end, off_t from, off_t to)
{
	off_t number = 0;
	unsigned char *pos = *src;
	int too_big = 0;

	for (; pos < end && isdigit(*pos); pos++) {
		off_t digit = *pos - '0';

		/* The value only ever grows, so once it has left the range
		 * the remaining digits merely need to be consumed. */
		if (too_big)
			continue;

		/* Test whether number * 10 + digit would exceed @to _before_
		 * computing it; signed overflow is undefined behaviour. */
		if (to < digit || number > (to - digit) / 10) {
			too_big = 1;
			continue;
		}

		number = digit + 10 * number;
	}

	*src = pos;

	if (too_big || number < from || to < number)
		return -1;

	return number;
}
62 /* Parser for the EPLF format (see http://pobox.com/~djb/proto/eplf.txt).
64 * Lines end with \r\n (CR-LF), but that is handled elsewhere.
67 enum ftp_eplf {
68 FTP_EPLF_FILENAME = ASCII_TAB, /* Filename follows */
69 FTP_EPLF_PLAINFILE = 'r', /* RETR is possible */
70 FTP_EPLF_DIRECTORY = '/', /* CWD is possible */
71 FTP_EPLF_SIZE = 's', /* File size follows */
72 FTP_EPLF_MTIME = 'm', /* Modification time follows */
73 FTP_EPLF_ID = 'i', /* Unique file id follows */
76 static struct ftp_file_info *
77 parse_ftp_eplf_response(struct ftp_file_info *info, unsigned char *src, int len)
79 /* Skip the '+'-char which starts the line. */
80 unsigned char *end = src + len;
81 unsigned char *pos = src++;
83 /* Handle the series of facts about the file. */
85 for (; src < end && pos; src = pos + 1) {
86 /* Find the end of the current fact. */
87 pos = memchr(src, ',', end - src);
88 if (!pos) pos = end;
90 switch (*src++) {
91 case FTP_EPLF_FILENAME:
92 if (src >= end) break;
93 info->name.source = src;
94 info->name.length = end - src;
95 return info;
97 case FTP_EPLF_DIRECTORY:
98 info->type = FTP_FILE_DIRECTORY;
99 break;
101 case FTP_EPLF_PLAINFILE:
102 info->type = FTP_FILE_PLAINFILE;
103 break;
105 case FTP_EPLF_SIZE:
106 if (src >= pos) break;
107 info->size = parse_ftp_number(&src, pos, 0, OFFT_MAX);
108 break;
110 case FTP_EPLF_MTIME:
111 if (src >= pos) break;
112 info->mtime = (time_t) parse_ftp_number(&src, pos, 0, LONG_MAX);
113 break;
114 case FTP_EPLF_ID:
115 /* Not used */
116 break;
120 return NULL;
124 /* Parser for UNIX-style listing: */
/* The fields of a UNIX-style listing line, numbered in the order in which
 * parse_ftp_unix_response() steps through them. */
enum ftp_unix {
	FTP_UNIX_PERMISSIONS	= 0,
	FTP_UNIX_SIZE		= 1,
	FTP_UNIX_DAY		= 2,
	FTP_UNIX_TIME		= 3,
	FTP_UNIX_NAME		= 4
};
/* Translate the symbolic permission string of a Un*x listing into mode bits,
 * e.g. rwxr-xr-x becomes 0755.
 *
 * @src must hold nine permission characters; a tenth character is accepted
 * only if it is the `+' ACL tag. Besides r, w, x and -, the set-id markers
 * s/S (user and group), the sticky markers t/T and the l/L markers in the
 * last position are understood.
 *
 * Borrowed from lftp source code by Alexander V. Lukyanov.
 * Returns 0 on any parse error. */
static int
parse_ftp_unix_permissions(const unsigned char *src, int len)
{
	static const mode_t read_bit[3]  = { S_IRUSR, S_IRGRP, S_IROTH };
	static const mode_t write_bit[3] = { S_IWUSR, S_IWGRP, S_IWOTH };
	mode_t mode = 0;
	int who;

	if (len != 9 && (len != 10 || src[9] != '+'))
		return 0;

	/* The read and write flags look the same for user, group and
	 * others, so handle the three triads in a loop. */
	for (who = 0; who < 3; who++) {
		const unsigned char *triad = src + 3 * who;

		if (triad[0] == 'r')
			mode |= read_bit[who];
		else if (triad[0] != '-')
			return 0;

		if (triad[1] == 'w')
			mode |= write_bit[who];
		else if (triad[1] != '-')
			return 0;
	}

	/* User execute / set-user-id */
	if (src[2] == 's')
		mode |= S_ISUID | S_IXUSR;
	else if (src[2] == 'S')
		mode |= S_ISUID;
	else if (src[2] == 'x')
		mode |= S_IXUSR;
	else if (src[2] != '-')
		return 0;

	/* Group execute / set-group-id */
	if (src[5] == 's')
		mode |= S_ISGID | S_IXGRP;
	else if (src[5] == 'S')
		mode |= S_ISGID;
	else if (src[5] == 'x')
		mode |= S_IXGRP;
	else if (src[5] != '-')
		return 0;

	/* Others execute / sticky. The l/L markers set the set-group-id bit
	 * and clear group execute, so this must come after the group triad. */
	if (src[8] == 't') {
		mode |= S_ISVTX | S_IXOTH;
	} else if (src[8] == 'T') {
		mode |= S_ISVTX;
	} else if (src[8] == 'x') {
		mode |= S_IXOTH;
	} else if (src[8] == 'l' || src[8] == 'L') {
		mode |= S_ISGID;
		mode &= ~S_IXGRP;
	} else if (src[8] != '-') {
		return 0;
	}

	return mode;
}
215 static struct ftp_file_info *
216 parse_ftp_unix_response(struct ftp_file_info *info, unsigned char *src, int len)
218 unsigned char *end = src + len;
219 unsigned char *pos;
220 struct tm mtime;
221 enum ftp_unix fact;
223 /* Decide the file type. */
225 enum ftp_file_type type = *src++;
227 switch (type) {
228 case FTP_FILE_PLAINFILE:
229 case FTP_FILE_DIRECTORY:
230 case FTP_FILE_SYMLINK:
231 info->type = type;
232 break;
234 default:
235 info->type = FTP_FILE_UNKNOWN;
239 memset(&mtime, 0, sizeof(mtime));
240 mtime.tm_isdst = -1;
242 /* Following is only needed to handle NetWare listings which are not
243 * (yet) handled. So disabled for now. --Zas */
244 /* skip_space_end(src, end); */
246 fact = FTP_UNIX_PERMISSIONS;
248 for (pos = src; src < end; src = pos) {
249 skip_nonspace_end(pos, end);
251 switch (fact) {
252 case FTP_UNIX_PERMISSIONS:
253 /* We wanna know permissions as well! And I decided to
254 * completely ignore the NetWare perms, they are very
255 * rare and of some nonstandart format. If you want
256 * them, though, I'll accept patch enabling them.
257 * --pasky */
258 if (pos - src == 9) /* 9 is length of "rwxrwxrwx". */
259 info->permissions = parse_ftp_unix_permissions(src, 9);
260 fact = FTP_UNIX_SIZE;
261 break;
263 case FTP_UNIX_SIZE:
264 /* Search for the size and month name combo: */
265 if (info->size != FTP_SIZE_UNKNOWN
266 && pos - src == 3) {
267 int month = parse_month((const unsigned char **) &src, pos);
269 if (month != -1) {
270 fact = FTP_UNIX_DAY;
271 mtime.tm_mon = month;
272 break;
276 if (!isdigit(*src)) {
277 info->size = FTP_SIZE_UNKNOWN;
278 break;
281 info->size = parse_ftp_number(&src, pos, 0, OFFT_MAX);
282 break;
284 case FTP_UNIX_DAY:
285 mtime.tm_mday = parse_day((const unsigned char **) &src, pos);
286 fact = FTP_UNIX_TIME;
287 break;
289 case FTP_UNIX_TIME:
290 /* This ought to be either the time, or the
291 * year. Let's be flexible! */
292 fact = FTP_UNIX_NAME;
294 /* We must deal with digits. */
295 if (!isdigit (*src))
296 break;
298 /* If we have a number x, it's a year. If we have x:y,
299 * it's hours and minutes. */
300 if (!memchr(src, ':', pos - src)) {
301 mtime.tm_year = parse_year((const unsigned char **) &src, pos);
302 break;
305 if (!parse_time((const unsigned char **) &src, &mtime, pos)) {
306 mtime.tm_hour = mtime.tm_min = mtime.tm_sec = 0;
308 break;
310 case FTP_UNIX_NAME:
311 /* Since the file name may contain spaces use @end as the
312 * token ending and not @pos. */
314 info->name.source = src;
315 info->name.length = end - src;
317 /* Some FTP sites choose to have ls -F as their default
318 * LIST output, which marks the symlinks with a trailing
319 * `@', directory names with a trailing `/' and
320 * executables with a trailing `*'. This is no problem
321 * unless encountering a symbolic link ending with `@',
322 * or an executable ending with `*' on a server without
323 * default -F output. I believe these cases are very
324 * rare. */
326 #define check_trailing_char(string, trailchar) \
327 ((string)->length > 0 \
328 && (string)->source[(string)->length - 1] == (trailchar))
330 switch (info->type) {
331 case FTP_FILE_DIRECTORY:
332 /* Check for trailing `/' */
333 if (check_trailing_char(&info->name, '/'))
334 info->name.length--;
335 break;
337 case FTP_FILE_SYMLINK:
338 /* If the file is a symbolic link, it should
339 * have a ` -> ' somewhere. */
340 while (pos && pos + 3 < end) {
341 if (!memcmp(pos, " -> ", 4)) {
342 info->symlink.source = pos + 4;
343 info->symlink.length = end - pos - 4;
344 info->name.length = pos - src;
345 break;
348 pos = memchr(pos + 1, ' ', end - pos);
351 if (!info->symlink.source)
352 return NULL;
354 /* Check for trailing `@' on link and trailing
355 * `/' on the link target if it's a directory */
356 if (check_trailing_char(&info->name, '@'))
357 info->name.length--;
359 if (check_trailing_char(&info->symlink, '/'))
360 info->symlink.length--;
361 break;
363 case FTP_FILE_PLAINFILE:
364 /* Check for trailing `*' on files which are
365 * executable. */
366 if ((info->permissions & 0111)
367 && check_trailing_char(&info->name, '*'))
368 info->name.length--;
370 default:
371 break;
374 if (mtime.tm_year == 0) {
375 /* Get the current time. */
376 time_t timenow = time(NULL);
377 struct tm *now = localtime(&timenow);
379 mtime.tm_year = now->tm_year;
381 /* Some listings will not specify the year if it
382 * is "obvious" that the file was from the
383 * previous year. E.g. if today is 97-01-12, and
384 * you see a file of Dec 15th, its year is 1996,
385 * not 1997. Thanks to Vladimir Volovich for
386 * mentioning this! */
387 if (mtime.tm_mon > now->tm_mon)
388 mtime.tm_year--;
391 info->mtime = mktime(&mtime); /* store the time-stamp */
392 info->local_time_zone = 1;
394 return info;
397 skip_space_end(pos, end);
400 return NULL;
404 /* Parser for VMS-style MultiNet (some spaces removed from examples): */
/* Converts VMS symbolic permissions to number-style ones. Each
 * comma-separated group contributes one octal digit: "R" counts 4, "W" and
 * "D" (delete is taken to be equal to write) count 2 and "E" counts 1. Every
 * comma moves the digits collected so far one octal place to the left, so
 * e.g. RWED,RE,RE gives 0755 and RWED,RWE,RE gives 0775.
 * Any other character makes the whole string invalid and yields 0.
 * Inspired by a patch of Stoyan Lekov <lekov@eda.bg>. */
static int
parse_ftp_vms_permissions(const unsigned char *src, int len)
{
	int mode = 0;
	int i = 0;

	while (i < len) {
		const unsigned char flag = src[i++];

		if (flag == ',') {
			mode <<= 3;
		} else if (flag == 'R') {
			mode |= 4;
		} else if (flag == 'W' || flag == 'D') {
			mode |= 2;
		} else if (flag == 'E') {
			mode |= 1;
		} else {
			/* Wrong VMS permissions! */
			return 0;
		}
	}

	return mode;
}
431 static struct ftp_file_info *
432 parse_ftp_vms_response(struct ftp_file_info *info, unsigned char *src, int len)
434 unsigned char *end = src + len;
435 unsigned char *pos;
437 /* First column: Name. A bit of black magic again. The name maybe either
438 * ABCD.EXT or ABCD.EXT;NUM and it might be on a separate line.
439 * Therefore we will first try to get the complete name until the first
440 * space character; if it fails, we assume that the name occupies the
441 * whole line. After that we search for the version separator ";", we
442 * remove it and check the extension of the file; extension .DIR denotes
443 * directory. */
445 pos = memchr(src, ';', end - src);
446 if (!pos) return NULL;
448 info->name.source = src;
449 info->name.length = pos - src;
451 /* If the name ends on .DIR or .DIR;#, it's a directory. We also
452 * set the file size to zero as the listing does tell us only
453 * the size in filesystem blocks - for an integrity check (when
454 * mirroring, for example) we would need the size in bytes. */
456 if (info->name.length > 4 && !memcmp(&pos[-4], ".DIR", 4)) {
457 info->type = FTP_FILE_DIRECTORY;
458 info->name.length -= 4;
459 } else {
460 info->type = FTP_FILE_PLAINFILE;
463 skip_nonspace_end(pos, end);
464 skip_space_end(pos, end);
465 src = pos;
468 /* Second column, if exists, or the first column of the next line
469 * contain file size in blocks. We will skip it. */
471 if (src >= end) {
472 /* FIXME: Handle multi-lined views. */
473 return NULL;
476 skip_nonspace_end(src, end);
477 skip_space_end(src, end);
478 if (src >= end) return NULL;
481 /* Third/Second column: Date DD-MMM-YYYY and
482 * Fourth/Third column: Time hh:mm[:ss] */
484 /* If the server produces garbage like
485 * 'EA95_0PS.GZ;1 No privilege for attempted operation'
486 * parse_date() will fail. */
487 info->mtime = parse_date(&src, end, 1, 0);
488 if (info->mtime == 0)
489 return NULL;
491 /* Be more tolerant from here on ... */
494 /* Skip the fifth column */
496 skip_space_end(src, end);
497 skip_nonspace_end(src, end);
498 skip_space_end(src, end);
499 if (src >= end) return info;
502 /* Sixth column: Permissions */
504 src = memchr(src, '(', end - src);
505 if (!src || src >= end)
506 return info;
508 src++;
510 pos = memchr(src, ')', end - src);
511 if (!pos) return info;
513 /* Permissons have the format "RWED,RWED,RE" */
514 info->permissions = parse_ftp_vms_permissions(src, pos - src);
516 return info;
520 /* Parser for the MSDOS-style format: */
522 struct ftp_file_info *
523 parse_ftp_winnt_response(struct ftp_file_info *info, unsigned char *src, int len)
525 struct tm mtime;
526 unsigned char *end = src + len;
528 /* Extracting name is a bit of black magic and we have to do it
529 * before `strtok' inserted extra \0 characters in the line
530 * string. For the moment let us just suppose that the name starts at
531 * column 39 of the listing. This way we could also recognize
532 * filenames that begin with a series of space characters (but who
533 * really wants to use such filenames anyway?). */
534 if (len <= 39) return NULL;
536 info->name.source = src + 39;
537 info->name.length = end - src - 39;
540 /* First column: mm-dd-yy. Should number parsing of the month fail,
541 * january will be assumed. */
543 memset(&mtime, 0, sizeof(mtime));
544 mtime.tm_isdst = -1;
546 mtime.tm_mon = (int) parse_ftp_number(&src, end, 1, 12);
547 if (src + 2 >= end || *src != '-')
548 return NULL;
550 src++;
552 mtime.tm_mday = parse_day((const unsigned char **) &src, end);
553 if (src + 2 >= end || *src != '-')
554 return NULL;
556 src++;
558 mtime.tm_year = parse_year((const unsigned char **) &src, end);
559 if (src >= end || mtime.tm_year == -1)
560 return NULL;
562 skip_space_end(src, end);
563 if (src >= end) return NULL;
566 /* Second column: hh:mm[AP]M, listing does not contain value for
567 * seconds */
569 if (!parse_time((const unsigned char **) &src, &mtime, end))
570 return NULL;
572 /* Store the time-stamp. */
573 info->mtime = mktime(&mtime);
575 skip_nonspace_end(src, end);
576 skip_space_end(src, end);
577 if (src >= end) return NULL;
580 /* Third column: Either file length, or <DIR>. We also set the
581 * permissions (guessed as 0644 for plain files and 0755 for directories
582 * as the listing does not give us a clue) and filetype here. */
584 if (*src == '<') {
585 info->type = FTP_FILE_DIRECTORY;
586 info->permissions = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
588 } else if (isdigit(*src)) {
589 info->type = FTP_FILE_PLAINFILE;
590 info->size = parse_ftp_number(&src, end, 0, OFFT_MAX);
591 info->permissions = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
593 } else {
594 info->type = FTP_FILE_UNKNOWN;
597 return info;
601 struct ftp_file_info *
602 parse_ftp_file_info(struct ftp_file_info *info, unsigned char *src, int len)
604 assert(info && src && len > 0);
605 if_assert_failed return NULL;
607 switch (*src) {
608 case '+':
609 return parse_ftp_eplf_response(info, src, len);
611 case 'b':
612 case 'c':
613 case 'd':
614 case 'l':
615 case 'p':
616 case 's':
617 case '-':
618 break;
620 default:
621 if (memchr(src, ';', len))
622 return parse_ftp_vms_response(info, src, len);
624 if (isdigit(*src))
625 return parse_ftp_winnt_response(info, src, len);
628 return parse_ftp_unix_response(info, src, len);