1 /* Parsing of FTP `ls' directory output. */
3 /* Parts of this file were part of GNU Wget
4 * Copyright (C) 1995, 1996, 1997, 2000, 2001 Free Software Foundation, Inc. */
22 #include <sys/types.h>
26 #include "osdep/ascii.h"
27 #include "osdep/stat.h"
28 #include "protocol/date.h"
29 #include "protocol/ftp/parse.h"
30 #include "util/conv.h"
31 #include "util/string.h"
32 #include "util/time.h"
35 /* Examples of what the FTP parser is supposed to handle (and not handle) can
36 * be found in the test-ftp-parser file. */
/* Advance @src over consecutive ' ' characters, stopping at @end at the
 * latest. @src must be a modifiable pointer lvalue; both arguments are
 * evaluated more than once. */
#define skip_space_end(src, end) \
	do { \
		for (; (src) < (end); (src)++) \
			if (*(src) != ' ') break; \
	} while (0)

/* Advance @src up to the next ' ' character, or to @end if there is none.
 * Same argument requirements as skip_space_end(). */
#define skip_nonspace_end(src, end) \
	do { \
		for (; (src) < (end); (src)++) \
			if (*(src) == ' ') break; \
	} while (0)
/* Parses an unsigned decimal number located at the start of *src.
 *
 * @src   in/out: first character to examine. On return it points just past
 *        the last digit consumed (it is left unchanged when *src does not
 *        start with a digit, in which case the parsed value is 0).
 * @end   one past the last readable character.
 * @from  smallest acceptable value (inclusive).
 * @to    largest acceptable value (inclusive).
 *
 * Returns the parsed value, or -1 when it lies outside [@from, @to].
 *
 * The digits come straight from the server, so an arbitrarily long run of
 * them must not be able to overflow the signed accumulator (undefined
 * behaviour). Accumulation therefore stops as soon as the value is known to
 * exceed @to; the remaining digits are still consumed so that *src ends up
 * in the same place as before.
 *
 * NOTE(review): the return type, the -1 failure value and the write-back of
 * *src are on lines missing from this excerpt; they are reconstructed from
 * how the callers use the function - confirm against the full file. */
static off_t
parse_ftp_number(unsigned char **src, unsigned char *end, off_t from, off_t to)
{
	off_t number = 0;
	unsigned char *pos = *src;
	int too_big = 0;

	for (; pos < end && isdigit(*pos); pos++) {
		int digit = *pos - '0';

		if (too_big)
			continue;

		/* Would digit + 10 * number exceed @to?  Tested without
		 * performing the multiplication. */
		if (to < digit || number > (to - digit) / 10) {
			too_big = 1;
			continue;
		}

		number = digit + 10 * number;
	}

	*src = pos;

	if (too_big || number < from || to < number)
		return -1;

	return number;
}
/* Characters that introduce a fact in an EPLF line. They are used as case
 * labels in parse_ftp_eplf_response(). */
62 /* Parser for the EPLF format (see http://pobox.com/~djb/proto/eplf.txt).
64 * Lines end with \r\n (CR-LF), but that is handled elsewhere.
68 FTP_EPLF_FILENAME
= ASCII_TAB
, /* Filename follows */
69 FTP_EPLF_PLAINFILE
= 'r', /* RETR is possible */
70 FTP_EPLF_DIRECTORY
= '/', /* CWD is possible */
71 FTP_EPLF_SIZE
= 's', /* File size follows */
72 FTP_EPLF_MTIME
= 'm', /* Modification time follows */
73 FTP_EPLF_ID
= 'i', /* Unique file id follows */
/* Parses one EPLF listing line (@len bytes starting at @src) into @info.
 * The line is walked fact by fact: the end of each fact is located with
 * memchr() on ',' and the fact is handled by the matching FTP_EPLF_* case.
 * NOTE(review): the return statements are not among the visible lines; the
 * declared return type is a pointer to struct ftp_file_info. */
76 static struct ftp_file_info
*
77 parse_ftp_eplf_response(struct ftp_file_info
*info
, unsigned char *src
, int len
)
79 /* Skip the '+'-char which starts the line. */
80 unsigned char *end
= src
+ len
;
/* pos receives the old value of src, so it is non-NULL when the loop
 * condition is first tested, while src itself moves past the first
 * character. */
81 unsigned char *pos
= src
++;
83 /* Handle the series of facts about the file. */
85 for (; src
< end
&& pos
; src
= pos
+ 1) {
86 /* Find the end of the current fact. */
87 pos
= memchr(src
, ',', end
- src
);
91 case FTP_EPLF_FILENAME
:
92 if (src
>= end
) break;
/* The name is measured up to @end, not up to the next comma. */
93 info
->name
.source
= src
;
94 info
->name
.length
= end
- src
;
97 case FTP_EPLF_DIRECTORY
:
98 info
->type
= FTP_FILE_DIRECTORY
;
101 case FTP_EPLF_PLAINFILE
:
102 info
->type
= FTP_FILE_PLAINFILE
;
/* An empty fact body (nothing between src and pos) is ignored. */
106 if (src
>= pos
) break;
107 info
->size
= parse_ftp_number(&src
, pos
, 0, OFFT_MAX
);
/* Same guard for the modification time; the parsed number is cast to
 * time_t as is. */
111 if (src
>= pos
) break;
112 info
->mtime
= (time_t) parse_ftp_number(&src
, pos
, 0, LONG_MAX
);
124 /* Parser for UNIX-style listing: */
/* Initial state of the per-token fact machine: parse_ftp_unix_response()
 * assigns it to `fact' before it starts scanning the line. */
127 FTP_UNIX_PERMISSIONS
,
134 /* Converts Un*x-style symbolic permissions to number-style ones, e.g. string
136 * Borrowed from lftp source code by Alexander V. Lukyanov.
137 * On parse error, it returns 0. */
/* Translates the permission characters at @src into mode bits, OR-ing the
 * matching S_I* constant into `perms' for each recognised character.
 * A 10-character string is tolerated only when its last character is '+'
 * (the ACL tag); the caller visible in this file passes exactly 9.
 * NOTE(review): the switch heads and the branches for '-' and for invalid
 * characters are not among the visible lines. */
139 parse_ftp_unix_permissions(const unsigned char *src
, int len
)
144 && !(len
== 10 && src
[9] == '+')) /* ACL tag */
147 /* User permissions */
149 case('r'): perms
|= S_IRUSR
; break;
155 case('w'): perms
|= S_IWUSR
; break;
/* 'S' is set-user-ID without execute; 's' is set-user-ID with execute and
 * therefore falls through to the 'x' case. */
161 case('S'): perms
|= S_ISUID
; break;
162 case('s'): perms
|= S_ISUID
; /* fall-through */
163 case('x'): perms
|= S_IXUSR
; break;
168 /* Group permissions */
170 case('r'): perms
|= S_IRGRP
; break;
176 case('w'): perms
|= S_IWGRP
; break;
/* Same scheme as above, for the set-group-ID bit. */
182 case('S'): perms
|= S_ISGID
; break;
183 case('s'): perms
|= S_ISGID
; /* fall-through */
184 case('x'): perms
|= S_IXGRP
; break;
189 /* Others permissions */
191 case('r'): perms
|= S_IROTH
; break;
197 case('w'): perms
|= S_IWOTH
; break;
/* 'T' is the sticky bit without execute; 't' is sticky with execute. */
203 case('T'): perms
|= S_ISVTX
; break;
204 case('t'): perms
|= S_ISVTX
; /* fall-through */
205 case('x'): perms
|= S_IXOTH
; break;
/* 'L' turns set-group-ID on and forces group execute off. */
207 case('L'): perms
|= S_ISGID
; perms
&= ~S_IXGRP
; break;
/* Parses one UNIX `ls -l'-style line (@len bytes at @src) into @info.
 * After the leading type character the line is consumed token by token
 * (a token ends at the next space); `fact' records which column is expected
 * next: permissions, size (found together with the month name), day of
 * month, time or year, and finally the name.
 * NOTE(review): the switch heads and return statements are not among the
 * visible lines. */
215 static struct ftp_file_info
*
216 parse_ftp_unix_response(struct ftp_file_info
*info
, unsigned char *src
, int len
)
218 unsigned char *end
= src
+ len
;
223 /* Decide the file type. */
/* The first character of the line is used directly as the ftp_file_type
 * value; src is advanced past it. */
225 enum ftp_file_type type
= *src
++;
228 case FTP_FILE_PLAINFILE
:
229 case FTP_FILE_DIRECTORY
:
230 case FTP_FILE_SYMLINK
:
235 info
->type
= FTP_FILE_UNKNOWN
;
239 memset(&mtime
, 0, sizeof(mtime
));
242 /* Following is only needed to handle NetWare listings which are not
243 * (yet) handled. So disabled for now. --Zas */
244 /* skip_space_end(src, end); */
246 fact
= FTP_UNIX_PERMISSIONS
;
/* Each iteration handles the token [src, pos). */
248 for (pos
= src
; src
< end
; src
= pos
) {
249 skip_nonspace_end(pos
, end
);
252 case FTP_UNIX_PERMISSIONS
:
253 /* We wanna know permissions as well! And I decided to
254 * completely ignore the NetWare perms, they are very
255 * rare and of some nonstandard format. If you want
256 * them, though, I'll accept patch enabling them.
258 if (pos
- src
== 9) /* 9 is length of "rwxrwxrwx". */
259 info
->permissions
= parse_ftp_unix_permissions(src
, 9);
260 fact
= FTP_UNIX_SIZE
;
264 /* Search for the size and month name combo: */
265 if (info
->size
!= FTP_SIZE_UNKNOWN
267 int month
= parse_month((const unsigned char **) &src
, pos
);
271 mtime
.tm_mon
= month
;
/* A token that does not start with a digit is not a size: the size is
 * reset to unknown. */
276 if (!isdigit(*src
)) {
277 info
->size
= FTP_SIZE_UNKNOWN
;
281 info
->size
= parse_ftp_number(&src
, pos
, 0, OFFT_MAX
);
285 mtime
.tm_mday
= parse_day((const unsigned char **) &src
, pos
);
286 fact
= FTP_UNIX_TIME
;
290 /* This ought to be either the time, or the
291 * year. Let's be flexible! */
292 fact
= FTP_UNIX_NAME
;
294 /* We must deal with digits. */
298 /* If we have a number x, it's a year. If we have x:y,
299 * it's hours and minutes. */
300 if (!memchr(src
, ':', pos
- src
)) {
301 mtime
.tm_year
= parse_year((const unsigned char **) &src
, pos
);
/* If the hh:mm token cannot be parsed the time of day falls back to
 * midnight. */
305 if (!parse_time((const unsigned char **) &src
, &mtime
, pos
)) {
306 mtime
.tm_hour
= mtime
.tm_min
= mtime
.tm_sec
= 0;
311 /* Since the file name may contain spaces use @end as the
312 * token ending and not @pos. */
314 info
->name
.source
= src
;
315 info
->name
.length
= end
- src
;
317 /* Some FTP sites choose to have ls -F as their default
318 * LIST output, which marks the symlinks with a trailing
319 * `@', directory names with a trailing `/' and
320 * executables with a trailing `*'. This is no problem
321 * unless encountering a symbolic link ending with `@',
322 * or an executable ending with `*' on a server without
323 * default -F output. I believe these cases are very
326 #define check_trailing_char(string, trailchar) \
327 ((string)->length > 0 \
328 && (string)->source[(string)->length - 1] == (trailchar))
330 switch (info
->type
) {
331 case FTP_FILE_DIRECTORY
:
332 /* Check for trailing `/' */
333 if (check_trailing_char(&info
->name
, '/'))
337 case FTP_FILE_SYMLINK
:
338 /* If the file is a symbolic link, it should
339 * have a ` -> ' somewhere. */
340 while (pos
&& pos
+ 3 < end
) {
341 if (!memcmp(pos
, " -> ", 4)) {
342 info
->symlink
.source
= pos
+ 4;
343 info
->symlink
.length
= end
- pos
- 4;
344 info
->name
.length
= pos
- src
;
/* NOTE(review): the search starts at pos + 1 but is given end - pos bytes,
 * which is one byte more than remains before @end; end - pos - 1 looks
 * intended - confirm. */
348 pos
= memchr(pos
+ 1, ' ', end
- pos
);
351 if (!info
->symlink
.source
)
354 /* Check for trailing `@' on link and trailing
355 * `/' on the link target if it's a directory */
356 if (check_trailing_char(&info
->name
, '@'))
359 if (check_trailing_char(&info
->symlink
, '/'))
360 info
->symlink
.length
--;
363 case FTP_FILE_PLAINFILE
:
364 /* Check for trailing `*' on files which are
366 if ((info
->permissions
& 0111)
367 && check_trailing_char(&info
->name
, '*'))
374 if (mtime
.tm_year
== 0) {
375 /* Get the current time. */
376 time_t timenow
= time(NULL
);
377 struct tm
*now
= localtime(&timenow
);
379 mtime
.tm_year
= now
->tm_year
;
381 /* Some listings will not specify the year if it
382 * is "obvious" that the file was from the
383 * previous year. E.g. if today is 97-01-12, and
384 * you see a file of Dec 15th, its year is 1996,
385 * not 1997. Thanks to Vladimir Volovich for
386 * mentioning this! */
387 if (mtime
.tm_mon
> now
->tm_mon
)
/* mktime() interprets the broken-down time as local time; local_time_zone
 * is set alongside it (presumably so users of @info know that - confirm). */
391 info
->mtime
= mktime(&mtime
); /* store the time-stamp */
392 info
->local_time_zone
= 1;
/* Move pos over the blanks that follow the token just handled. */
397 skip_space_end(pos
, end
);
404 /* Parser for VMS-style MultiNet (some spaces removed from examples): */
406 /* Converts VMS symbolic permissions to number-style ones, e.g. string
407 * RWED,RWE,RE to 755. "D" (delete) is taken to be equal to "W" (write).
408 * Inspired by a patch of Stoyan Lekov <lekov@eda.bg>. */
/* Walks the @len characters at @src and accumulates the mode bits in
 * `perms': every letter ORs a bit into the lowest octal digit and every
 * comma moves on to the next permission group.
 * NOTE(review): the switch head, the return statements and possibly further
 * case labels are not among the visible lines. */
410 parse_ftp_vms_permissions(const unsigned char *src
, int len
)
415 for (pos
= 0; pos
< len
; pos
++) {
/* ',' closes a group: shift what was collected so far up by one octal
 * digit to make room for the next group. */
417 case ',': perms
<<= 3; break;
418 case 'R': perms
|= 4; break;
420 case 'D': perms
|= 2; break;
421 case 'E': perms
|= 1; break;
423 /* Wrong VMS permissions! */
/* Parses one VMS (MultiNet) listing line (@len bytes at @src) into @info.
 * Returns NULL when the line has no ';' version separator or ends before
 * the date column. Once the date has been read the parser is lenient and
 * returns @info as soon as a later column turns out to be missing. */
431 static struct ftp_file_info
*
432 parse_ftp_vms_response(struct ftp_file_info
*info
, unsigned char *src
, int len
)
434 unsigned char *end
= src
+ len
;
437 /* First column: Name. A bit of black magic again. The name may be either
438 * ABCD.EXT or ABCD.EXT;NUM and it might be on a separate line.
439 * Therefore we will first try to get the complete name until the first
440 * space character; if it fails, we assume that the name occupies the
441 * whole line. After that we search for the version separator ";", we
442 * remove it and check the extension of the file; extension .DIR denotes
445 pos
= memchr(src
, ';', end
- src
);
446 if (!pos
) return NULL
;
448 info
->name
.source
= src
;
449 info
->name
.length
= pos
- src
;
451 /* If the name ends on .DIR or .DIR;#, it's a directory. We also
452 * set the file size to zero as the listing does tell us only
453 * the size in filesystem blocks - for an integrity check (when
454 * mirroring, for example) we would need the size in bytes. */
/* pos points at the ';' here, so &pos[-4] addresses the last four
 * characters of the name; the length test keeps that inside the name. */
456 if (info
->name
.length
> 4 && !memcmp(&pos
[-4], ".DIR", 4)) {
457 info
->type
= FTP_FILE_DIRECTORY
;
458 info
->name
.length
-= 4;
460 info
->type
= FTP_FILE_PLAINFILE
;
/* Step over the rest of the name token (the version number) and the blanks
 * after it. */
463 skip_nonspace_end(pos
, end
);
464 skip_space_end(pos
, end
);
468 /* Second column, if exists, or the first column of the next line
469 * contain file size in blocks. We will skip it. */
472 /* FIXME: Handle multi-lined views. */
476 skip_nonspace_end(src
, end
);
477 skip_space_end(src
, end
);
478 if (src
>= end
) return NULL
;
481 /* Third/Second column: Date DD-MMM-YYYY and
482 * Fourth/Third column: Time hh:mm[:ss] */
484 /* If the server produces garbage like
485 * 'EA95_0PS.GZ;1 No privilege for attempted operation'
486 * parse_date() will fail. */
/* A zero result from parse_date() is what the check below tests for. */
487 info
->mtime
= parse_date(&src
, end
, 1, 0);
488 if (info
->mtime
== 0)
491 /* Be more tolerant from here on ... */
494 /* Skip the fifth column */
496 skip_space_end(src
, end
);
497 skip_nonspace_end(src
, end
);
498 skip_space_end(src
, end
);
499 if (src
>= end
) return info
;
502 /* Sixth column: Permissions */
504 src
= memchr(src
, '(', end
- src
);
505 if (!src
|| src
>= end
)
510 pos
= memchr(src
, ')', end
- src
);
511 if (!pos
) return info
;
513 /* Permissions have the format "RWED,RWED,RE" */
514 info
->permissions
= parse_ftp_vms_permissions(src
, pos
- src
);
520 /* Parser for the MSDOS-style format: */
/* Parses one MSDOS/WinNT-style listing line (@len bytes at @src) into
 * @info: date, time, then either a file size or a directory marker, with
 * the name taken from a fixed column. Returns NULL when the line is 39
 * bytes or shorter, or when it ends before the time or the size column.
 * NOTE(review): several failure branches and the final return are not among
 * the visible lines. */
522 struct ftp_file_info
*
523 parse_ftp_winnt_response(struct ftp_file_info
*info
, unsigned char *src
, int len
)
526 unsigned char *end
= src
+ len
;
528 /* Extracting name is a bit of black magic and we have to do it
529 * before `strtok' inserted extra \0 characters in the line
530 * string. For the moment let us just suppose that the name starts at
531 * column 39 of the listing. This way we could also recognize
532 * filenames that begin with a series of space characters (but who
533 * really wants to use such filenames anyway?). */
/* A line of 39 bytes or fewer has no room for a name at that column. */
534 if (len
<= 39) return NULL
;
536 info
->name
.source
= src
+ 39;
537 info
->name
.length
= end
- src
- 39;
540 /* First column: mm-dd-yy. Should number parsing of the month fail,
541 * january will be assumed. */
543 memset(&mtime
, 0, sizeof(mtime
));
546 mtime
.tm_mon
= (int) parse_ftp_number(&src
, end
, 1, 12);
/* The month must be followed by '-' with more than two bytes left. */
547 if (src
+ 2 >= end
|| *src
!= '-')
552 mtime
.tm_mday
= parse_day((const unsigned char **) &src
, end
);
553 if (src
+ 2 >= end
|| *src
!= '-')
558 mtime
.tm_year
= parse_year((const unsigned char **) &src
, end
);
559 if (src
>= end
|| mtime
.tm_year
== -1)
562 skip_space_end(src
, end
);
563 if (src
>= end
) return NULL
;
566 /* Second column: hh:mm[AP]M, listing does not contain value for
569 if (!parse_time((const unsigned char **) &src
, &mtime
, end
))
572 /* Store the time-stamp. */
573 info
->mtime
= mktime(&mtime
);
/* Step over whatever is left of the time token and the blanks after it. */
575 skip_nonspace_end(src
, end
);
576 skip_space_end(src
, end
);
577 if (src
>= end
) return NULL
;
580 /* Third column: Either file length, or <DIR>. We also set the
581 * permissions (guessed as 0644 for plain files and 0755 for directories
582 * as the listing does not give us a clue) and filetype here. */
585 info
->type
= FTP_FILE_DIRECTORY
;
586 info
->permissions
= S_IRWXU
| S_IRGRP
| S_IXGRP
| S_IROTH
| S_IXOTH
;
/* A column starting with a digit is taken as the size of a plain file. */
588 } else if (isdigit(*src
)) {
589 info
->type
= FTP_FILE_PLAINFILE
;
590 info
->size
= parse_ftp_number(&src
, end
, 0, OFFT_MAX
);
591 info
->permissions
= S_IRUSR
| S_IWUSR
| S_IRGRP
| S_IROTH
;
594 info
->type
= FTP_FILE_UNKNOWN
;
/* Entry point of the listing parser: hands the line (@len bytes at @src) to
 * one of the format-specific parsers (EPLF, VMS, WinNT or UNIX) and returns
 * that parser's result. Returns NULL when the sanity assertion on the
 * arguments fails.
 * NOTE(review): most of the conditions that choose between the parsers are
 * not among the visible lines. */
601 struct ftp_file_info
*
602 parse_ftp_file_info(struct ftp_file_info
*info
, unsigned char *src
, int len
)
604 assert(info
&& src
&& len
> 0);
605 if_assert_failed
return NULL
;
609 return parse_ftp_eplf_response(info
, src
, len
);
/* A ';' anywhere in the line (the VMS version separator) selects the VMS
 * parser. */
621 if (memchr(src
, ';', len
))
622 return parse_ftp_vms_response(info
, src
, len
);
625 return parse_ftp_winnt_response(info
, src
, len
);
628 return parse_ftp_unix_response(info
, src
, len
);