busybox: update to 1.23.2
[tomato.git] / release / src / router / busybox / networking / wget.c
blob6c8bd90a8fe9262bce151d1a21eba937aa324de4
/* vi: set sw=4 ts=4: */
/*
 * wget - retrieve a file using HTTP or FTP
 *
 * Chip Rosenthal Covad Communications <chip@laserlink.net>
 * Licensed under GPLv2, see file LICENSE in this source tree.
 *
 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
 */
12 //usage:#define wget_trivial_usage
13 //usage: IF_FEATURE_WGET_LONG_OPTIONS(
14 //usage: "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
15 //usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
16 /* Since we ignore these opts, we don't show them in --help */
17 /* //usage: " [--no-check-certificate] [--no-cache]" */
18 //usage: " [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
19 //usage: )
20 //usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
21 //usage: "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
22 //usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
23 //usage: )
24 //usage:#define wget_full_usage "\n\n"
25 //usage: "Retrieve files via HTTP or FTP\n"
26 //usage: "\n -s Spider mode - only check file existence"
27 //usage: "\n -c Continue retrieval of aborted transfer"
28 //usage: "\n -q Quiet"
29 //usage: "\n -P DIR Save to DIR (default .)"
30 //usage: IF_FEATURE_WGET_TIMEOUT(
31 //usage: "\n -T SEC Network read timeout is SEC seconds"
32 //usage: )
33 //usage: "\n -O FILE Save to FILE ('-' for stdout)"
34 //usage: "\n -U STR Use STR for User-Agent header"
35 //usage: "\n -Y Use proxy ('on' or 'off')"
37 #include "libbb.h"
/*
 * Protocol tracing helpers.
 *
 * Flip "#if 0" to "#if 1" to have every line sent to / received from the
 * server echoed through bb_error_msg().  In the normal build log_io()
 * expands to nothing and SENDFMT() is a plain fprintf().
 *
 * The tracing variant of SENDFMT() is a do { } while (0) block WITHOUT a
 * trailing semicolon, so that "if (x) SENDFMT(...); else ..." keeps working
 * when tracing is switched on.
 */
#if 0
# define log_io(...) bb_error_msg(__VA_ARGS__)
# define SENDFMT(fp, fmt, ...) \
	do { \
		log_io("> " fmt, ##__VA_ARGS__); \
		fprintf(fp, fmt, ##__VA_ARGS__); \
	} while (0)
#else
# define log_io(...) ((void)0)
# define SENDFMT(fp, fmt, ...) fprintf(fp, fmt, ##__VA_ARGS__)
#endif
/*
 * One endpoint (download target or proxy) as split up by parse_url().
 */
struct host_info {
	char *allocated;        /* heap copy of the URL; released with free() */
	const char *path;       /* path to request, without the leading '/' */
	char *user;             /* malloced "user:password" taken from the URL, or NULL */
	const char *protocol;   /* one of P_FTP / P_HTTP / P_HTTPS (compared by address) */
	char *host;             /* "host" or "host:port" part of the URL */
	int port;               /* default port of the protocol */
};

/* Protocol names.  Callers test h->protocol against these by pointer. */
static const char P_FTP[]   = "ftp";
static const char P_HTTP[]  = "http";
static const char P_HTTPS[] = "https";
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
/*
 * Headers the user may supply with --header.  When one of these is given,
 * the corresponding built-in header is not sent.
 * Bit N of G.user_headers corresponds to the N-th string of
 * wget_user_headers[], so the two lists must stay in the same order.
 */
enum {
	HDR_HOST       = (1 << 0),
	HDR_USER_AGENT = (1 << 1),
	HDR_RANGE      = (1 << 2),
	HDR_AUTH       = (1 << 3) * ENABLE_FEATURE_WGET_AUTHENTICATION,
	HDR_PROXY_AUTH = (1 << 4) * ENABLE_FEATURE_WGET_AUTHENTICATION,
};
/* NUL-separated list of header names, terminated by an empty string */
static const char wget_user_headers[] ALIGN1 =
	"Host:\0"
	"User-Agent:\0"
	"Range:\0"
# if ENABLE_FEATURE_WGET_AUTHENTICATION
	"Authorization:\0"
	"Proxy-Authorization:\0"
# endif
	;
# define USR_HEADER_HOST       (G.user_headers & HDR_HOST)
# define USR_HEADER_USER_AGENT (G.user_headers & HDR_USER_AGENT)
# define USR_HEADER_RANGE      (G.user_headers & HDR_RANGE)
# define USR_HEADER_AUTH       (G.user_headers & HDR_AUTH)
# define USR_HEADER_PROXY_AUTH (G.user_headers & HDR_PROXY_AUTH)
#else /* No long options, no user-headers :( */
# define USR_HEADER_HOST       0
# define USR_HEADER_USER_AGENT 0
# define USR_HEADER_RANGE      0
# define USR_HEADER_AUTH       0
# define USR_HEADER_PROXY_AUTH 0
#endif
95 /* Globals */
96 struct globals {
97 off_t content_len; /* Content-length of the file */
98 off_t beg_range; /* Range at which continue begins */
99 #if ENABLE_FEATURE_WGET_STATUSBAR
100 off_t transferred; /* Number of bytes transferred so far */
101 const char *curfile; /* Name of current file being transferred */
102 bb_progress_t pmt;
103 #endif
104 char *dir_prefix;
105 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
106 char *post_data;
107 char *extra_headers;
108 unsigned char user_headers; /* Headers mentioned by the user */
109 #endif
110 char *fname_out; /* where to direct output (-O) */
111 const char *proxy_flag; /* Use proxies if env vars are set */
112 const char *user_agent; /* "User-Agent" header field */
113 #if ENABLE_FEATURE_WGET_TIMEOUT
114 unsigned timeout_seconds;
115 bool connecting;
116 #endif
117 int output_fd;
118 int o_flags;
119 smallint chunked; /* chunked transfer encoding */
120 smallint got_clen; /* got content-length: from server */
121 /* Local downloads do benefit from big buffer.
122 * With 512 byte buffer, it was measured to be
123 * an order of magnitude slower than with big one.
125 uint64_t just_to_align_next_member;
126 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
127 } FIX_ALIASING;
128 #define G (*ptr_to_globals)
129 #define INIT_G() do { \
130 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
131 } while (0)
132 #define FINI_G() do { \
133 FREE_PTR_TO_GLOBALS(); \
134 } while (0)
137 /* Must match option string! */
138 enum {
139 WGET_OPT_CONTINUE = (1 << 0),
140 WGET_OPT_SPIDER = (1 << 1),
141 WGET_OPT_QUIET = (1 << 2),
142 WGET_OPT_OUTNAME = (1 << 3),
143 WGET_OPT_PREFIX = (1 << 4),
144 WGET_OPT_PROXY = (1 << 5),
145 WGET_OPT_USER_AGENT = (1 << 6),
146 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
147 WGET_OPT_RETRIES = (1 << 8),
148 WGET_OPT_PASSIVE = (1 << 9),
149 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
150 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
/* Values accepted by progress_meter() */
enum {
	PROGRESS_START = -1,    /* set up the bar, then draw it */
	PROGRESS_END   = 0,     /* draw it one last time, then tear it down */
	PROGRESS_BUMP  = 1,     /* just redraw */
};
158 #if ENABLE_FEATURE_WGET_STATUSBAR
159 static void progress_meter(int flag)
161 if (option_mask32 & WGET_OPT_QUIET)
162 return;
164 if (flag == PROGRESS_START)
165 bb_progress_init(&G.pmt, G.curfile);
167 bb_progress_update(&G.pmt,
168 G.beg_range,
169 G.transferred,
170 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
173 if (flag == PROGRESS_END) {
174 bb_progress_free(&G.pmt);
175 bb_putchar_stderr('\n');
176 G.transferred = 0;
179 #else
180 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
181 #endif
/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * Edits host in place: "[fe80::1%eth0]:80" becomes "[fe80::1]:80".
 * Anything that is not of the form "[addr%scope]" or "[addr%scope]:port"
 * is left untouched.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */

	if (host[0] != '[')
		return; /* not IPv6 */

	scope = strchr(host, '%');
	if (!scope)
		return;

	/* Remove the IPv6 zone identifier from the host address */
	cp = strchr(host, ']');
	if (!cp || cp < scope || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]"), or the '%'
		 * sits after the ']' (e.g. "[::1]:8%0"): there is no scope id
		 * inside the brackets, and copying "forward" over ourselves
		 * would never find the terminating NUL.
		 */
		return;
	}

	/* cp points to "]...", scope points to "%eth0]..." */
	memmove(scope, cp, strlen(cp) + 1);
}
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/*
 * Base64-encode str into G.wget_buf and return G.wget_buf.
 * The result is overwritten by the next use of the shared buffer.
 * Overlong input is silently cut short.
 */
static char *base64enc(const char *str)
{
	const size_t max_len = sizeof(G.wget_buf)/4*3 - 10; /* paranoia */
	unsigned len;

	len = strlen(str);
	if (len > max_len)
		len = max_len;
	bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
	return G.wget_buf;
}
#endif
/*
 * Cut s short at its first control character (any byte below ' ').
 * Bytes with the high bit set are kept.  Returns s.
 */
static char* sanitize_string(char *s)
{
	unsigned char *cursor;

	for (cursor = (unsigned char *) s; *cursor >= ' '; cursor++)
		continue;
	*cursor = '\0';
	return s;
}
#if ENABLE_FEATURE_WGET_TIMEOUT
/*
 * SIGALRM handler.  The alarm is armed by open_socket(); if it goes off
 * while the connect is still in progress, give up.  Otherwise ignore it.
 */
static void alarm_handler(int sig UNUSED_PARAM)
{
	if (!G.connecting)
		return;
	/* This is theoretically unsafe (uses stdio and malloc in signal handler) */
	bb_error_msg_and_die("download timed out");
}
#endif
250 static FILE *open_socket(len_and_sockaddr *lsa)
252 int fd;
253 FILE *fp;
255 IF_FEATURE_WGET_TIMEOUT(alarm(G.timeout_seconds); G.connecting = 1;)
256 fd = xconnect_stream(lsa);
257 IF_FEATURE_WGET_TIMEOUT(G.connecting = 0;)
259 /* glibc 2.4 seems to try seeking on it - ??! */
260 /* hopefully it understands what ESPIPE means... */
261 fp = fdopen(fd, "r+");
262 if (!fp)
263 bb_perror_msg_and_die(bb_msg_memory_exhausted);
265 return fp;
268 /* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
269 /* FIXME: does not respect FEATURE_WGET_TIMEOUT and -T N: */
270 static char fgets_and_trim(FILE *fp)
272 char c;
273 char *buf_ptr;
275 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
276 bb_perror_msg_and_die("error getting response");
278 buf_ptr = strchrnul(G.wget_buf, '\n');
279 c = *buf_ptr;
280 *buf_ptr = '\0';
281 buf_ptr = strchrnul(G.wget_buf, '\r');
282 *buf_ptr = '\0';
284 log_io("< %s", G.wget_buf);
286 return c;
289 static int ftpcmd(const char *s1, const char *s2, FILE *fp)
291 int result;
292 if (s1) {
293 if (!s2)
294 s2 = "";
295 fprintf(fp, "%s%s\r\n", s1, s2);
296 fflush(fp);
297 log_io("> %s%s", s1, s2);
300 do {
301 fgets_and_trim(fp);
302 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
304 G.wget_buf[3] = '\0';
305 result = xatoi_positive(G.wget_buf);
306 G.wget_buf[3] = ' ';
307 return result;
310 static void parse_url(const char *src_url, struct host_info *h)
312 char *url, *p, *sp;
314 free(h->allocated);
315 h->allocated = url = xstrdup(src_url);
317 h->protocol = P_FTP;
318 p = strstr(url, "://");
319 if (p) {
320 *p = '\0';
321 h->host = p + 3;
322 if (strcmp(url, P_FTP) == 0) {
323 h->port = bb_lookup_port(P_FTP, "tcp", 21);
324 } else
325 if (strcmp(url, P_HTTPS) == 0) {
326 h->port = bb_lookup_port(P_HTTPS, "tcp", 443);
327 h->protocol = P_HTTPS;
328 } else
329 if (strcmp(url, P_HTTP) == 0) {
330 http:
331 h->port = bb_lookup_port(P_HTTP, "tcp", 80);
332 h->protocol = P_HTTP;
333 } else {
334 *p = ':';
335 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
337 } else {
338 // GNU wget is user-friendly and falls back to http://
339 h->host = url;
340 goto http;
343 // FYI:
344 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
345 // 'GET /?var=a/b HTTP 1.0'
346 // and saves 'index.html?var=a%2Fb' (we save 'b')
347 // wget 'http://busybox.net?login=john@doe':
348 // request: 'GET /?login=john@doe HTTP/1.0'
349 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
350 // wget 'http://busybox.net#test/test':
351 // request: 'GET / HTTP/1.0'
352 // saves: 'index.html' (we save 'test')
354 // We also don't add unique .N suffix if file exists...
355 sp = strchr(h->host, '/');
356 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
357 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
358 if (!sp) {
359 h->path = "";
360 } else if (*sp == '/') {
361 *sp = '\0';
362 h->path = sp + 1;
363 } else { // '#' or '?'
364 // http://busybox.net?login=john@doe is a valid URL
365 // memmove converts to:
366 // http:/busybox.nett?login=john@doe...
367 memmove(h->host - 1, h->host, sp - h->host);
368 h->host--;
369 sp[-1] = '\0';
370 h->path = sp;
373 sp = strrchr(h->host, '@');
374 if (sp != NULL) {
375 // URL-decode "user:password" string before base64-encoding:
376 // wget http://test:my%20pass@example.com should send
377 // Authorization: Basic dGVzdDpteSBwYXNz
378 // which decodes to "test:my pass".
379 // Standard wget and curl do this too.
380 *sp = '\0';
381 free(h->user);
382 h->user = xstrdup(percent_decode_in_place(h->host, /*strict:*/ 0));
383 h->host = sp + 1;
385 /* else: h->user remains NULL, or as set by original request
386 * before redirect (if we are here after a redirect).
390 static char *gethdr(FILE *fp)
392 char *s, *hdrval;
393 int c;
395 /* retrieve header line */
396 c = fgets_and_trim(fp);
398 /* end of the headers? */
399 if (G.wget_buf[0] == '\0')
400 return NULL;
402 /* convert the header name to lower case */
403 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
405 * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
406 * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
407 * "A-Z" maps to "a-z".
408 * "@[\]" can't occur in header names.
409 * "^_" maps to "~,DEL" (which is wrong).
410 * "^" was never seen yet, "_" was seen from web.archive.org
411 * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
413 *s |= 0x20;
416 /* verify we are at the end of the header name */
417 if (*s != ':')
418 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
420 /* locate the start of the header value */
421 *s++ = '\0';
422 hdrval = skip_whitespace(s);
424 if (c != '\n') {
425 /* Rats! The buffer isn't big enough to hold the entire header value */
426 while (c = getc(fp), c != EOF && c != '\n')
427 continue;
430 return hdrval;
433 static void reset_beg_range_to_zero(void)
435 bb_error_msg("restart failed");
436 G.beg_range = 0;
437 xlseek(G.output_fd, 0, SEEK_SET);
438 /* Done at the end instead: */
439 /* ftruncate(G.output_fd, 0); */
442 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
444 FILE *sfp;
445 char *str;
446 int port;
448 if (!target->user)
449 target->user = xstrdup("anonymous:busybox@");
451 sfp = open_socket(lsa);
452 if (ftpcmd(NULL, NULL, sfp) != 220)
453 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
456 * Splitting username:password pair,
457 * trying to log in
459 str = strchr(target->user, ':');
460 if (str)
461 *str++ = '\0';
462 switch (ftpcmd("USER ", target->user, sfp)) {
463 case 230:
464 break;
465 case 331:
466 if (ftpcmd("PASS ", str, sfp) == 230)
467 break;
468 /* fall through (failed login) */
469 default:
470 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
473 ftpcmd("TYPE I", NULL, sfp);
476 * Querying file size
478 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
479 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
480 if (G.content_len < 0 || errno) {
481 bb_error_msg_and_die("SIZE value is garbage");
483 G.got_clen = 1;
487 * Entering passive mode
489 if (ftpcmd("PASV", NULL, sfp) != 227) {
490 pasv_error:
491 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
493 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
494 // Server's IP is N1.N2.N3.N4 (we ignore it)
495 // Server's port for data connection is P1*256+P2
496 str = strrchr(G.wget_buf, ')');
497 if (str) str[0] = '\0';
498 str = strrchr(G.wget_buf, ',');
499 if (!str) goto pasv_error;
500 port = xatou_range(str+1, 0, 255);
501 *str = '\0';
502 str = strrchr(G.wget_buf, ',');
503 if (!str) goto pasv_error;
504 port += xatou_range(str+1, 0, 255) * 256;
505 set_nport(&lsa->u.sa, htons(port));
507 *dfpp = open_socket(lsa);
509 if (G.beg_range != 0) {
510 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
511 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
512 G.content_len -= G.beg_range;
513 else
514 reset_beg_range_to_zero();
517 if (ftpcmd("RETR ", target->path, sfp) > 150)
518 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
520 return sfp;
523 static int spawn_https_helper(const char *host, unsigned port)
525 char *allocated = NULL;
526 int sp[2];
527 int pid;
529 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
530 /* Kernel can have AF_UNIX support disabled */
531 bb_perror_msg_and_die("socketpair");
533 if (!strchr(host, ':'))
534 host = allocated = xasprintf("%s:%u", host, port);
536 pid = BB_MMU ? xfork() : xvfork();
537 if (pid == 0) {
538 /* Child */
539 char *argv[6];
541 close(sp[0]);
542 xmove_fd(sp[1], 0);
543 xdup2(0, 1);
545 * TODO: develop a tiny ssl/tls helper (using matrixssl?),
546 * try to exec it here before falling back to big fat openssl.
549 * openssl s_client -quiet -connect www.kernel.org:443 2>/dev/null
550 * It prints some debug stuff on stderr, don't know how to suppress it.
551 * Work around by dev-nulling stderr. We lose all error messages :(
553 xmove_fd(2, 3);
554 xopen("/dev/null", O_RDWR);
555 argv[0] = (char*)"openssl";
556 argv[1] = (char*)"s_client";
557 argv[2] = (char*)"-quiet";
558 argv[3] = (char*)"-connect";
559 argv[4] = (char*)host;
560 argv[5] = NULL;
561 BB_EXECVP(argv[0], argv);
562 xmove_fd(3, 2);
563 bb_perror_msg_and_die("can't execute '%s'", argv[0]);
564 /* notreached */
567 /* Parent */
568 free(allocated);
569 close(sp[1]);
570 return sp[0];
/* See networking/ssl_helper/README */
#define SSL_HELPER 0

#if SSL_HELPER
/*
 * Start "ssl_helper -d3" with the already connected network_fd passed to
 * it as fd 3 and with its stdin/stdout attached to one end of a socket
 * pair.  In the parent, network_fd is then replaced by the other end of
 * the pair, so the caller keeps using the same fd number.
 */
static void spawn_https_helper1(int network_fd)
{
	int pair[2];
	int pid;

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, pair) != 0)
		/* Kernel can have AF_UNIX support disabled */
		bb_perror_msg_and_die("socketpair");

	pid = BB_MMU ? xfork() : xvfork();
	if (pid == 0) {
		/* Child */
		/*
		 * A simple ssl/tls helper
		 */
		char *argv[3] = {
			(char*)"ssl_helper",
			(char*)"-d3",
			NULL
		};

		close(pair[0]);
		xmove_fd(pair[1], 0);
		xdup2(0, 1);
		xmove_fd(network_fd, 3);
		BB_EXECVP(argv[0], argv);
		bb_perror_msg_and_die("can't execute '%s'", argv[0]);
		/* notreached */
	}

	/* Parent */
	close(pair[1]);
	xmove_fd(pair[0], network_fd);
}
#endif
612 static void NOINLINE retrieve_file_data(FILE *dfp)
614 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
615 # if ENABLE_FEATURE_WGET_TIMEOUT
616 unsigned second_cnt = G.timeout_seconds;
617 # endif
618 struct pollfd polldata;
620 polldata.fd = fileno(dfp);
621 polldata.events = POLLIN | POLLPRI;
622 #endif
623 progress_meter(PROGRESS_START);
625 if (G.chunked)
626 goto get_clen;
628 /* Loops only if chunked */
629 while (1) {
631 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
632 /* Must use nonblocking I/O, otherwise fread will loop
633 * and *block* until it reads full buffer,
634 * which messes up progress bar and/or timeout logic.
635 * Because of nonblocking I/O, we need to dance
636 * very carefully around EAGAIN. See explanation at
637 * clearerr() calls.
639 ndelay_on(polldata.fd);
640 #endif
641 while (1) {
642 int n;
643 unsigned rdsz;
645 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
646 /* fread internally uses read loop, which in our case
647 * is usually exited when we get EAGAIN.
648 * In this case, libc sets error marker on the stream.
649 * Need to clear it before next fread to avoid possible
650 * rare false positive ferror below. Rare because usually
651 * fread gets more than zero bytes, and we don't fall
652 * into if (n <= 0) ...
654 clearerr(dfp);
655 #endif
656 errno = 0;
657 rdsz = sizeof(G.wget_buf);
658 if (G.got_clen) {
659 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
660 if ((int)G.content_len <= 0)
661 break;
662 rdsz = (unsigned)G.content_len;
665 n = fread(G.wget_buf, 1, rdsz, dfp);
667 if (n > 0) {
668 xwrite(G.output_fd, G.wget_buf, n);
669 #if ENABLE_FEATURE_WGET_STATUSBAR
670 G.transferred += n;
671 #endif
672 if (G.got_clen) {
673 G.content_len -= n;
674 if (G.content_len == 0)
675 break;
677 #if ENABLE_FEATURE_WGET_TIMEOUT
678 second_cnt = G.timeout_seconds;
679 #endif
680 goto bump;
683 /* n <= 0.
684 * man fread:
685 * If error occurs, or EOF is reached, the return value
686 * is a short item count (or zero).
687 * fread does not distinguish between EOF and error.
689 if (errno != EAGAIN) {
690 if (ferror(dfp)) {
691 progress_meter(PROGRESS_END);
692 bb_perror_msg_and_die(bb_msg_read_error);
694 break; /* EOF, not error */
697 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
698 /* It was EAGAIN. There is no data. Wait up to one second
699 * then abort if timed out, or update the bar and try reading again.
701 if (safe_poll(&polldata, 1, 1000) == 0) {
702 # if ENABLE_FEATURE_WGET_TIMEOUT
703 if (second_cnt != 0 && --second_cnt == 0) {
704 progress_meter(PROGRESS_END);
705 bb_error_msg_and_die("download timed out");
707 # endif
708 /* We used to loop back to poll here,
709 * but there is no great harm in letting fread
710 * to try reading anyway.
713 #endif
714 bump:
715 /* Need to do it _every_ second for "stalled" indicator
716 * to be shown properly.
718 progress_meter(PROGRESS_BUMP);
719 } /* while (reading data) */
721 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
722 clearerr(dfp);
723 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
724 #endif
725 if (!G.chunked)
726 break;
728 fgets_and_trim(dfp); /* Eat empty line */
729 get_clen:
730 fgets_and_trim(dfp);
731 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
732 /* FIXME: error check? */
733 if (G.content_len == 0)
734 break; /* all done! */
735 G.got_clen = 1;
737 * Note that fgets may result in some data being buffered in dfp.
738 * We loop back to fread, which will retrieve this data.
739 * Also note that code has to be arranged so that fread
740 * is done _before_ one-second poll wait - poll doesn't know
741 * about stdio buffering and can result in spurious one second waits!
745 /* If -c failed, we restart from the beginning,
746 * but we do not truncate file then, we do it only now, at the end.
747 * This lets user to ^C if his 99% complete 10 GB file download
748 * failed to restart *without* losing the almost complete file.
751 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
752 if (pos != (off_t)-1)
753 ftruncate(G.output_fd, pos);
756 /* Draw full bar and free its resources */
757 G.chunked = 0; /* makes it show 100% even for chunked download */
758 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
759 progress_meter(PROGRESS_END);
762 static void download_one_url(const char *url)
764 bool use_proxy; /* Use proxies if env vars are set */
765 int redir_limit;
766 len_and_sockaddr *lsa;
767 FILE *sfp; /* socket to web/ftp server */
768 FILE *dfp; /* socket to ftp server (data) */
769 char *proxy = NULL;
770 char *fname_out_alloc;
771 char *redirected_path = NULL;
772 struct host_info server;
773 struct host_info target;
775 server.allocated = NULL;
776 target.allocated = NULL;
777 server.user = NULL;
778 target.user = NULL;
780 parse_url(url, &target);
782 /* Use the proxy if necessary */
783 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
784 if (use_proxy) {
785 proxy = getenv(target.protocol == P_FTP ? "ftp_proxy" : "http_proxy");
786 //FIXME: what if protocol is https? Ok to use http_proxy?
787 use_proxy = (proxy && proxy[0]);
788 if (use_proxy)
789 parse_url(proxy, &server);
791 if (!use_proxy) {
792 server.port = target.port;
793 if (ENABLE_FEATURE_IPV6) {
794 //free(server.allocated); - can't be non-NULL
795 server.host = server.allocated = xstrdup(target.host);
796 } else {
797 server.host = target.host;
801 if (ENABLE_FEATURE_IPV6)
802 strip_ipv6_scope_id(target.host);
804 /* If there was no -O FILE, guess output filename */
805 fname_out_alloc = NULL;
806 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
807 G.fname_out = bb_get_last_path_component_nostrip(target.path);
808 /* handle "wget http://kernel.org//" */
809 if (G.fname_out[0] == '/' || !G.fname_out[0])
810 G.fname_out = (char*)"index.html";
811 /* -P DIR is considered only if there was no -O FILE */
812 if (G.dir_prefix)
813 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
814 else {
815 /* redirects may free target.path later, need to make a copy */
816 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
819 #if ENABLE_FEATURE_WGET_STATUSBAR
820 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
821 #endif
823 /* Determine where to start transfer */
824 G.beg_range = 0;
825 if (option_mask32 & WGET_OPT_CONTINUE) {
826 G.output_fd = open(G.fname_out, O_WRONLY);
827 if (G.output_fd >= 0) {
828 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
830 /* File doesn't exist. We do not create file here yet.
831 * We are not sure it exists on remote side */
834 redir_limit = 5;
835 resolve_lsa:
836 lsa = xhost2sockaddr(server.host, server.port);
837 if (!(option_mask32 & WGET_OPT_QUIET)) {
838 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
839 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
840 free(s);
842 establish_session:
843 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
844 G.got_clen = 0;
845 G.chunked = 0;
846 if (use_proxy || target.protocol != P_FTP) {
848 * HTTP session
850 char *str;
851 int status;
853 /* Open socket to http(s) server */
854 if (target.protocol == P_HTTPS) {
855 /* openssl-based helper
856 * Inconvenient API since we can't give it an open fd
858 int fd = spawn_https_helper(server.host, server.port);
859 sfp = fdopen(fd, "r+");
860 if (!sfp)
861 bb_perror_msg_and_die(bb_msg_memory_exhausted);
862 } else
863 sfp = open_socket(lsa);
864 #if SSL_HELPER
865 if (target.protocol == P_HTTPS)
866 spawn_https_helper1(fileno(sfp));
867 #endif
868 /* Send HTTP request */
869 if (use_proxy) {
870 SENDFMT(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
871 target.protocol, target.host,
872 target.path);
873 } else {
874 SENDFMT(sfp, "%s /%s HTTP/1.1\r\n",
875 (option_mask32 & WGET_OPT_POST_DATA) ? "POST" : "GET",
876 target.path);
878 if (!USR_HEADER_HOST)
879 SENDFMT(sfp, "Host: %s\r\n", target.host);
880 if (!USR_HEADER_USER_AGENT)
881 SENDFMT(sfp, "User-Agent: %s\r\n", G.user_agent);
883 /* Ask server to close the connection as soon as we are done
884 * (IOW: we do not intend to send more requests)
886 SENDFMT(sfp, "Connection: close\r\n");
888 #if ENABLE_FEATURE_WGET_AUTHENTICATION
889 if (target.user && !USR_HEADER_AUTH) {
890 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
891 base64enc(target.user));
893 if (use_proxy && server.user && !USR_HEADER_PROXY_AUTH) {
894 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n",
895 base64enc(server.user));
897 #endif
899 if (G.beg_range != 0 && !USR_HEADER_RANGE)
900 SENDFMT(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
902 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
903 if (G.extra_headers) {
904 log_io(G.extra_headers);
905 fputs(G.extra_headers, sfp);
908 if (option_mask32 & WGET_OPT_POST_DATA) {
909 SENDFMT(sfp,
910 "Content-Type: application/x-www-form-urlencoded\r\n"
911 "Content-Length: %u\r\n"
912 "\r\n"
913 "%s",
914 (int) strlen(G.post_data), G.post_data
916 } else
917 #endif
919 SENDFMT(sfp, "\r\n");
922 fflush(sfp);
925 * Retrieve HTTP response line and check for "200" status code.
927 read_response:
928 fgets_and_trim(sfp);
930 str = G.wget_buf;
931 str = skip_non_whitespace(str);
932 str = skip_whitespace(str);
933 // FIXME: no error check
934 // xatou wouldn't work: "200 OK"
935 status = atoi(str);
936 switch (status) {
937 case 0:
938 case 100:
939 while (gethdr(sfp) != NULL)
940 /* eat all remaining headers */;
941 goto read_response;
942 case 200:
944 Response 204 doesn't say "null file", it says "metadata
945 has changed but data didn't":
947 "10.2.5 204 No Content
948 The server has fulfilled the request but does not need to return
949 an entity-body, and might want to return updated metainformation.
950 The response MAY include new or updated metainformation in the form
951 of entity-headers, which if present SHOULD be associated with
952 the requested variant.
954 If the client is a user agent, it SHOULD NOT change its document
955 view from that which caused the request to be sent. This response
956 is primarily intended to allow input for actions to take place
957 without causing a change to the user agent's active document view,
958 although any new or updated metainformation SHOULD be applied
959 to the document currently in the user agent's active view.
961 The 204 response MUST NOT include a message-body, and thus
962 is always terminated by the first empty line after the header fields."
964 However, in real world it was observed that some web servers
965 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
967 case 204:
968 if (G.beg_range != 0) {
969 /* "Range:..." was not honored by the server.
970 * Restart download from the beginning.
972 reset_beg_range_to_zero();
974 break;
975 case 300: /* redirection */
976 case 301:
977 case 302:
978 case 303:
979 break;
980 case 206: /* Partial Content */
981 if (G.beg_range != 0)
982 /* "Range:..." worked. Good. */
983 break;
984 /* Partial Content even though we did not ask for it??? */
985 /* fall through */
986 default:
987 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
991 * Retrieve HTTP headers.
993 while ((str = gethdr(sfp)) != NULL) {
994 static const char keywords[] ALIGN1 =
995 "content-length\0""transfer-encoding\0""location\0";
996 enum {
997 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
999 smalluint key;
1001 /* gethdr converted "FOO:" string to lowercase */
1003 /* strip trailing whitespace */
1004 char *s = strchrnul(str, '\0') - 1;
1005 while (s >= str && (*s == ' ' || *s == '\t')) {
1006 *s = '\0';
1007 s--;
1009 key = index_in_strings(keywords, G.wget_buf) + 1;
1010 if (key == KEY_content_length) {
1011 G.content_len = BB_STRTOOFF(str, NULL, 10);
1012 if (G.content_len < 0 || errno) {
1013 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
1015 G.got_clen = 1;
1016 continue;
1018 if (key == KEY_transfer_encoding) {
1019 if (strcmp(str_tolower(str), "chunked") != 0)
1020 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
1021 G.chunked = 1;
1023 if (key == KEY_location && status >= 300) {
1024 if (--redir_limit == 0)
1025 bb_error_msg_and_die("too many redirections");
1026 fclose(sfp);
1027 if (str[0] == '/') {
1028 free(redirected_path);
1029 target.path = redirected_path = xstrdup(str+1);
1030 /* lsa stays the same: it's on the same server */
1031 } else {
1032 parse_url(str, &target);
1033 if (!use_proxy) {
1034 /* server.user remains untouched */
1035 free(server.allocated);
1036 server.allocated = NULL;
1037 server.host = target.host;
1038 /* strip_ipv6_scope_id(target.host); - no! */
1039 /* we assume remote never gives us IPv6 addr with scope id */
1040 server.port = target.port;
1041 free(lsa);
1042 goto resolve_lsa;
1043 } /* else: lsa stays the same: we use proxy */
1045 goto establish_session;
1048 // if (status >= 300)
1049 // bb_error_msg_and_die("bad redirection (no Location: header from server)");
1051 /* For HTTP, data is pumped over the same connection */
1052 dfp = sfp;
1054 } else {
1056 * FTP session
1058 sfp = prepare_ftp_session(&dfp, &target, lsa);
1061 free(lsa);
1063 if (!(option_mask32 & WGET_OPT_SPIDER)) {
1064 if (G.output_fd < 0)
1065 G.output_fd = xopen(G.fname_out, G.o_flags);
1066 retrieve_file_data(dfp);
1067 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
1068 xclose(G.output_fd);
1069 G.output_fd = -1;
1073 if (dfp != sfp) {
1074 /* It's ftp. Close data connection properly */
1075 fclose(dfp);
1076 if (ftpcmd(NULL, NULL, sfp) != 226)
1077 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
1078 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
1080 fclose(sfp);
1082 free(server.allocated);
1083 free(target.allocated);
1084 free(server.user);
1085 free(target.user);
1086 free(fname_out_alloc);
1087 free(redirected_path);
1090 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1091 int wget_main(int argc UNUSED_PARAM, char **argv)
1093 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1094 static const char wget_longopts[] ALIGN1 =
1095 /* name, has_arg, val */
1096 "continue\0" No_argument "c"
1097 //FIXME: -s isn't --spider, it's --save-headers!
1098 "spider\0" No_argument "s"
1099 "quiet\0" No_argument "q"
1100 "output-document\0" Required_argument "O"
1101 "directory-prefix\0" Required_argument "P"
1102 "proxy\0" Required_argument "Y"
1103 "user-agent\0" Required_argument "U"
1104 #if ENABLE_FEATURE_WGET_TIMEOUT
1105 "timeout\0" Required_argument "T"
1106 #endif
1107 /* Ignored: */
1108 // "tries\0" Required_argument "t"
1109 /* Ignored (we always use PASV): */
1110 "passive-ftp\0" No_argument "\xff"
1111 "header\0" Required_argument "\xfe"
1112 "post-data\0" Required_argument "\xfd"
1113 /* Ignored (we don't do ssl) */
1114 "no-check-certificate\0" No_argument "\xfc"
1115 /* Ignored (we don't support caching) */
1116 "no-cache\0" No_argument "\xfb"
1118 #endif
1120 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1121 llist_t *headers_llist = NULL;
1122 #endif
1124 INIT_G();
1126 #if ENABLE_FEATURE_WGET_TIMEOUT
1127 G.timeout_seconds = 900;
1128 signal(SIGALRM, alarm_handler);
1129 #endif
1130 G.proxy_flag = "on"; /* use proxies if env vars are set */
1131 G.user_agent = "Wget"; /* "User-Agent" header field */
1133 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1134 applet_long_options = wget_longopts;
1135 #endif
1136 opt_complementary = "-1"
1137 IF_FEATURE_WGET_TIMEOUT(":T+")
1138 IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
1139 getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
1140 &G.fname_out, &G.dir_prefix,
1141 &G.proxy_flag, &G.user_agent,
1142 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
1143 NULL /* -t RETRIES */
1144 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
1145 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
1147 argv += optind;
1149 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
1150 if (headers_llist) {
1151 int size = 0;
1152 char *hdr;
1153 llist_t *ll = headers_llist;
1154 while (ll) {
1155 size += strlen(ll->data) + 2;
1156 ll = ll->link;
1158 G.extra_headers = hdr = xmalloc(size + 1);
1159 while (headers_llist) {
1160 int bit;
1161 const char *words;
1163 size = sprintf(hdr, "%s\r\n",
1164 (char*)llist_pop(&headers_llist));
1165 /* a bit like index_in_substrings but don't match full key */
1166 bit = 1;
1167 words = wget_user_headers;
1168 while (*words) {
1169 if (strstr(hdr, words) == hdr) {
1170 G.user_headers |= bit;
1171 break;
1173 bit <<= 1;
1174 words += strlen(words) + 1;
1176 hdr += size;
1179 #endif
1181 G.output_fd = -1;
1182 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1183 if (G.fname_out) { /* -O FILE ? */
1184 if (LONE_DASH(G.fname_out)) { /* -O - ? */
1185 G.output_fd = 1;
1186 option_mask32 &= ~WGET_OPT_CONTINUE;
1188 /* compat with wget: -O FILE can overwrite */
1189 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
1192 while (*argv)
1193 download_one_url(*argv++);
1195 if (G.output_fd >= 0)
1196 xclose(G.output_fd);
1198 #if ENABLE_FEATURE_CLEAN_UP && ENABLE_FEATURE_WGET_LONG_OPTIONS
1199 free(G.extra_headers);
1200 #endif
1201 FINI_G();
1203 return EXIT_SUCCESS;