Changes for kernel and Busybox
[tomato.git] / release / src / router / busybox / networking / wget.c
blob1991a1072812acc4484d64e2ee1202709468a8d7
/* vi: set sw=4 ts=4: */
/*
 * wget - retrieve a file using HTTP or FTP
 *
 * Chip Rosenthal Covad Communications <chip@laserlink.net>
 * Licensed under GPLv2, see file LICENSE in this source tree.
 *
 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
 */
12 //usage:#define wget_trivial_usage
13 //usage: IF_FEATURE_WGET_LONG_OPTIONS(
14 //usage: "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
15 //usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
16 //usage: " [--no-check-certificate] [-U|--user-agent AGENT]"
17 //usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
18 //usage: )
19 //usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
20 //usage: "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
21 //usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
22 //usage: )
23 //usage:#define wget_full_usage "\n\n"
24 //usage: "Retrieve files via HTTP or FTP\n"
25 //usage: "\n -s Spider mode - only check file existence"
26 //usage: "\n -c Continue retrieval of aborted transfer"
27 //usage: "\n -q Quiet"
28 //usage: "\n -P DIR Save to DIR (default .)"
29 //usage: IF_FEATURE_WGET_TIMEOUT(
30 //usage: "\n -T SEC Network read timeout is SEC seconds"
31 //usage: )
32 //usage: "\n -O FILE Save to FILE ('-' for stdout)"
33 //usage: "\n -U STR Use STR for User-Agent header"
34 //usage: "\n -Y Use proxy ('on' or 'off')"
36 #include "libbb.h"
/* I/O tracing hook used by fgets_and_trim() ("< line") and ftpcmd()
 * ("> command"). Swap the two definitions below to have each traced
 * line reported through bb_error_msg(); by default the macro expands
 * to a no-op expression.
 */
//#define log_io(...) bb_error_msg(__VA_ARGS__)
#define log_io(...) ((void)0)
42 struct host_info {
43 char *allocated;
44 const char *path;
45 const char *user;
46 char *host;
47 int port;
48 smallint is_ftp;
52 /* Globals */
53 struct globals {
54 off_t content_len; /* Content-length of the file */
55 off_t beg_range; /* Range at which continue begins */
56 #if ENABLE_FEATURE_WGET_STATUSBAR
57 off_t transferred; /* Number of bytes transferred so far */
58 const char *curfile; /* Name of current file being transferred */
59 bb_progress_t pmt;
60 #endif
61 char *dir_prefix;
62 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
63 char *post_data;
64 char *extra_headers;
65 #endif
66 char *fname_out; /* where to direct output (-O) */
67 const char *proxy_flag; /* Use proxies if env vars are set */
68 const char *user_agent; /* "User-Agent" header field */
69 #if ENABLE_FEATURE_WGET_TIMEOUT
70 unsigned timeout_seconds;
71 #endif
72 int output_fd;
73 int o_flags;
74 smallint chunked; /* chunked transfer encoding */
75 smallint got_clen; /* got content-length: from server */
76 /* Local downloads do benefit from big buffer.
77 * With 512 byte buffer, it was measured to be
78 * an order of magnitude slower than with big one.
80 uint64_t just_to_align_next_member;
81 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
82 } FIX_ALIASING;
/* G is the applet's single 'struct globals' instance, reached through
 * the libbb ptr_to_globals pointer.
 * INIT_G() allocates it zero-filled and, when the timeout feature is
 * enabled, presets the network read timeout to 900 seconds.
 */
#define G (*ptr_to_globals)
#define INIT_G() do { \
	SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
	IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) \
} while (0)
90 /* Must match option string! */
91 enum {
92 WGET_OPT_CONTINUE = (1 << 0),
93 WGET_OPT_SPIDER = (1 << 1),
94 WGET_OPT_QUIET = (1 << 2),
95 WGET_OPT_OUTNAME = (1 << 3),
96 WGET_OPT_PREFIX = (1 << 4),
97 WGET_OPT_PROXY = (1 << 5),
98 WGET_OPT_USER_AGENT = (1 << 6),
99 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
100 WGET_OPT_RETRIES = (1 << 8),
101 WGET_OPT_PASSIVE = (1 << 9),
102 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
103 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
/* Values accepted by progress_meter()'s 'flag' argument */
enum {
	PROGRESS_START = -1, /* initialise the bar, then draw it */
	PROGRESS_END   = 0,  /* draw it one last time and release it */
	PROGRESS_BUMP  = 1,  /* plain redraw */
};
111 #if ENABLE_FEATURE_WGET_STATUSBAR
112 static void progress_meter(int flag)
114 if (option_mask32 & WGET_OPT_QUIET)
115 return;
117 if (flag == PROGRESS_START)
118 bb_progress_init(&G.pmt, G.curfile);
120 bb_progress_update(&G.pmt,
121 G.beg_range,
122 G.transferred,
123 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
126 if (flag == PROGRESS_END) {
127 bb_progress_free(&G.pmt);
128 bb_putchar_stderr('\n');
129 G.transferred = 0;
132 #else
133 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
134 #endif
/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * Edits 'host' in place: "[fe80::1%eth0]:80" becomes "[fe80::1]:80".
 * Anything that is not of the form "[addr%zone]" or "[addr%zone]:port"
 * is left untouched.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */

	if (host[0] != '[')
		return; /* not IPv6 */

	scope = strchr(host, '%');
	if (!scope)
		return;

	/* Remove the IPv6 zone identifier from the host address */
	cp = strchr(host, ']');
	if (!cp || cp < scope || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]"), or the '%'
		 * sits after the closing bracket (e.g. "[::1]:80%x"): in the
		 * latter case the copy below would have its destination
		 * after its source and must not be attempted */
		return;
	}

	/* cp points to "]...", scope points to "%eth0]..." */
	memmove(scope, cp, strlen(cp) + 1);
}
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/* Base64-encode character string.
 *
 * The result is written to, and returned as, the shared G.wget_buf, so
 * it is only valid until the buffer is next used. Input longer than
 * roughly 3/4 of the buffer is silently truncated so that the encoded
 * form fits.
 */
static char *base64enc(const char *str)
{
	unsigned len = strlen(str);
	if (len > sizeof(G.wget_buf)/4*3 - 10) /* paranoia */
		len = sizeof(G.wget_buf)/4*3 - 10;
	bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
	return G.wget_buf;
}
#endif
/* Cut 's' at its first control character (any byte below ' ').
 *
 * Used before echoing server-supplied text in error messages.
 * Bytes are compared as unsigned, so bytes >= 0x80 are kept.
 * Modifies 's' in place and returns it.
 */
static char* sanitize_string(char *s)
{
	unsigned char *p = (void *) s;

	/* the terminating NUL is itself below ' ', so this always stops */
	while (*p >= ' ')
		p++;
	*p = '\0';
	return s;
}
194 static FILE *open_socket(len_and_sockaddr *lsa)
196 FILE *fp;
198 /* glibc 2.4 seems to try seeking on it - ??! */
199 /* hopefully it understands what ESPIPE means... */
200 fp = fdopen(xconnect_stream(lsa), "r+");
201 if (fp == NULL)
202 bb_perror_msg_and_die(bb_msg_memory_exhausted);
204 return fp;
207 /* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
208 static char fgets_and_trim(FILE *fp)
210 char c;
211 char *buf_ptr;
213 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
214 bb_perror_msg_and_die("error getting response");
216 buf_ptr = strchrnul(G.wget_buf, '\n');
217 c = *buf_ptr;
218 *buf_ptr = '\0';
219 buf_ptr = strchrnul(G.wget_buf, '\r');
220 *buf_ptr = '\0';
222 log_io("< %s", G.wget_buf);
224 return c;
227 static int ftpcmd(const char *s1, const char *s2, FILE *fp)
229 int result;
230 if (s1) {
231 if (!s2)
232 s2 = "";
233 fprintf(fp, "%s%s\r\n", s1, s2);
234 fflush(fp);
235 log_io("> %s%s", s1, s2);
238 do {
239 fgets_and_trim(fp);
240 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
242 G.wget_buf[3] = '\0';
243 result = xatoi_positive(G.wget_buf);
244 G.wget_buf[3] = ' ';
245 return result;
248 static void parse_url(const char *src_url, struct host_info *h)
250 char *url, *p, *sp;
252 free(h->allocated);
253 h->allocated = url = xstrdup(src_url);
255 if (strncmp(url, "http://", 7) == 0) {
256 h->port = bb_lookup_port("http", "tcp", 80);
257 h->host = url + 7;
258 h->is_ftp = 0;
259 } else if (strncmp(url, "ftp://", 6) == 0) {
260 h->port = bb_lookup_port("ftp", "tcp", 21);
261 h->host = url + 6;
262 h->is_ftp = 1;
263 } else
264 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
266 // FYI:
267 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
268 // 'GET /?var=a/b HTTP 1.0'
269 // and saves 'index.html?var=a%2Fb' (we save 'b')
270 // wget 'http://busybox.net?login=john@doe':
271 // request: 'GET /?login=john@doe HTTP/1.0'
272 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
273 // wget 'http://busybox.net#test/test':
274 // request: 'GET / HTTP/1.0'
275 // saves: 'index.html' (we save 'test')
277 // We also don't add unique .N suffix if file exists...
278 sp = strchr(h->host, '/');
279 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
280 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
281 if (!sp) {
282 h->path = "";
283 } else if (*sp == '/') {
284 *sp = '\0';
285 h->path = sp + 1;
286 } else { // '#' or '?'
287 // http://busybox.net?login=john@doe is a valid URL
288 // memmove converts to:
289 // http:/busybox.nett?login=john@doe...
290 memmove(h->host - 1, h->host, sp - h->host);
291 h->host--;
292 sp[-1] = '\0';
293 h->path = sp;
296 // We used to set h->user to NULL here, but this interferes
297 // with handling of code 302 ("object was moved")
299 sp = strrchr(h->host, '@');
300 if (sp != NULL) {
301 // URL-decode "user:password" string before base64-encoding:
302 // wget http://test:my%20pass@example.com should send
303 // Authorization: Basic dGVzdDpteSBwYXNz
304 // which decodes to "test:my pass".
305 // Standard wget and curl do this too.
306 *sp = '\0';
307 h->user = percent_decode_in_place(h->host, /*strict:*/ 0);
308 h->host = sp + 1;
311 sp = h->host;
314 static char *gethdr(FILE *fp)
316 char *s, *hdrval;
317 int c;
319 /* *istrunc = 0; */
321 /* retrieve header line */
322 c = fgets_and_trim(fp);
324 /* end of the headers? */
325 if (G.wget_buf[0] == '\0')
326 return NULL;
328 /* convert the header name to lower case */
329 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.'; ++s) {
330 /* tolower for "A-Z", no-op for "0-9a-z-." */
331 *s |= 0x20;
334 /* verify we are at the end of the header name */
335 if (*s != ':')
336 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
338 /* locate the start of the header value */
339 *s++ = '\0';
340 hdrval = skip_whitespace(s);
342 if (c != '\n') {
343 /* Rats! The buffer isn't big enough to hold the entire header value */
344 while (c = getc(fp), c != EOF && c != '\n')
345 continue;
348 return hdrval;
351 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
353 FILE *sfp;
354 char *str;
355 int port;
357 if (!target->user)
358 target->user = xstrdup("anonymous:busybox@");
360 sfp = open_socket(lsa);
361 if (ftpcmd(NULL, NULL, sfp) != 220)
362 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
365 * Splitting username:password pair,
366 * trying to log in
368 str = strchr(target->user, ':');
369 if (str)
370 *str++ = '\0';
371 switch (ftpcmd("USER ", target->user, sfp)) {
372 case 230:
373 break;
374 case 331:
375 if (ftpcmd("PASS ", str, sfp) == 230)
376 break;
377 /* fall through (failed login) */
378 default:
379 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
382 ftpcmd("TYPE I", NULL, sfp);
385 * Querying file size
387 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
388 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
389 if (G.content_len < 0 || errno) {
390 bb_error_msg_and_die("SIZE value is garbage");
392 G.got_clen = 1;
396 * Entering passive mode
398 if (ftpcmd("PASV", NULL, sfp) != 227) {
399 pasv_error:
400 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
402 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
403 // Server's IP is N1.N2.N3.N4 (we ignore it)
404 // Server's port for data connection is P1*256+P2
405 str = strrchr(G.wget_buf, ')');
406 if (str) str[0] = '\0';
407 str = strrchr(G.wget_buf, ',');
408 if (!str) goto pasv_error;
409 port = xatou_range(str+1, 0, 255);
410 *str = '\0';
411 str = strrchr(G.wget_buf, ',');
412 if (!str) goto pasv_error;
413 port += xatou_range(str+1, 0, 255) * 256;
414 set_nport(&lsa->u.sa, htons(port));
416 *dfpp = open_socket(lsa);
418 if (G.beg_range) {
419 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
420 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
421 G.content_len -= G.beg_range;
424 if (ftpcmd("RETR ", target->path, sfp) > 150)
425 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
427 return sfp;
430 static void NOINLINE retrieve_file_data(FILE *dfp)
432 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
433 # if ENABLE_FEATURE_WGET_TIMEOUT
434 unsigned second_cnt;
435 # endif
436 struct pollfd polldata;
438 polldata.fd = fileno(dfp);
439 polldata.events = POLLIN | POLLPRI;
440 #endif
441 progress_meter(PROGRESS_START);
443 if (G.chunked)
444 goto get_clen;
446 /* Loops only if chunked */
447 while (1) {
449 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
450 /* Must use nonblocking I/O, otherwise fread will loop
451 * and *block* until it reads full buffer,
452 * which messes up progress bar and/or timeout logic.
453 * Because of nonblocking I/O, we need to dance
454 * very carefully around EAGAIN. See explanation at
455 * clearerr() call.
457 ndelay_on(polldata.fd);
458 #endif
459 while (1) {
460 int n;
461 unsigned rdsz;
463 rdsz = sizeof(G.wget_buf);
464 if (G.got_clen) {
465 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
466 if ((int)G.content_len <= 0)
467 break;
468 rdsz = (unsigned)G.content_len;
472 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
473 # if ENABLE_FEATURE_WGET_TIMEOUT
474 second_cnt = G.timeout_seconds;
475 # endif
476 while (1) {
477 if (safe_poll(&polldata, 1, 1000) != 0)
478 break; /* error, EOF, or data is available */
479 # if ENABLE_FEATURE_WGET_TIMEOUT
480 if (second_cnt != 0 && --second_cnt == 0) {
481 progress_meter(PROGRESS_END);
482 bb_error_msg_and_die("download timed out");
484 # endif
485 /* Needed for "stalled" indicator */
486 progress_meter(PROGRESS_BUMP);
489 /* fread internally uses read loop, which in our case
490 * is usually exited when we get EAGAIN.
491 * In this case, libc sets error marker on the stream.
492 * Need to clear it before next fread to avoid possible
493 * rare false positive ferror below. Rare because usually
494 * fread gets more than zero bytes, and we don't fall
495 * into if (n <= 0) ...
497 clearerr(dfp);
498 errno = 0;
499 #endif
500 n = fread(G.wget_buf, 1, rdsz, dfp);
501 /* man fread:
502 * If error occurs, or EOF is reached, the return value
503 * is a short item count (or zero).
504 * fread does not distinguish between EOF and error.
506 if (n <= 0) {
507 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
508 if (errno == EAGAIN) /* poll lied, there is no data? */
509 continue; /* yes */
510 #endif
511 if (ferror(dfp))
512 bb_perror_msg_and_die(bb_msg_read_error);
513 break; /* EOF, not error */
516 xwrite(G.output_fd, G.wget_buf, n);
518 #if ENABLE_FEATURE_WGET_STATUSBAR
519 G.transferred += n;
520 progress_meter(PROGRESS_BUMP);
521 #endif
522 if (G.got_clen) {
523 G.content_len -= n;
524 if (G.content_len == 0)
525 break;
528 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
529 clearerr(dfp);
530 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
531 #endif
532 if (!G.chunked)
533 break;
535 fgets_and_trim(dfp); /* Eat empty line */
536 get_clen:
537 fgets_and_trim(dfp);
538 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
539 /* FIXME: error check? */
540 if (G.content_len == 0)
541 break; /* all done! */
542 G.got_clen = 1;
545 /* Draw full bar and free its resources */
546 G.chunked = 0; /* makes it show 100% even for chunked download */
547 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
548 progress_meter(PROGRESS_END);
551 static void download_one_url(const char *url)
553 bool use_proxy; /* Use proxies if env vars are set */
554 int redir_limit;
555 len_and_sockaddr *lsa;
556 FILE *sfp; /* socket to web/ftp server */
557 FILE *dfp; /* socket to ftp server (data) */
558 char *proxy = NULL;
559 char *fname_out_alloc;
560 char *redirected_path = NULL;
561 struct host_info server;
562 struct host_info target;
564 server.allocated = NULL;
565 target.allocated = NULL;
566 server.user = NULL;
567 target.user = NULL;
569 parse_url(url, &target);
571 /* Use the proxy if necessary */
572 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
573 if (use_proxy) {
574 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
575 use_proxy = (proxy && proxy[0]);
576 if (use_proxy)
577 parse_url(proxy, &server);
579 if (!use_proxy) {
580 server.port = target.port;
581 if (ENABLE_FEATURE_IPV6) {
582 //free(server.allocated); - can't be non-NULL
583 server.host = server.allocated = xstrdup(target.host);
584 } else {
585 server.host = target.host;
589 if (ENABLE_FEATURE_IPV6)
590 strip_ipv6_scope_id(target.host);
592 /* If there was no -O FILE, guess output filename */
593 fname_out_alloc = NULL;
594 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
595 G.fname_out = bb_get_last_path_component_nostrip(target.path);
596 /* handle "wget http://kernel.org//" */
597 if (G.fname_out[0] == '/' || !G.fname_out[0])
598 G.fname_out = (char*)"index.html";
599 /* -P DIR is considered only if there was no -O FILE */
600 else {
601 if (G.dir_prefix)
602 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
603 else {
604 /* redirects may free target.path later, need to make a copy */
605 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
609 #if ENABLE_FEATURE_WGET_STATUSBAR
610 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
611 #endif
613 /* Determine where to start transfer */
614 G.beg_range = 0;
615 if (option_mask32 & WGET_OPT_CONTINUE) {
616 G.output_fd = open(G.fname_out, O_WRONLY);
617 if (G.output_fd >= 0) {
618 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
620 /* File doesn't exist. We do not create file here yet.
621 * We are not sure it exists on remote side */
624 redir_limit = 5;
625 resolve_lsa:
626 lsa = xhost2sockaddr(server.host, server.port);
627 if (!(option_mask32 & WGET_OPT_QUIET)) {
628 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
629 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
630 free(s);
632 establish_session:
633 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
634 G.got_clen = 0;
635 G.chunked = 0;
636 if (use_proxy || !target.is_ftp) {
638 * HTTP session
640 char *str;
641 int status;
644 /* Open socket to http server */
645 sfp = open_socket(lsa);
647 /* Send HTTP request */
648 if (use_proxy) {
649 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
650 target.is_ftp ? "f" : "ht", target.host,
651 target.path);
652 } else {
653 if (option_mask32 & WGET_OPT_POST_DATA)
654 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
655 else
656 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
659 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
660 target.host, G.user_agent);
662 /* Ask server to close the connection as soon as we are done
663 * (IOW: we do not intend to send more requests)
665 fprintf(sfp, "Connection: close\r\n");
667 #if ENABLE_FEATURE_WGET_AUTHENTICATION
668 if (target.user) {
669 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
670 base64enc(target.user));
672 if (use_proxy && server.user) {
673 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
674 base64enc(server.user));
676 #endif
678 if (G.beg_range)
679 fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
681 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
682 if (G.extra_headers)
683 fputs(G.extra_headers, sfp);
685 if (option_mask32 & WGET_OPT_POST_DATA) {
686 fprintf(sfp,
687 "Content-Type: application/x-www-form-urlencoded\r\n"
688 "Content-Length: %u\r\n"
689 "\r\n"
690 "%s",
691 (int) strlen(G.post_data), G.post_data
693 } else
694 #endif
696 fprintf(sfp, "\r\n");
699 fflush(sfp);
702 * Retrieve HTTP response line and check for "200" status code.
704 read_response:
705 fgets_and_trim(sfp);
707 str = G.wget_buf;
708 str = skip_non_whitespace(str);
709 str = skip_whitespace(str);
710 // FIXME: no error check
711 // xatou wouldn't work: "200 OK"
712 status = atoi(str);
713 switch (status) {
714 case 0:
715 case 100:
716 while (gethdr(sfp) != NULL)
717 /* eat all remaining headers */;
718 goto read_response;
719 case 200:
721 Response 204 doesn't say "null file", it says "metadata
722 has changed but data didn't":
724 "10.2.5 204 No Content
725 The server has fulfilled the request but does not need to return
726 an entity-body, and might want to return updated metainformation.
727 The response MAY include new or updated metainformation in the form
728 of entity-headers, which if present SHOULD be associated with
729 the requested variant.
731 If the client is a user agent, it SHOULD NOT change its document
732 view from that which caused the request to be sent. This response
733 is primarily intended to allow input for actions to take place
734 without causing a change to the user agent's active document view,
735 although any new or updated metainformation SHOULD be applied
736 to the document currently in the user agent's active view.
738 The 204 response MUST NOT include a message-body, and thus
739 is always terminated by the first empty line after the header fields."
741 However, in real world it was observed that some web servers
742 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
744 case 204:
745 break;
746 case 300: /* redirection */
747 case 301:
748 case 302:
749 case 303:
750 break;
751 case 206:
752 if (G.beg_range)
753 break;
754 /* fall through */
755 default:
756 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
760 * Retrieve HTTP headers.
762 while ((str = gethdr(sfp)) != NULL) {
763 static const char keywords[] ALIGN1 =
764 "content-length\0""transfer-encoding\0""location\0";
765 enum {
766 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
768 smalluint key;
770 /* gethdr converted "FOO:" string to lowercase */
772 /* strip trailing whitespace */
773 char *s = strchrnul(str, '\0') - 1;
774 while (s >= str && (*s == ' ' || *s == '\t')) {
775 *s = '\0';
776 s--;
778 key = index_in_strings(keywords, G.wget_buf) + 1;
779 if (key == KEY_content_length) {
780 G.content_len = BB_STRTOOFF(str, NULL, 10);
781 if (G.content_len < 0 || errno) {
782 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
784 G.got_clen = 1;
785 continue;
787 if (key == KEY_transfer_encoding) {
788 if (strcmp(str_tolower(str), "chunked") != 0)
789 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
790 G.chunked = 1;
792 if (key == KEY_location && status >= 300) {
793 if (--redir_limit == 0)
794 bb_error_msg_and_die("too many redirections");
795 fclose(sfp);
796 if (str[0] == '/') {
797 free(redirected_path);
798 target.path = redirected_path = xstrdup(str+1);
799 /* lsa stays the same: it's on the same server */
800 } else {
801 parse_url(str, &target);
802 if (!use_proxy) {
803 free(server.allocated);
804 server.allocated = NULL;
805 server.host = target.host;
806 /* strip_ipv6_scope_id(target.host); - no! */
807 /* we assume remote never gives us IPv6 addr with scope id */
808 server.port = target.port;
809 free(lsa);
810 goto resolve_lsa;
811 } /* else: lsa stays the same: we use proxy */
813 goto establish_session;
816 // if (status >= 300)
817 // bb_error_msg_and_die("bad redirection (no Location: header from server)");
819 /* For HTTP, data is pumped over the same connection */
820 dfp = sfp;
822 } else {
824 * FTP session
826 sfp = prepare_ftp_session(&dfp, &target, lsa);
829 free(lsa);
831 if (!(option_mask32 & WGET_OPT_SPIDER)) {
832 if (G.output_fd < 0)
833 G.output_fd = xopen(G.fname_out, G.o_flags);
834 retrieve_file_data(dfp);
835 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
836 xclose(G.output_fd);
837 G.output_fd = -1;
841 if (dfp != sfp) {
842 /* It's ftp. Close data connection properly */
843 fclose(dfp);
844 if (ftpcmd(NULL, NULL, sfp) != 226)
845 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
846 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
848 fclose(sfp);
850 free(server.allocated);
851 free(target.allocated);
852 free(fname_out_alloc);
853 free(redirected_path);
856 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
857 int wget_main(int argc UNUSED_PARAM, char **argv)
859 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
860 static const char wget_longopts[] ALIGN1 =
861 /* name, has_arg, val */
862 "continue\0" No_argument "c"
863 //FIXME: -s isn't --spider, it's --save-headers!
864 "spider\0" No_argument "s"
865 "quiet\0" No_argument "q"
866 "output-document\0" Required_argument "O"
867 "directory-prefix\0" Required_argument "P"
868 "proxy\0" Required_argument "Y"
869 "user-agent\0" Required_argument "U"
870 #if ENABLE_FEATURE_WGET_TIMEOUT
871 "timeout\0" Required_argument "T"
872 #endif
873 /* Ignored: */
874 // "tries\0" Required_argument "t"
875 /* Ignored (we always use PASV): */
876 "passive-ftp\0" No_argument "\xff"
877 "header\0" Required_argument "\xfe"
878 "post-data\0" Required_argument "\xfd"
879 /* Ignored (we don't do ssl) */
880 "no-check-certificate\0" No_argument "\xfc"
882 #endif
884 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
885 llist_t *headers_llist = NULL;
886 #endif
888 INIT_G();
890 IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;)
891 G.proxy_flag = "on"; /* use proxies if env vars are set */
892 G.user_agent = "Wget"; /* "User-Agent" header field */
894 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
895 applet_long_options = wget_longopts;
896 #endif
897 opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
898 getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
899 &G.fname_out, &G.dir_prefix,
900 &G.proxy_flag, &G.user_agent,
901 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
902 NULL /* -t RETRIES */
903 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
904 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
906 argv += optind;
908 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
909 if (headers_llist) {
910 int size = 1;
911 char *cp;
912 llist_t *ll = headers_llist;
913 while (ll) {
914 size += strlen(ll->data) + 2;
915 ll = ll->link;
917 G.extra_headers = cp = xmalloc(size);
918 while (headers_llist) {
919 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
922 #endif
924 G.output_fd = -1;
925 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
926 if (G.fname_out) { /* -O FILE ? */
927 if (LONE_DASH(G.fname_out)) { /* -O - ? */
928 G.output_fd = 1;
929 option_mask32 &= ~WGET_OPT_CONTINUE;
931 /* compat with wget: -O FILE can overwrite */
932 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
935 while (*argv)
936 download_one_url(*argv++);
938 if (G.output_fd >= 0)
939 xclose(G.output_fd);
941 return EXIT_SUCCESS;