Merge branch 'Teaman-ND' into Teaman-RT
[tomato.git] / release / src / router / busybox / networking / wget.c
blobafe0d3ab768465beb491aef868fa80fd1f6f6495
1 /* vi: set sw=4 ts=4: */
2 /*
3 * wget - retrieve a file using HTTP or FTP
5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
6 * Licensed under GPLv2, see file LICENSE in this source tree.
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
11 #include "libbb.h"
// Pieces of one parsed URL; parse_url() fills these in.
13 struct host_info {
14 // May be used if we ever will want to free() all xstrdup()s...
15 /* char *allocated; */
// Text after the host part; parse_url() stores an empty string when the URL has none.
16 const char *path;
// Text before the last @ of the host part; parse_url() leaves it untouched when there is no @.
17 const char *user;
// Host part, pointing into the private copy of the URL that parse_url() makes.
18 char *host;
// Default port for the scheme, as returned by bb_lookup_port() in parse_url().
19 int port;
// 1 for ftp:// URLs, 0 for http:// URLs.
20 smallint is_ftp;
24 /* Globals */
// Download state shared by the functions in this file.
// NOTE(review): retrieve_file_data() counts content_len down as data arrives,
// so during a transfer it holds the number of bytes still expected.
// NOTE(review): got_clen is also set to 1 for chunked replies, where
// content_len is then the size of the current chunk.
25 struct globals {
26 off_t content_len; /* Content-length of the file */
27 off_t beg_range; /* Range at which continue begins */
28 #if ENABLE_FEATURE_WGET_STATUSBAR
29 off_t transferred; /* Number of bytes transferred so far */
30 const char *curfile; /* Name of current file being transferred */
31 bb_progress_t pmt;
32 #endif
33 #if ENABLE_FEATURE_WGET_TIMEOUT
34 unsigned timeout_seconds;
35 #endif
36 smallint chunked; /* chunked transfer encoding */
37 smallint got_clen; /* got content-length: from server */
38 } FIX_ALIASING;
// G overlays struct globals on the shared bb_common_bufsiz1 buffer.
39 #define G (*(struct globals*)&bb_common_bufsiz1)
// Compile-time guard: the array size below becomes negative, and the build
// fails, if G does not fit in COMMON_BUFSIZE.
40 struct BUG_G_too_big {
41 char BUG_G_too_big[sizeof(G) <= COMMON_BUFSIZE ? 1 : -1];
// Run-time setup of G: sets the default timeout of 900 seconds when the
// timeout feature is compiled in, and expands to nothing otherwise.
43 #define INIT_G() do { \
44 IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) \
45 } while (0)
48 /* Must match option string! */
// Bit masks tested against the value getopt32() returns in wget_main().
// The first nine follow the order of the short option string (c s q O P Y U T t);
// the remaining ones presumably map to the long-only options - TODO confirm
// against the getopt32 documentation.
49 enum {
50 WGET_OPT_CONTINUE = (1 << 0),
51 WGET_OPT_SPIDER = (1 << 1),
52 WGET_OPT_QUIET = (1 << 2),
53 WGET_OPT_OUTNAME = (1 << 3),
54 WGET_OPT_PREFIX = (1 << 4),
55 WGET_OPT_PROXY = (1 << 5),
56 WGET_OPT_USER_AGENT = (1 << 6),
57 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
58 WGET_OPT_RETRIES = (1 << 8),
59 WGET_OPT_PASSIVE = (1 << 9),
// The next two evaluate to 0 when long options are compiled out, so any
// test of opt against them is then always false.
60 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
61 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
// Flag values passed to progress_meter().
64 enum {
// Initialise the progress state, then draw.
65 PROGRESS_START = -1,
// Draw one last time, print a newline and reset G.transferred.
66 PROGRESS_END = 0,
// Plain redraw.
67 PROGRESS_BUMP = 1,
69 #if ENABLE_FEATURE_WGET_STATUSBAR
// Draws or refreshes the status bar; does nothing when -q (WGET_OPT_QUIET) was given.
// flag: PROGRESS_START initialises G.pmt first; PROGRESS_END additionally
// prints a newline to stderr and zeroes G.transferred; any other value only redraws.
70 static void progress_meter(int flag)
72 if (option_mask32 & WGET_OPT_QUIET)
73 return;
75 if (flag == PROGRESS_START)
76 bb_progress_init(&G.pmt);
// Last argument is 0 for chunked transfers; otherwise it is the resume offset
// plus bytes received plus bytes still expected (presumably the total size
// wanted by bb_progress_update - TODO confirm).
78 bb_progress_update(&G.pmt, G.curfile, G.beg_range, G.transferred,
79 G.chunked ? 0 : G.beg_range + G.transferred + G.content_len);
81 if (flag == PROGRESS_END) {
82 bb_putchar_stderr('\n');
83 G.transferred = 0;
86 #else
// Status bar compiled out: empty stub, so callers need no conditional code.
87 static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
88 #endif
/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * Removes the "%zone" part from a bracketed IPv6 host string, in place:
 *   "[fe80::1%eth0]:80"  ->  "[fe80::1]:80"
 * HOST is left untouched when it is not bracketed, has no '%', is not of
 * the form "[xx]" or "[xx]:nn", or when the '%' lies outside the brackets.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */

	if (host[0] != '[')
		return; /* not IPv6 */

	scope = strchr(host, '%');
	if (!scope)
		return;

	/* Remove the IPv6 zone identifier from the host address */
	cp = strchr(host, ']');
	if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]") */
		return;
	}

	/* A '%' located after the ']' (e.g. "[::1]:80%x") is not a zone id.
	 * Moving the tail in that case would copy towards higher addresses
	 * and run off the end of the string, so leave HOST alone. */
	if (scope > cp)
		return;

	/* cp points to "]...", scope points to "%eth0]...";
	 * move the tail, including its NUL, down over the zone id. */
	memmove(scope, cp, strlen(cp) + 1);
}
/* Fill up to NMEMB bytes of PTR from STREAM, restarting fread() whenever it
 * was cut short by a signal (EINTR). Returns how many bytes were stored;
 * that is fewer than NMEMB on EOF or on any other read error. */
static size_t safe_fread(void *ptr, size_t nmemb, FILE *stream)
{
	char *const start = (char*)ptr;
	char *cur = start;
	size_t got;

	for (;;) {
		/* Start each attempt with clean error state so that the
		 * ferror()/errno test below only sees this attempt */
		clearerr(stream);
		errno = 0;
		got = fread(cur, 1, nmemb, stream);
		cur += got;
		nmemb -= got;
		if (nmemb == 0 || !ferror(stream) || errno != EINTR)
			break;
	}

	return cur - start;
}
/* fgets() wrapper that retries when the call was interrupted by a signal.
 * Reads one line, or at most SIZE-1 bytes, from STREAM into S.
 * Returns S on success, NULL on EOF or on any error other than EINTR. */
static char *safe_fgets(char *s, int size, FILE *stream)
{
	char *line;

	for (;;) {
		clearerr(stream);
		errno = 0;
		line = fgets(s, size, stream);
		if (line != NULL)
			return line;
		/* NULL: retry only if a signal interrupted the read */
		if (!ferror(stream) || errno != EINTR)
			return NULL;
	}
}
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/* Base64-encode STR into BUF and return BUF.
 * BUF is assumed to be char buf[512]; input longer than MAX_INPUT bytes
 * is silently truncated before encoding (paranoia). */
static char *base64enc_512(char buf[512], const char *str)
{
	enum { MAX_INPUT = 512/4*3 - 10 };
	unsigned n = strlen(str);

	bb_uuencode(buf, str, n > MAX_INPUT ? MAX_INPUT : n, bb_uuenc_tbl_base64);
	return buf;
}
#endif
/* Cut S short, in place, at its first control character (any byte below
 * ' ', which includes the terminating NUL). Bytes with the high bit set
 * are kept. Returns S, for convenient use inside message calls. */
static char* sanitize_string(char *s)
{
	unsigned char *end;

	for (end = (unsigned char *) s; *end >= ' '; end++)
		continue;
	*end = '\0';
	return s;
}
181 static FILE *open_socket(len_and_sockaddr *lsa)
183 FILE *fp;
185 /* glibc 2.4 seems to try seeking on it - ??! */
186 /* hopefully it understands what ESPIPE means... */
187 fp = fdopen(xconnect_stream(lsa), "r+");
188 if (fp == NULL)
189 bb_perror_msg_and_die("fdopen");
191 return fp;
/* Send one FTP command (S1 immediately followed by S2, then CRLF) on FP
 * and wait for the reply. With S1 == NULL nothing is sent and only a
 * reply is read. A NULL S2 means "no argument".
 *
 * Reply lines are read into BUF (up to 510 bytes each, so BUF must be at
 * least that big) until one starts with a digit and has a space as its
 * fourth character. On return BUF holds that line without its trailing
 * CRLF; the human-readable text starts at BUF+4.
 *
 * Returns the numeric reply code. Exits the program on read failure. */
static int ftpcmd(const char *s1, const char *s2, FILE *fp, char *buf)
{
	int code;

	if (s1) {
		fprintf(fp, "%s%s\r\n", s1, s2 ? s2 : "");
		fflush(fp);
	}

	for (;;) {
		char *eol;

		if (!fgets(buf, 510, fp))
			bb_perror_msg_and_die("error getting response");

		eol = strstr(buf, "\r\n");
		if (eol)
			*eol = '\0';

		if (isdigit(buf[0]) && buf[3] == ' ')
			break;
	}

	/* Temporarily terminate after the three-digit code to parse it */
	buf[3] = '\0';
	code = xatoi_positive(buf);
	buf[3] = ' ';
	return code;
}
221 static void parse_url(char *src_url, struct host_info *h)
223 char *url, *p, *sp;
225 /* h->allocated = */ url = xstrdup(src_url);
227 if (strncmp(url, "http://", 7) == 0) {
228 h->port = bb_lookup_port("http", "tcp", 80);
229 h->host = url + 7;
230 h->is_ftp = 0;
231 } else if (strncmp(url, "ftp://", 6) == 0) {
232 h->port = bb_lookup_port("ftp", "tcp", 21);
233 h->host = url + 6;
234 h->is_ftp = 1;
235 } else
236 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
238 // FYI:
239 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
240 // 'GET /?var=a/b HTTP 1.0'
241 // and saves 'index.html?var=a%2Fb' (we save 'b')
242 // wget 'http://busybox.net?login=john@doe':
243 // request: 'GET /?login=john@doe HTTP/1.0'
244 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
245 // wget 'http://busybox.net#test/test':
246 // request: 'GET / HTTP/1.0'
247 // saves: 'index.html' (we save 'test')
249 // We also don't add unique .N suffix if file exists...
250 sp = strchr(h->host, '/');
251 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
252 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
253 if (!sp) {
254 h->path = "";
255 } else if (*sp == '/') {
256 *sp = '\0';
257 h->path = sp + 1;
258 } else { // '#' or '?'
259 // http://busybox.net?login=john@doe is a valid URL
260 // memmove converts to:
261 // http:/busybox.nett?login=john@doe...
262 memmove(h->host - 1, h->host, sp - h->host);
263 h->host--;
264 sp[-1] = '\0';
265 h->path = sp;
268 // We used to set h->user to NULL here, but this interferes
269 // with handling of code 302 ("object was moved")
271 sp = strrchr(h->host, '@');
272 if (sp != NULL) {
273 h->user = h->host;
274 *sp = '\0';
275 h->host = sp + 1;
278 sp = h->host;
/* Read one "Name: value" header line from FP into BUF (BUFSIZ bytes).
 *
 * Returns NULL at EOF or at the empty line that ends the header block.
 * Otherwise BUF holds the header name, lowercased and NUL-terminated,
 * and the return value points at the value inside BUF, with leading
 * whitespace skipped and the line ending removed.
 * A value too long for BUF is truncated; the rest of that line is read
 * and thrown away. A line that is not "name:" shaped exits the program. */
static char *gethdr(char *buf, size_t bufsiz, FILE *fp /*, int *istrunc*/)
{
	char *s, *hdrval;
	int c;

	/* *istrunc = 0; */

	/* retrieve header line */
	if (fgets(buf, bufsiz, fp) == NULL)
		return NULL;

	/* see if we are at the end of the headers */
	s = buf;
	while (*s == '\r')
		s++;
	if (*s == '\n')
		return NULL;

	/* convert the header name to lower case */
	s = buf;
	while (isalnum(*s) || *s == '-' || *s == '.') {
		/* tolower for "A-Z", no-op for "0-9a-z-." */
		*s |= 0x20;
		s++;
	}

	/* verify we are at the end of the header name */
	if (*s != ':')
		bb_error_msg_and_die("bad header line: %s", sanitize_string(buf));

	/* locate the start of the header value */
	*s = '\0';
	s++;
	hdrval = skip_whitespace(s);

	/* locate the end of header */
	for (; *s != '\0'; s++) {
		if (*s == '\r' || *s == '\n') {
			/* end of header found */
			*s = '\0';
			return hdrval;
		}
	}

	/* Rats! The buffer isn't big enough to hold the entire header value */
	do {
		c = getc(fp);
	} while (c != EOF && c != '\n');
	/* *istrunc = 1; */
	return hdrval;
}
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
/* URL encode, see RFC 2396.
 * Returns a newly allocated copy of STR in which every byte except
 * ASCII letters, digits and the characters !&'()*-.=_~ is replaced by
 * %XX (uppercase hex). The caller frees the result. */
static char *URL_escape(const char *str)
{
	/* Worst case: every input byte becomes three output bytes */
	char *res = xmalloc(strlen(str) * 3 + 1);
	char *dst = res;
	unsigned char c;

	while ((c = *str++) != '\0') {
		switch (c) {
		case '!': case '&': case '\'': case '(': case ')': case '*':
		case '-': case '.': case '=': case '_': case '~':
			*dst++ = c;
			continue;
		}
		if ((c >= '0' && c <= '9')
		 || ((c|0x20) >= 'a' && (c|0x20) <= 'z')
		) {
			*dst++ = c;
			continue;
		}
		*dst++ = '%';
		*dst++ = bb_hexdigits_upcase[c >> 4];
		*dst++ = bb_hexdigits_upcase[c & 0xf];
	}
	*dst = '\0';
	return res;
}
#endif
// Runs the FTP control dialogue up to the point where file data starts flowing:
// greeting, login, TYPE I, SIZE, PASV, optional REST, RETR.
// dfpp:   receives the stream of the freshly opened data connection.
// target: parsed URL. A missing user is replaced by anonymous:busybox@ and
//         the user string is split in place at its first colon.
// lsa:    address of the server; its port is overwritten with the data port
//         announced in the PASV reply before the data connection is opened.
// Returns the control connection stream. Every failure exits the program
// through bb_error_msg_and_die().
367 static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
// Scratch buffer for replies; ftpcmd() reads up to 510 bytes per line into it.
369 char buf[512];
370 FILE *sfp;
371 char *str;
372 int port;
374 if (!target->user)
375 target->user = xstrdup("anonymous:busybox@");
// Connect and wait for the 220 greeting; buf+4 skips the reply code and space.
377 sfp = open_socket(lsa);
378 if (ftpcmd(NULL, NULL, sfp, buf) != 220)
379 bb_error_msg_and_die("%s", sanitize_string(buf+4));
382 * Splitting username:password pair,
383 * trying to log in
// After the split, str is the password or NULL when there was no colon;
// ftpcmd() treats a NULL second argument as an empty string.
385 str = strchr(target->user, ':');
386 if (str)
387 *str++ = '\0';
// 230 means logged in without password, 331 means a password is wanted.
388 switch (ftpcmd("USER ", target->user, sfp, buf)) {
389 case 230:
390 break;
391 case 331:
392 if (ftpcmd("PASS ", str, sfp, buf) == 230)
393 break;
394 /* fall through (failed login) */
395 default:
396 bb_error_msg_and_die("ftp login: %s", sanitize_string(buf+4));
// The reply to TYPE I is not checked.
399 ftpcmd("TYPE I", NULL, sfp, buf);
402 * Querying file size
// A reply other than 213 is tolerated: the length then simply stays unknown.
// NOTE(review): the errno test assumes BB_STRTOOFF resets errno itself - TODO confirm.
404 if (ftpcmd("SIZE ", target->path, sfp, buf) == 213) {
405 G.content_len = BB_STRTOOFF(buf+4, NULL, 10);
406 if (G.content_len < 0 || errno) {
407 bb_error_msg_and_die("SIZE value is garbage");
409 G.got_clen = 1;
413 * Entering passive mode
415 if (ftpcmd("PASV", NULL, sfp, buf) != 227) {
416 pasv_error:
417 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(buf));
419 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
420 // Server's IP is N1.N2.N3.N4 (we ignore it)
421 // Server's port for data connection is P1*256+P2
// Parse from the right: drop everything from the last closing parenthesis,
// take P2 after the last comma, cut it off, then take P1 the same way.
422 str = strrchr(buf, ')');
423 if (str) str[0] = '\0';
424 str = strrchr(buf, ',');
425 if (!str) goto pasv_error;
426 port = xatou_range(str+1, 0, 255);
427 *str = '\0';
428 str = strrchr(buf, ',');
429 if (!str) goto pasv_error;
430 port += xatou_range(str+1, 0, 255) * 256;
// Reuse lsa for the data connection: same address, new port.
431 set_nport(lsa, htons(port));
433 *dfpp = open_socket(lsa);
// Resuming: ask the server to start at the offset already on disk.
// NOTE(review): when REST is refused (reply is not 350) the transfer still
// goes ahead from offset 0 while G.beg_range stays set - verify this is intended.
435 if (G.beg_range) {
436 sprintf(buf, "REST %"OFF_FMT"u", G.beg_range);
437 if (ftpcmd(buf, NULL, sfp, buf) == 350)
438 G.content_len -= G.beg_range;
// Any reply code above 150 to RETR is treated as failure.
441 if (ftpcmd("RETR ", target->path, sfp, buf) > 150)
442 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(buf));
444 return sfp;
// Copies the body arriving on dfp to output_fd and drives the progress meter.
// Three modes, selected by the globals:
//  - G.got_clen set, not chunked: read exactly G.content_len bytes;
//  - G.got_clen clear: read until EOF;
//  - G.chunked set: read a hex chunk size line, read that many bytes, skip the
//    line end after the chunk, repeat until a chunk size of 0.
// When the status bar or timeout feature is compiled in, the descriptor is
// switched to non-blocking mode and polled in slices of 1000 ms so that the
// meter can be refreshed and the timeout counted.
// Exits the program on read error or timeout. Write errors are left to xwrite().
447 static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd)
449 char buf[4*1024]; /* made bigger to speed up local xfers */
450 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
451 # if ENABLE_FEATURE_WGET_TIMEOUT
452 unsigned second_cnt;
453 # endif
454 struct pollfd polldata;
456 polldata.fd = fileno(dfp);
457 polldata.events = POLLIN | POLLPRI;
458 #endif
459 progress_meter(PROGRESS_START);
// Chunked replies must learn the first chunk size before any data is read,
// so jump into the loop at the point where the size line is parsed.
461 if (G.chunked)
462 goto get_clen;
464 /* Loops only if chunked */
465 while (1) {
467 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
468 ndelay_on(polldata.fd);
469 #endif
// Inner loop: one pass per buffer-sized read of the current body or chunk.
470 while (1) {
471 int n;
472 unsigned rdsz;
// Read a full buffer unless fewer bytes than that are still expected.
474 rdsz = sizeof(buf);
475 if (G.got_clen) {
476 if (G.content_len < (off_t)sizeof(buf)) {
477 if ((int)G.content_len <= 0)
478 break;
479 rdsz = (unsigned)G.content_len;
482 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
483 # if ENABLE_FEATURE_WGET_TIMEOUT
// The countdown restarts before every read, so the timeout measures idle
// time between reads, not total download time.
484 second_cnt = G.timeout_seconds;
485 # endif
486 while (1) {
487 if (safe_poll(&polldata, 1, 1000) != 0)
488 break; /* error, EOF, or data is available */
489 # if ENABLE_FEATURE_WGET_TIMEOUT
// A timeout value of 0 disables the check.
// NOTE(review): the perror variant is used although no failing call set errno
// here; the appended errno text is presumably meaningless - verify.
490 if (second_cnt != 0 && --second_cnt == 0) {
491 progress_meter(PROGRESS_END);
492 bb_perror_msg_and_die("download timed out");
494 # endif
495 /* Needed for "stalled" indicator */
496 progress_meter(PROGRESS_BUMP);
498 #endif
499 /* fread internally uses read loop, which in our case
500 * is usually exited when we get EAGAIN.
501 * In this case, libc sets error marker on the stream.
502 * Need to clear it before next fread to avoid possible
503 * rare false positive ferror below. Rare because usually
504 * fread gets more than zero bytes, and we don't fall
505 * into if (n <= 0) ...
507 clearerr(dfp);
508 errno = 0;
// NOTE(review): safe_fread() returns size_t but n is int; harmless while rdsz
// never exceeds sizeof(buf).
509 n = safe_fread(buf, rdsz, dfp);
510 /* man fread:
511 * If error occurs, or EOF is reached, the return value
512 * is a short item count (or zero).
513 * fread does not distinguish between EOF and error.
515 if (n <= 0) {
516 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
517 if (errno == EAGAIN) /* poll lied, there is no data? */
518 continue; /* yes */
519 #endif
520 if (ferror(dfp))
521 bb_perror_msg_and_die(bb_msg_read_error);
522 break; /* EOF, not error */
525 xwrite(output_fd, buf, n);
526 #if ENABLE_FEATURE_WGET_STATUSBAR
527 G.transferred += n;
528 progress_meter(PROGRESS_BUMP);
529 #endif
// Count down the expected length; reaching 0 ends this body or chunk.
530 if (G.got_clen) {
531 G.content_len -= n;
532 if (G.content_len == 0)
533 break;
536 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
// Back to blocking mode for the line-oriented reads below.
537 ndelay_off(polldata.fd);
538 #endif
540 if (!G.chunked)
541 break;
// NOTE(review): neither safe_fgets() result below is checked. On EOF or error
// buf keeps its previous contents, which are then parsed as a chunk size.
543 safe_fgets(buf, sizeof(buf), dfp); /* This is a newline */
544 get_clen:
545 safe_fgets(buf, sizeof(buf), dfp);
// Chunk sizes are hexadecimal.
546 G.content_len = STRTOOFF(buf, NULL, 16);
547 /* FIXME: error check? */
548 if (G.content_len == 0)
549 break; /* all done! */
550 G.got_clen = 1;
553 progress_meter(PROGRESS_END);
// Applet entry point: wget [options] URL.
// Flow, in order:
//  1. Parse options with getopt32() and join all --header values into one
//     string, each followed by CRLF.
//  2. Parse the target URL. Unless -Y off was given, look up ftp_proxy or
//     http_proxy in the environment; when set and non-empty, the connection
//     goes to that proxy instead of the target host.
//  3. Choose the output name: the -O argument, else the last path component,
//     else index.html. The -P prefix applies only without -O.
//     -O - writes to stdout and cancels -c.
//  4. With -c, open the existing file and use its size as resume offset
//     (G.beg_range). A missing file is not created at this point.
//  5. Resolve and connect.
//     HTTP, or any request through a proxy: send the request, then read the
//     status line. Status 0 or 100: skip that header block and read another
//     status line. 200, 204 and 300 to 303 are accepted, 206 only when
//     resuming; anything else exits. Then read the headers:
//     content-length sets G.content_len and G.got_clen; transfer-encoding must
//     be chunked and sets G.chunked and G.got_clen; a location header on a
//     status of 300 or more closes the connection and restarts at the new
//     location. redir_limit starts at 5 and the redirect that brings it to 0 exits.
//     Direct FTP: prepare_ftp_session() runs the dialogue.
//  6. With --spider stop here. Otherwise open the output file if not yet open
//     (O_EXCL unless -O was given), copy the data with retrieve_file_data(),
//     and for FTP close the data stream and expect a final 226 reply.
// Returns EXIT_SUCCESS; failures exit through the and_die helpers.
// NOTE(review): post_data is only assigned by getopt32() when --post-data is
// given; it is read only under the WGET_OPT_POST_DATA test.
// NOTE(review): the Content-Length value is printed with %u but passed as int.
// NOTE(review): inline notes stop at the status switch; the later part of the
// function is covered by step 5 and 6 above.
556 int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
557 int wget_main(int argc UNUSED_PARAM, char **argv)
// Scratch buffer: status line, header lines, base64 output, FTP replies.
559 char buf[512];
// server is where we connect to; target is what we ask for. They differ only
// when a proxy is used.
560 struct host_info server, target;
561 len_and_sockaddr *lsa;
562 unsigned opt;
563 int redir_limit;
564 char *proxy = NULL;
565 char *dir_prefix = NULL;
566 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
567 char *post_data;
568 char *extra_headers = NULL;
569 llist_t *headers_llist = NULL;
570 #endif
571 FILE *sfp; /* socket to web/ftp server */
572 FILE *dfp; /* socket to ftp server (data) */
573 char *fname_out; /* where to direct output (-O) */
574 int output_fd = -1;
575 bool use_proxy; /* Use proxies if env vars are set */
576 const char *proxy_flag = "on"; /* Use proxies if env vars are set */
577 const char *user_agent = "Wget";/* "User-Agent" header field */
// Names looked up with index_in_strings(); the KEY values below are the
// 1-based positions in this list.
579 static const char keywords[] ALIGN1 =
580 "content-length\0""transfer-encoding\0""chunked\0""location\0";
581 enum {
582 KEY_content_length = 1, KEY_transfer_encoding, KEY_chunked, KEY_location
584 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
// Long option table; entries with a letter alias a short option, entries with
// a high byte value exist only in long form.
585 static const char wget_longopts[] ALIGN1 =
586 /* name, has_arg, val */
587 "continue\0" No_argument "c"
588 "spider\0" No_argument "s"
589 "quiet\0" No_argument "q"
590 "output-document\0" Required_argument "O"
591 "directory-prefix\0" Required_argument "P"
592 "proxy\0" Required_argument "Y"
593 "user-agent\0" Required_argument "U"
594 #if ENABLE_FEATURE_WGET_TIMEOUT
595 "timeout\0" Required_argument "T"
596 #endif
597 /* Ignored: */
598 // "tries\0" Required_argument "t"
599 /* Ignored (we always use PASV): */
600 "passive-ftp\0" No_argument "\xff"
601 "header\0" Required_argument "\xfe"
602 "post-data\0" Required_argument "\xfd"
603 /* Ignored (we don't do ssl) */
604 "no-check-certificate\0" No_argument "\xfc"
606 #endif
608 INIT_G();
610 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
611 applet_long_options = wget_longopts;
612 #endif
613 /* server.allocated = target.allocated = NULL; */
// NOTE(review): presumably -1 demands at least one non-option argument, T+
// makes -T numeric and the doubled colon collects --header into a list -
// TODO confirm against the getopt32 documentation.
614 opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
615 opt = getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
616 &fname_out, &dir_prefix,
617 &proxy_flag, &user_agent,
618 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
619 NULL /* -t RETRIES */
620 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
621 IF_FEATURE_WGET_LONG_OPTIONS(, &post_data)
623 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
// Flatten the header list into one string. size starts at 1 for the NUL and
// grows by each entry plus 2 for its CRLF.
624 if (headers_llist) {
625 int size = 1;
626 char *cp;
627 llist_t *ll = headers_llist;
628 while (ll) {
629 size += strlen(ll->data) + 2;
630 ll = ll->link;
632 extra_headers = cp = xmalloc(size);
633 while (headers_llist) {
634 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
637 #endif
639 /* TODO: compat issue: should handle "wget URL1 URL2..." */
// parse_url() writes user only when the URL has one, so clear it first.
641 target.user = NULL;
642 parse_url(argv[optind], &target);
644 /* Use the proxy if necessary */
645 use_proxy = (strcmp(proxy_flag, "off") != 0);
646 if (use_proxy) {
647 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
648 if (proxy && proxy[0]) {
649 server.user = NULL;
650 parse_url(proxy, &server);
651 } else {
652 use_proxy = 0;
// No proxy: connect straight to the target. With IPv6 the host is duplicated
// because target.host has its scope id stripped just below, presumably so
// that the address used for connecting keeps it.
655 if (!use_proxy) {
656 server.port = target.port;
657 if (ENABLE_FEATURE_IPV6) {
658 server.host = xstrdup(target.host);
659 } else {
660 server.host = target.host;
664 if (ENABLE_FEATURE_IPV6)
665 strip_ipv6_scope_id(target.host);
667 /* Guess an output filename, if there was no -O FILE */
668 if (!(opt & WGET_OPT_OUTNAME)) {
669 fname_out = bb_get_last_path_component_nostrip(target.path);
670 /* handle "wget http://kernel.org//" */
671 if (fname_out[0] == '/' || !fname_out[0])
672 fname_out = (char*)"index.html";
673 /* -P DIR is considered only if there was no -O FILE */
674 if (dir_prefix)
675 fname_out = concat_path_file(dir_prefix, fname_out);
676 } else {
677 if (LONE_DASH(fname_out)) {
678 /* -O - */
679 output_fd = 1;
680 opt &= ~WGET_OPT_CONTINUE;
683 #if ENABLE_FEATURE_WGET_STATUSBAR
684 G.curfile = bb_get_last_path_component_nostrip(fname_out);
685 #endif
687 /* Impossible?
688 if ((opt & WGET_OPT_CONTINUE) && !fname_out)
689 bb_error_msg_and_die("can't specify continue (-c) without a filename (-O)");
692 /* Determine where to start transfer */
693 if (opt & WGET_OPT_CONTINUE) {
694 output_fd = open(fname_out, O_WRONLY);
695 if (output_fd >= 0) {
696 G.beg_range = xlseek(output_fd, 0, SEEK_END);
698 /* File doesn't exist. We do not create file here yet.
699 * We are not sure it exists on remove side */
// A redirect to another host jumps back to resolve_lsa; a redirect on the
// same server, or any redirect while using a proxy, jumps to establish_session.
702 redir_limit = 5;
703 resolve_lsa:
704 lsa = xhost2sockaddr(server.host, server.port);
705 if (!(opt & WGET_OPT_QUIET)) {
706 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
707 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
708 free(s);
710 establish_session:
// FTP through a proxy is also spoken as HTTP to the proxy.
711 if (use_proxy || !target.is_ftp) {
713 * HTTP session
715 char *str;
716 int status;
718 /* Open socket to http server */
719 sfp = open_socket(lsa);
721 /* Send HTTP request */
// A proxy gets the absolute URL; a direct request gets only the path.
// POST is used only for direct requests with --post-data.
722 if (use_proxy) {
723 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
724 target.is_ftp ? "f" : "ht", target.host,
725 target.path);
726 } else {
727 if (opt & WGET_OPT_POST_DATA)
728 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
729 else
730 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
733 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
734 target.host, user_agent);
736 /* Ask server to close the connection as soon as we are done
737 * (IOW: we do not intend to send more requests)
739 fprintf(sfp, "Connection: close\r\n");
741 #if ENABLE_FEATURE_WGET_AUTHENTICATION
// The +6 skips the leading Proxy- of the literal, so the first call emits a
// plain Authorization header for the target credentials.
742 if (target.user) {
743 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
744 base64enc_512(buf, target.user));
746 if (use_proxy && server.user) {
747 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
748 base64enc_512(buf, server.user));
750 #endif
752 if (G.beg_range)
753 fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
755 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
756 if (extra_headers)
757 fputs(extra_headers, sfp);
// With --post-data the escaped body follows the blank line directly;
// otherwise the dangling else below sends just the blank line.
759 if (opt & WGET_OPT_POST_DATA) {
760 char *estr = URL_escape(post_data);
761 fprintf(sfp,
762 "Content-Type: application/x-www-form-urlencoded\r\n"
763 "Content-Length: %u\r\n"
764 "\r\n"
765 "%s",
766 (int) strlen(estr), estr
768 free(estr);
769 } else
770 #endif
772 fprintf(sfp, "\r\n");
775 fflush(sfp);
778 * Retrieve HTTP response line and check for "200" status code.
780 read_response:
781 if (fgets(buf, sizeof(buf), sfp) == NULL)
782 bb_error_msg_and_die("no response from server");
// Skip the first word of the status line and parse the number after it.
784 str = buf;
785 str = skip_non_whitespace(str);
786 str = skip_whitespace(str);
787 // FIXME: no error check
788 // xatou wouldn't work: "200 OK"
789 status = atoi(str);
// Status 0 or 100: discard that header block and read the next status line.
790 switch (status) {
791 case 0:
792 case 100:
793 while (gethdr(buf, sizeof(buf), sfp /*, &n*/) != NULL)
794 /* eat all remaining headers */;
795 goto read_response;
796 case 200:
798 Response 204 doesn't say "null file", it says "metadata
799 has changed but data didn't":
801 "10.2.5 204 No Content
802 The server has fulfilled the request but does not need to return
803 an entity-body, and might want to return updated metainformation.
804 The response MAY include new or updated metainformation in the form
805 of entity-headers, which if present SHOULD be associated with
806 the requested variant.
808 If the client is a user agent, it SHOULD NOT change its document
809 view from that which caused the request to be sent. This response
810 is primarily intended to allow input for actions to take place
811 without causing a change to the user agent's active document view,
812 although any new or updated metainformation SHOULD be applied
813 to the document currently in the user agent's active view.
815 The 204 response MUST NOT include a message-body, and thus
816 is always terminated by the first empty line after the header fields."
818 However, in real world it was observed that some web servers
819 (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
821 case 204:
822 break;
823 case 300: /* redirection */
824 case 301:
825 case 302:
826 case 303:
827 break;
828 case 206:
829 if (G.beg_range)
830 break;
831 /* fall through */
832 default:
833 bb_error_msg_and_die("server returned error: %s", sanitize_string(buf));
837 * Retrieve HTTP headers.
839 while ((str = gethdr(buf, sizeof(buf), sfp /*, &n*/)) != NULL) {
840 /* gethdr converted "FOO:" string to lowercase */
841 smalluint key;
842 /* strip trailing whitespace */
843 char *s = strchrnul(str, '\0') - 1;
844 while (s >= str && (*s == ' ' || *s == '\t')) {
845 *s = '\0';
846 s--;
848 key = index_in_strings(keywords, buf) + 1;
849 if (key == KEY_content_length) {
850 G.content_len = BB_STRTOOFF(str, NULL, 10);
851 if (G.content_len < 0 || errno) {
852 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
854 G.got_clen = 1;
855 continue;
857 if (key == KEY_transfer_encoding) {
858 if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
859 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
860 G.chunked = G.got_clen = 1;
862 if (key == KEY_location && status >= 300) {
863 if (--redir_limit == 0)
864 bb_error_msg_and_die("too many redirections");
865 fclose(sfp);
866 G.got_clen = 0;
867 G.chunked = 0;
868 if (str[0] == '/')
869 /* free(target.allocated); */
870 target.path = /* target.allocated = */ xstrdup(str+1);
871 /* lsa stays the same: it's on the same server */
872 else {
873 parse_url(str, &target);
874 if (!use_proxy) {
875 server.host = target.host;
876 /* strip_ipv6_scope_id(target.host); - no! */
877 /* we assume remote never gives us IPv6 addr with scope id */
878 server.port = target.port;
879 free(lsa);
880 goto resolve_lsa;
881 } /* else: lsa stays the same: we use proxy */
883 goto establish_session;
886 // if (status >= 300)
887 // bb_error_msg_and_die("bad redirection (no Location: header from server)");
889 /* For HTTP, data is pumped over the same connection */
890 dfp = sfp;
892 } else {
894 * FTP session
896 sfp = prepare_ftp_session(&dfp, &target, lsa);
899 if (opt & WGET_OPT_SPIDER) {
900 if (ENABLE_FEATURE_CLEAN_UP)
901 fclose(sfp);
902 return EXIT_SUCCESS;
905 if (output_fd < 0) {
906 int o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
907 /* compat with wget: -O FILE can overwrite */
908 if (opt & WGET_OPT_OUTNAME)
909 o_flags = O_WRONLY | O_CREAT | O_TRUNC;
910 output_fd = xopen(fname_out, o_flags);
913 retrieve_file_data(dfp, output_fd);
914 xclose(output_fd);
916 if (dfp != sfp) {
917 /* It's ftp. Close it properly */
918 fclose(dfp);
919 if (ftpcmd(NULL, NULL, sfp, buf) != 226)
920 bb_error_msg_and_die("ftp error: %s", sanitize_string(buf+4));
921 /* ftpcmd("QUIT", NULL, sfp, buf); - why bother? */
924 return EXIT_SUCCESS;