3 # Simple user agent using LWP library.
7 lwp-request, GET, POST, HEAD - Simple command line user agent
11 B<lwp-request> [B<-afPuUsSedvhx>] [B<-m> I<method>] [B<-b> I<base URL>] [B<-t> I<timeout>]
12 [B<-i> I<if-modified-since>] [B<-c> I<content-type>]
13 [B<-C> I<credentials>] [B<-p> I<proxy-url>] [B<-o> I<format>] I<url>...
17 This program can be used to send requests to WWW servers and your
18 local file system. The request content for POST and PUT
19 methods is read from stdin. The content of the response is printed on
20 stdout. Error messages are printed on stderr. The program returns a
21 status value indicating the number of URLs that failed.
29 Set which method to use for the request. If this option is not used,
30 then the method is derived from the name of the program.
34 Force request through, even if the program believes that the method is
35 illegal. The server might reject the request eventually.
39 This URI will be used as the base URI for resolving all relative URIs
44 Set the timeout value for the requests. The timeout is the amount of
45 time that the program will wait for a response from the remote server
46 before it fails. The default unit for the timeout value is seconds.
47 You may append "m" or "h" to the timeout value to make it minutes or
48 hours, respectively. The default timeout is '3m', i.e. 3 minutes.
52 Set the If-Modified-Since header in the request. If I<time> is the
53 name of a file, use the modification timestamp for this file. If
54 I<time> is not a file, it is parsed as a literal date. Take a look at
55 L<HTTP::Date> for recognized formats.
57 =item -c <content-type>
59 Set the Content-Type for the request. This option is only allowed for
60 requests that take content, i.e. POST and PUT. You can
61 force methods to take content by using the C<-f> option together with
62 C<-c>. The default Content-Type for POST is
63 C<application/x-www-form-urlencoded>. The default Content-Type for
64 the others is C<text/plain>.
68 Set the proxy to be used for the requests. The program also loads
69 proxy settings from the environment. You can disable this with the
74 Don't load proxy settings from environment.
78 Send this HTTP header with each request. You can specify several, e.g.:
81 -H 'Referer: http://other.url/' \
85 =item -C <username>:<password>
87 Provide credentials for documents that are protected by Basic
88 Authentication. If the document is protected and you did not specify
89 the username and password with this option, then you will be prompted
90 to provide these values.
94 The following options control what is displayed by the program:
100 Print request method and absolute URL as requests are made.
104 Print request headers in addition to request method and absolute URL.
108 Print response status code. This option is always on for HEAD requests.
112 Print response status chain. This shows redirect and authorization
113 requests that are handled by the library.
117 Print response headers. This option is always on for HEAD requests.
121 Do B<not> print the content of the response.
125 Process HTML content in various ways before printing it. If the
126 content type of the response is not HTML, then this option has no
127 effect. The legal format values are: I<text>, I<ps>, I<links>,
130 If you specify the I<text> format then the HTML will be formatted as
131 plain latin1 text. If you specify the I<ps> format then it will be
132 formatted as Postscript.
134 The I<links> format will output all links found in the HTML document.
135 Relative links will be expanded to absolute ones.
137 The I<html> format will reformat the HTML code and the I<dump> format
138 will just dump the HTML syntax tree.
140 Note that the C<HTML-Tree> distribution needs to be installed for this
141 option to work. In addition, the C<HTML-Format> distribution needs to
142 be installed for I<-o text> or I<-o ps> to work.
146 Print the version number of the program and quit.
150 Print usage message and quit.
154 Extra debugging output.
158 Set text (ascii) mode for content input and output. If this option is not
159 used, content input and output is done in binary mode.
163 Because this program is implemented using the LWP library, it will
164 only support the protocols that LWP supports.
168 L<lwp-mirror>, L<LWP>
172 Copyright 1995-1999 Gisle Aas.
174 This library is free software; you can redistribute it and/or
175 modify it under the same terms as Perl itself.
179 Gisle Aas <gisle@aas.no>
184 $progname =~ s
,.*[\\/],,; # use basename only
185 $progname =~ s/\.\w*$//; # strip extension, if any
194 use URI
::Heuristic
qw(uf_uri);
196 use HTTP
::Status
qw(status_message);
197 use HTTP
::Date
qw(time2str str2time);
200 # This table lists the methods that are allowed. It should really be
201 # a superset for all methods supported for every scheme that may be
202 # supported by the library. Currently it might be a bit too HTTP
203 # specific. You might use the -f option to force a method through.
205 # "" = No content in request, "C" = Needs content in request
218 # We make our own specialization of LWP::UserAgent that asks for
219 # user/password if document is protected.
221 package RequestAgent
;
222 @ISA = qw(LWP::UserAgent);
226 my $self = LWP
::UserAgent
::new
(@_);
227 $self->agent("lwp-request/$main::VERSION ");
# Supply a (username, password) pair for the given realm and URI.
#
# Arguments: ($self, $realm, $uri).
# Returns:   a two-element list; (undef, undef) when no credentials are given.
#
# If the -C option was supplied, its value is split on the first ':' into
# username and password. Otherwise a username prompt naming the realm and
# host:port is printed, and the password is read from STDIN with terminal
# echo switched off.
# NOTE(review): several lines of this sub are not visible here (reading
# $user, restoring echo, the condition guarding the final return) -- confirm
# against the full file.
231 sub get_basic_credentials
233 my($self, $realm, $uri) = @_;
234 if ($main::options
{'C'}) {
235 return split(':', $main::options
{'C'}, 2);
238 my $netloc = $uri->host_port;
239 print "Enter username for $realm at $netloc: ";
242 return (undef, undef) unless length $user;
# Switch off terminal echo so the password is not shown while it is typed.
244 system("stty -echo");
245 my $password = <STDIN
>;
247 print "\n"; # because we disabled echo
249 return ($user, $password);
252 return (undef, undef)
257 $method = uc(lc($progname) eq "lwp-request" ?
"GET" : $progname);
263 'a', # content i/o in text(ascii) mode
265 'f', # make request even if method is not in %allowed_methods
268 'i=s', # if-modified-since
269 'c=s', # content type for requests that carry content (e.g. POST)
270 'C=s', # credentials for basic authorization
271 'H=s@', # extra headers, form "Header: value string"
273 'u', # display method and URL of request
274 'U', # display request headers also
275 's', # display status code
276 'S', # display whole chain of status codes
277 'e', # display response headers (default for HEAD)
278 'd', # don't display content
283 'x', # extra debugging info
285 'P', # don't load proxy setting from environment
287 'o=s', # output format
290 Getopt
::Long
::config
("noignorecase", "bundling");
291 unless (GetOptions
(\
%options, @getopt_args)) {
296 my $DISTNAME = 'libwww-perl-' . LWP
::Version
();
298 This is lwp-request version $VERSION ($DISTNAME)
300 Copyright 1995-1999, Gisle Aas.
302 This program is free software; you can redistribute it and/or
303 modify it under the same terms as Perl itself.
307 usage
() if $options{'h'} || !@ARGV;
309 LWP
::Debug
::level
('+') if $options{'x'};
311 # Create the user agent object
312 $ua = RequestAgent
->new;
314 # Load proxy settings from *_proxy environment variables.
315 $ua->env_proxy unless $options{'P'};
317 $method = uc($options{'m'}) if defined $options{'m'};
321 $allowed_methods{$method} = "C"; # force content
324 $allowed_methods{$method} = "";
327 elsif (!defined $allowed_methods{$method}) {
328 die "$progname: $method is not an allowed method\n";
331 if ($method eq "HEAD") {
333 $options{'e'} = 1 unless $options{'d'};
# Handle the -t option: leading digits followed by an optional unit suffix
# (s, m or h). Minutes are scaled by 60 and hours by 3600 before the value
# is handed to the user agent.
# NOTE(review): the pattern is anchored only at the start, so anything after
# the number/unit (e.g. "3x") is silently ignored; $2 is undef when no unit
# is given, so the `eq` comparisons below operate on an undefined value.
337 if (defined $options{'t'}) {
338 $options{'t'} =~ /^(\d+)([smh])?/;
339 die "$progname: Illegal timeout value!\n" unless defined $1;
342 $timeout *= 60 if $2 eq "m";
343 $timeout *= 3600 if $2 eq "h";
345 $ua->timeout($timeout);
348 if (defined $options{'i'}) {
349 if (-e
$options{'i'}) {
353 $time = str2time
($options{'i'});
354 die "$progname: Illegal time syntax for -i option\n"
355 unless defined $time;
357 $options{'i'} = time2str
($time);
361 if ($allowed_methods{$method} eq "C") {
362 # This request needs some content
363 unless (defined $options{'c'}) {
364 # set default content type
365 $options{'c'} = ($method eq "POST") ?
366 "application/x-www-form-urlencoded"
370 die "$progname: Illegal Content-type format\n"
371 unless $options{'c'} =~ m
,^[\w\
-]+/[\w\
-.+]+(?
:\s
*;.*)?
$,
373 print "Please enter content ($options{'c'}) to be ${method}ed:\n"
375 binmode STDIN
unless -t
or $options{'a'};
376 $content = join("", <STDIN
>);
379 die "$progname: Can't set Content-type for $method requests\n"
380 if defined $options{'c'};
383 # Set up a request. We will use the same request object for all URLs.
384 $request = HTTP
::Request
->new($method);
385 $request->header('If-Modified-Since', $options{'i'}) if defined $options{'i'};
386 for my $user_header (@
{ $options{'H'} || [] }) {
387 my ($header_name, $header_value) = split /:\s*/, $user_header, 2;
388 $request->header($header_name, $header_value);
389 $ua->agent($header_value) if lc($header_name) eq "user-agent"; # Ugh!
391 #$request->header('Accept', '*/*');
392 if ($options{'c'}) { # will always be set for request that wants content
393 $request->header('Content-Type', $options{'c'});
394 $request->header('Content-Length', length $content); # Not really needed
395 $request->content($content);
400 # Ok, now we perform the requests, one URL at a time
401 while ($url = shift) {
402 # Create the URL object, but protect us against bad URLs
404 if ($url =~ /^\w+:/ || $options{'b'}) { # is there any scheme specification
405 $url = URI
->new($url, $options{'b'});
406 $url = $url->abs($options{'b'}) if $options{'b'};
413 $@
=~ s/ at .* line \d+.*//;
419 $ua->proxy($url->scheme, $options{'p'}) if $options{'p'};
421 # Send the request and get a response back from the server
423 $response = $ua->request($request);
425 if ($options{'u'} || $options{'U'}) {
426 my $url = $response->request->url->as_string;
427 print "$method $url\n";
428 print $response->request->headers_as_string, "\n" if $options{'U'};
432 printResponseChain
($response);
434 elsif ($options{'s'}) {
435 print $response->status_line, "\n";
440 print $response->headers_as_string;
441 print "\n"; # separate headers and content
444 unless ($options{'d'}) {
446 $response->content_type eq 'text/html') {
451 if ($@
=~ m
,^Can
't locate HTML/Parse.pm in \@INC,) {
452 die "The HTML-Tree distribution need to be installed for the -o option to be used.\n";
458 my $html = HTML::Parse::parse_html($response->content);
460 $options{'o
'} eq 'ps
' && do {
461 require HTML::FormatPS;
462 my $f = HTML::FormatPS->new;
463 print $f->format($html);
466 $options{'o
'} eq 'text
' && do {
467 require HTML::FormatText;
468 my $f = HTML::FormatText->new;
469 print $f->format($html);
472 $options{'o
'} eq 'html
' && do {
473 print $html->as_HTML;
476 $options{'o
'} eq 'links
' && do {
477 my $base = $response->base;
478 $base = $options{'b
'} if $options{'b
'};
479 for ( @{ $html->extract_links } ) {
480 my($link, $elem) = @$_;
481 my $tag = uc $elem->tag;
482 $link = URI->new($link)->abs($base)->as_string;
483 print "$tag\t$link\n";
487 $options{'o
'} eq 'dump' && do {
491 # It is bad to not notice this before now :-(
492 die "Illegal -o option value ($options{'o
'})\n";
496 binmode STDOUT unless $options{'a
'};
497 print $response->content;
501 $errors++ unless $response->is_success;
# Print one "METHOD URL --> status line" entry per response in the chain.
# The sub recurses on $response->previous before printing, so the earliest
# response is printed first; recursion ends when the response is undefined.
# NOTE(review): the line that unpacks $response from @_ is not visible here,
# and $code is assigned but not used in the visible statements.
507 sub printResponseChain
510 return unless defined $response;
511 printResponseChain($response->previous);
512 my $method = $response->request->method;
513 my $url = $response->request->url->as_string;
514 my $code = $response->code;
515 print "$method $url --> ", $response->status_line, "\n";
522 Usage: $progname [-options] <url>...
523 -m <method> use method for the request (default is '$method')
524 -f make request even if $progname believes method is illegal
525 -b <base> Use the specified URL as base
526 -t <timeout> Set timeout value
527 -i <time> Set the If-Modified-Since header on the request
528 -c <conttype> use this content-type for POST, PUT, CHECKIN
529 -a Use text mode for content I/O
530 -p <proxyurl> use this as a proxy
531 -P don't load proxy settings from environment
532 -H
<header
> send this HTTP header
(you can specify several
)
533 -C
<username
>:<password
>
534 provide credentials
for basic authentication
536 -u Display method
and URL before any response
537 -U Display request headers
(implies
-u
)
538 -s Display response status code
539 -S Display response status chain
540 -e Display response headers
541 -d Do
not display content
542 -o
<format
> Process HTML content
in various ways
544 -v Show program version
545 -h Print this message
547 -x Extra debugging output