1 # frozen_string_literal: true
7 # Allows the opening of various resources including URIs.
9 # If the first argument responds to the 'open' method, 'open' is called on
10 # it with the rest of the arguments.
12 # If the first argument is a string that begins with <code>(protocol)://</code>, it is parsed by
13 # URI.parse. If the parsed object responds to the 'open' method,
14 # 'open' is called on it with the rest of the arguments.
16 # Otherwise, Kernel#open is called.
18 # OpenURI::OpenRead#open provides URI::HTTP#open, URI::HTTPS#open and
19 # URI::FTP#open, Kernel#open.
21 # We can accept URIs and strings that begin with http://, https:// and
22 # ftp://. In these cases, the opened file object is extended by OpenURI::Meta.
23 def self.open(name, *rest, &block)
24 if name.respond_to?(:open)
25 name.open(*rest, &block)
26 elsif name.respond_to?(:to_str) &&
27 %r{\A[A-Za-z][A-Za-z0-9+\-\.]*://} =~ name &&
28 (uri = URI.parse(name)).respond_to?(:open)
29 uri.open(*rest, &block)
34 singleton_class.send(:ruby2_keywords, :open) if respond_to?(:ruby2_keywords, true)
37 # OpenURI is an easy-to-use wrapper for Net::HTTP, Net::HTTPS and Net::FTP.
41 # It is possible to open an http, https or ftp URL as though it were a file:
43 # URI.open("http://www.ruby-lang.org/") {|f|
44 # f.each_line {|line| p line}
47 # The opened file has several getter methods for its meta-information, as
48 # follows, since it is extended by OpenURI::Meta.
50 # URI.open("http://www.ruby-lang.org/en") {|f|
51 # f.each_line {|line| p line}
52 # p f.base_uri # <URI::HTTP:0x40e6ef2 URL:http://www.ruby-lang.org/en/>
53 # p f.content_type # "text/html"
54 # p f.charset # "iso-8859-1"
55 # p f.content_encoding # []
56 # p f.last_modified # Thu Dec 05 02:45:02 UTC 2002
59 # Additional header fields can be specified by an optional hash argument.
61 # URI.open("http://www.ruby-lang.org/en/",
62 # "User-Agent" => "Ruby/#{RUBY_VERSION}",
63 # "From" => "foo@bar.invalid",
64 # "Referer" => "http://www.ruby-lang.org/") {|f|
68 # The environment variables such as http_proxy, https_proxy and ftp_proxy
69 # are in effect by default. Here we disable proxy:
71 # URI.open("http://www.ruby-lang.org/en/", :proxy => nil) {|f|
75 # See OpenURI::OpenRead.open and URI.open for more on available options.
77 # URI objects can be opened in a similar way.
79 # uri = URI.parse("http://www.ruby-lang.org/en/")
84 # URI objects can be read directly. The returned string is also extended by
90 # Author:: Tanaka Akira <akr@m17n.org>
98 :proxy_http_basic_authentication => true,
99 :progress_proc => true,
100 :content_length_proc => true,
101 :http_basic_authentication => true,
102 :read_timeout => true,
103 :open_timeout => true,
105 :ssl_verify_mode => nil,
106 :ssl_min_version => nil,
107 :ssl_max_version => nil,
108 :ftp_active_mode => false,
111 :max_redirects => 64,
114 def OpenURI.check_options(options) # :nodoc:
116 next unless Symbol === k
117 unless Options.include? k
118 raise ArgumentError, "unrecognized option: #{k}"
123 def OpenURI.scan_open_optional_arguments(*rest) # :nodoc:
124 if !rest.empty? && (String === rest.first || Integer === rest.first)
126 if !rest.empty? && Integer === rest.first
130 return mode, perm, rest
133 def OpenURI.open_uri(name, *rest) # :nodoc:
134 uri = URI::Generic === name ? name : URI.parse(name)
135 mode, _, rest = OpenURI.scan_open_optional_arguments(*rest)
136 options = rest.shift if !rest.empty? && Hash === rest.first
137 raise ArgumentError.new("extra arguments") if !rest.empty?
139 OpenURI.check_options(options)
141 if /\Arb?(?:\Z|:([^:]+))/ =~ mode
142 encoding, = $1,Encoding.find($1) if $1
145 if options.has_key? :encoding
147 raise ArgumentError, "encoding specified twice"
149 encoding = Encoding.find(options[:encoding])
152 unless mode == nil ||
153 mode == 'r' || mode == 'rb' ||
155 raise ArgumentError.new("invalid access mode #{mode} (#{uri.class} resource is read only.)")
158 io = open_loop(uri, options)
159 io.set_encoding(encoding) if encoding
164 if io.respond_to? :close!
167 io.close if !io.closed?
175 def OpenURI.open_loop(uri, options) # :nodoc:
177 proxy_opts << :proxy_http_basic_authentication if options.include? :proxy_http_basic_authentication
178 proxy_opts << :proxy if options.include? :proxy
180 if 1 < proxy_opts.length
181 raise ArgumentError, "multiple proxy options specified"
183 case proxy_opts.first
184 when :proxy_http_basic_authentication
185 opt_proxy, proxy_user, proxy_pass = options.fetch(:proxy_http_basic_authentication)
186 proxy_user = proxy_user.to_str
187 proxy_pass = proxy_pass.to_str
189 raise ArgumentError.new("Invalid authenticated proxy option: #{options[:proxy_http_basic_authentication].inspect}")
192 opt_proxy = options.fetch(:proxy)
202 find_proxy = lambda {|u| pxy = u.find_proxy; pxy ? [pxy, nil, nil] : nil}
204 find_proxy = lambda {|u| nil}
206 opt_proxy = URI.parse(opt_proxy)
207 find_proxy = lambda {|u| [opt_proxy, proxy_user, proxy_pass]}
209 find_proxy = lambda {|u| [opt_proxy, proxy_user, proxy_pass]}
211 raise ArgumentError.new("Invalid proxy option: #{opt_proxy}")
215 max_redirects = options[:max_redirects]
218 redirect = catch(:open_uri_redirect) {
220 uri.buffer_open(buf, find_proxy.call(uri), options)
224 if redirect.relative?
225 # Although it violates RFC2616, Location: field may have relative
226 # URI. It is converted to absolute URI using uri as a base URI.
227 redirect = uri + redirect
229 if !options.fetch(:redirect, true)
230 raise HTTPRedirect.new(buf.io.status.join(' '), buf.io, redirect)
232 unless OpenURI.redirectable?(uri, redirect)
233 raise "redirection forbidden: #{uri} -> #{redirect}"
235 if options.include? :http_basic_authentication
236 # send authentication only for the URI directly specified.
237 options = options.dup
238 options.delete :http_basic_authentication
241 raise "HTTP redirection loop: #{uri}" if uri_set.include? uri.to_s
242 uri_set[uri.to_s] = true
243 raise TooManyRedirects.new("Too many redirects", buf.io) if max_redirects && uri_set.size > max_redirects
253 def OpenURI.redirectable?(uri1, uri2) # :nodoc:
254 # This test is intended to forbid a redirection from http://... to
255 # file:///etc/passwd, file:///dev/zero, etc. CVE-2011-1521
256 # https to http redirect is also forbidden intentionally.
257 # It avoids sending secure cookie or referer by non-secure HTTP protocol.
258 # (RFC 2109 4.3.1, RFC 2965 3.3, RFC 2616 15.1.3)
259 # However this is ad hoc. It should be extensible/configurable.
260 uri1.scheme.downcase == uri2.scheme.downcase ||
261 (/\A(?:http|ftp)\z/i =~ uri1.scheme && /\A(?:https?|ftp)\z/i =~ uri2.scheme)
264 def OpenURI.open_http(buf, target, proxy, options) # :nodoc:
266 proxy_uri, proxy_user, proxy_pass = proxy
267 raise "Non-HTTP proxy URI: #{proxy_uri}" if proxy_uri.class != URI::HTTP
271 raise ArgumentError, "userinfo not supported. [RFC3986]"
275 options.each {|k, v| header[k] = v if String === k }
279 if URI::HTTP === target
282 unless proxy_user && proxy_pass
283 proxy_user, proxy_pass = proxy_uri.userinfo.split(':') if proxy_uri.userinfo
285 if proxy_user && proxy_pass
286 klass = Net::HTTP::Proxy(proxy_uri.hostname, proxy_uri.port, proxy_user, proxy_pass)
288 klass = Net::HTTP::Proxy(proxy_uri.hostname, proxy_uri.port)
291 target_host = target.hostname
292 target_port = target.port
293 request_uri = target.request_uri
295 # FTP over HTTP proxy
296 target_host = proxy_uri.hostname
297 target_port = proxy_uri.port
298 request_uri = target.to_s
299 if proxy_user && proxy_pass
300 header["Proxy-Authorization"] =
301 'Basic ' + ["#{proxy_user}:#{proxy_pass}"].pack('m0')
305 http = proxy ? klass.new(target_host, target_port) : klass.new(target_host, target_port, nil)
306 if target.class == URI::HTTPS
309 http.verify_mode = options[:ssl_verify_mode] || OpenSSL::SSL::VERIFY_PEER
310 http.min_version = options[:ssl_min_version]
311 http.max_version = options[:ssl_max_version]
312 store = OpenSSL::X509::Store.new
313 if options[:ssl_ca_cert]
314 Array(options[:ssl_ca_cert]).each do |cert|
315 if File.directory? cert
322 store.set_default_paths
324 http.cert_store = store
326 if options.include? :read_timeout
327 http.read_timeout = options[:read_timeout]
329 if options.include? :open_timeout
330 http.open_timeout = options[:open_timeout]
335 req = Net::HTTP::Get.new(request_uri, header)
336 if options.include? :http_basic_authentication
337 user, pass = options[:http_basic_authentication]
338 req.basic_auth user, pass
340 http.request(req) {|response|
342 if options[:content_length_proc] && Net::HTTPSuccess === resp
343 if resp.key?('Content-Length')
344 options[:content_length_proc].call(resp['Content-Length'].to_i)
346 options[:content_length_proc].call(nil)
349 resp.read_body {|str|
351 if options[:progress_proc] && Net::HTTPSuccess === resp
352 options[:progress_proc].call(buf.size)
360 io.status = [resp.code, resp.message]
361 resp.each_name {|name| buf.io.meta_add_field2 name, resp.get_fields(name) }
363 when Net::HTTPSuccess
364 when Net::HTTPMovedPermanently, # 301
365 Net::HTTPFound, # 302
366 Net::HTTPSeeOther, # 303
367 Net::HTTPTemporaryRedirect, # 307
368 Net::HTTPPermanentRedirect # 308
370 loc_uri = URI.parse(resp['location'])
371 rescue URI::InvalidURIError
372 raise OpenURI::HTTPError.new(io.status.join(' ') + ' (Invalid Location URI)', io)
374 throw :open_uri_redirect, loc_uri
376 raise OpenURI::HTTPError.new(io.status.join(' '), io)
380 class HTTPError < StandardError
381 def initialize(message, io)
388 # Raised on redirection,
389 # only occurs when +redirect+ option for HTTP is +false+.
390 class HTTPRedirect < HTTPError
391 def initialize(message, io, uri)
398 class TooManyRedirects < HTTPError
401 class Buffer # :nodoc: all
412 if StringIO === @io && StringMax < @size
414 io = Tempfile.new('open-uri')
416 Meta.init io, @io if Meta === @io
423 Meta.init @io unless Meta === @io
429 RE_LWS = /[\r\n\t ]+/n
430 RE_TOKEN = %r{[^\x00- ()<>@,;:\\"/\[\]?={}\x7f]+}n
431 RE_QUOTED_STRING = %r{"(?:[\r\n\t !#-\[\]-~\x80-\xff]|\\[\x00-\x7f])*"}n
432 RE_PARAMETERS = %r{(?:;#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?=#{RE_LWS}?(?:#{RE_TOKEN}|#{RE_QUOTED_STRING})#{RE_LWS}?)*}n
435 # Mixin for holding meta-information.
437 def Meta.init(obj, src=nil) # :nodoc:
441 @meta = {} # name to string. legacy.
442 @metas = {} # name to array of strings.
445 obj.status = src.status
446 obj.base_uri = src.base_uri
447 src.metas.each {|name, values|
448 obj.meta_add_field2(name, values)
453 # returns an Array that consists of status code and message.
454 attr_accessor :status
456 # returns a URI that is the base of relative URIs in the data.
457 # It may differ from the URI supplied by a user due to redirection.
458 attr_accessor :base_uri
460 # returns a Hash that represents header fields.
461 # The Hash keys are downcased for canonicalization.
462 # The Hash values are a field body.
463 # If there are multiple fields with the same field name,
464 # the field values are concatenated with a comma.
467 # returns a Hash that represents header fields.
468 # The Hash keys are downcased for canonicalization.
469 # The Hash values are arrays of field values.
472 def meta_setup_encoding # :nodoc:
473 charset = self.charset
477 enc = Encoding.find(charset)
481 enc = Encoding::ASCII_8BIT unless enc
482 if self.respond_to? :force_encoding
483 self.force_encoding(enc)
484 elsif self.respond_to? :string
485 self.string.force_encoding(enc)
487 self.set_encoding enc
491 def meta_add_field2(name, values) # :nodoc:
493 @metas[name] = values
494 @meta[name] = values.join(', ')
495 meta_setup_encoding if name == 'content-type'
498 def meta_add_field(name, value) # :nodoc:
499 meta_add_field2(name, [value])
502 # returns a Time that represents the Last-Modified field.
504 if vs = @metas['last-modified']
512 def content_type_parse # :nodoc:
513 vs = @metas['content-type']
514 # The last (?:;#{RE_LWS}?)? matches extra ";" which violates RFC2045.
515 if vs && %r{\A#{RE_LWS}?(#{RE_TOKEN})#{RE_LWS}?/(#{RE_TOKEN})#{RE_LWS}?(#{RE_PARAMETERS})(?:;#{RE_LWS}?)?\z}no =~ vs.join(', ')
517 subtype = $2.downcase
519 $3.scan(/;#{RE_LWS}?(#{RE_TOKEN})#{RE_LWS}?=#{RE_LWS}?(?:(#{RE_TOKEN})|(#{RE_QUOTED_STRING}))/no) {|att, val, qval|
521 val = qval[1...-1].gsub(/[\r\n\t !#-\[\]-~\x80-\xff]+|(\\[\x00-\x7f])/n) { $1 ? $1[1,1] : $& }
523 parameters << [att.downcase, val]
525 ["#{type}/#{subtype}", *parameters]
531 # returns "type/subtype" which is MIME Content-Type.
532 # It is downcased for canonicalization.
533 # Content-Type parameters are stripped.
535 type, *_ = content_type_parse
536 type || 'application/octet-stream'
539 # returns a charset parameter in Content-Type field.
540 # It is downcased for canonicalization.
542 # If charset parameter is not given but a block is given,
543 # the block is called and its result is returned.
544 # It can be used to guess charset.
546 # If neither the charset parameter nor a block is given,
547 # nil is returned, except for text types.
548 # In that case, "utf-8" is returned as defined by RFC6838 4.2.1
550 type, *parameters = content_type_parse
551 if pair = parameters.assoc('charset')
555 elsif type && %r{\Atext/} =~ type
556 "utf-8" # RFC6838 4.2.1
562 # Returns a list of encodings in Content-Encoding field as an array of
565 # The encodings are downcased for canonicalization.
567 vs = @metas['content-encoding']
568 if vs && %r{\A#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?(?:,#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?)*}o =~ (v = vs.join(', '))
569 v.scan(RE_TOKEN).map {|content_coding| content_coding.downcase}
576 # Mixin for HTTP and FTP URIs.
578 # OpenURI::OpenRead#open provides `open' for URI::HTTP and URI::FTP.
580 # OpenURI::OpenRead#open takes optional 3 arguments as:
582 # OpenURI::OpenRead#open([mode [, perm]] [, options]) [{|io| ... }]
584 # OpenURI::OpenRead#open returns an IO-like object if block is not given.
585 # Otherwise it yields the IO object and returns the value of the block.
586 # The IO object is extended with OpenURI::Meta.
588 # +mode+ and +perm+ are the same as Kernel#open.
590 # However, +mode+ must be read mode because OpenURI::OpenRead#open doesn't
591 # support write mode (yet).
592 # Also +perm+ is ignored because it is meaningful only for file creation.
594 # +options+ must be a hash.
596 # Each option with a string key specifies an extra header field for HTTP.
597 # I.e., it is ignored for FTP without HTTP proxy.
599 # The hash may include other options, where keys are symbols:
603 # :proxy => "http://proxy.foo.com:8000/"
604 # :proxy => URI.parse("http://proxy.foo.com:8000/")
609 # If :proxy option is specified, the value should be String, URI,
612 # When String or URI is given, it is treated as proxy URI.
614 # When true is given or the option itself is not specified,
615 # environment variable `scheme_proxy' is examined.
616 # `scheme' is replaced by `http', `https' or `ftp'.
618 # When false or nil is given, the environment variables are ignored and
619 # connection will be made to a server directly.
621 # [:proxy_http_basic_authentication]
623 # :proxy_http_basic_authentication =>
624 # ["http://proxy.foo.com:8000/", "proxy-user", "proxy-password"]
625 # :proxy_http_basic_authentication =>
626 # [URI.parse("http://proxy.foo.com:8000/"),
627 # "proxy-user", "proxy-password"]
629 # If :proxy_http_basic_authentication option is specified, the value
630 # should be an Array with 3 elements. It should contain a proxy URI, a
631 # proxy user name and a proxy password. The proxy URI should be a String,
632 # a URI or nil. The proxy user name and password should be Strings.
634 # If nil is given for the proxy URI, this option is just ignored.
636 # If both :proxy and :proxy_http_basic_authentication are specified,
637 # ArgumentError is raised.
639 # [:http_basic_authentication]
641 # :http_basic_authentication=>[user, password]
643 # If :http_basic_authentication is specified,
644 # the value should be an array which contains 2 strings:
645 # username and password.
646 # It is used for HTTP Basic authentication defined by RFC 2617.
648 # [:content_length_proc]
650 # :content_length_proc => lambda {|content_length| ... }
652 # If :content_length_proc option is specified, the option value procedure
653 # is called before actual transfer is started.
654 # It takes one argument, which is expected content length in bytes.
656 # If two or more transfers are performed by HTTP redirection, the
657 # procedure is called only once for the last transfer.
659 # When expected content length is unknown, the procedure is called with
660 # nil. This happens when the HTTP response has no Content-Length header.
664 # :progress_proc => lambda {|size| ...}
666 # If :progress_proc option is specified, the proc is called with one
667 # argument each time `open' gets a content fragment from the network.
668 # The argument +size+ is the accumulated transferred size in bytes.
670 # If two or more transfers are performed by HTTP redirection, the
671 # procedure is called only for the last transfer.
673 # :progress_proc and :content_length_proc are intended to be used for
675 # For example, it can be implemented as follows using Ruby/ProgressBar.
679 # :content_length_proc => lambda {|t|
681 # pbar = ProgressBar.new("...", t)
682 # pbar.file_transfer_mode
685 # :progress_proc => lambda {|s|
691 # :read_timeout=>nil (no timeout)
692 # :read_timeout=>10 (10 seconds)
694 # :read_timeout option specifies a timeout of read for http connections.
698 # :open_timeout=>nil (no timeout)
699 # :open_timeout=>10 (10 seconds)
701 # :open_timeout option specifies a timeout of open for http connections.
705 # :ssl_ca_cert=>filename or an Array of filenames
707 # :ssl_ca_cert is used to specify CA certificate for SSL.
708 # If it is given, default certificates are not used.
712 # :ssl_verify_mode=>mode
714 # :ssl_verify_mode is used to specify openssl verify mode.
718 # :ssl_min_version=>:TLS1_2
720 # :ssl_min_version option specifies the minimum allowed SSL/TLS protocol
721 # version. See also OpenSSL::SSL::SSLContext#min_version=.
725 # :ssl_max_version=>:TLS1_2
727 # :ssl_max_version option specifies the maximum allowed SSL/TLS protocol
728 # version. See also OpenSSL::SSL::SSLContext#max_version=.
732 # :ftp_active_mode=>bool
734 # <tt>:ftp_active_mode => true</tt> is used to enable FTP active mode.
735 # Ruby 1.9 uses passive mode by default.
736 # Note that the active mode is default in Ruby 1.8 or prior.
742 # +:redirect+ is true by default. <tt>:redirect => false</tt> is used to
743 # disable all HTTP redirects.
745 # OpenURI::HTTPRedirect exception raised on redirection.
746 # Using +true+ also means that redirections between http and ftp are
749 def open(*rest, &block)
750 OpenURI.open_uri(self, *rest, &block)
753 # OpenURI::OpenRead#read([ options ]) reads the content referenced by self and
754 # returns the content as a string.
755 # The string is extended with OpenURI::Meta.
756 # The argument +options+ is the same as for OpenURI::OpenRead#open.
758 self.open(options) {|f|
769 def buffer_open(buf, proxy, options) # :nodoc:
770 OpenURI.open_http(buf, self, proxy, options)
773 include OpenURI::OpenRead
777 def buffer_open(buf, proxy, options) # :nodoc:
779 OpenURI.open_http(buf, self, proxy, options)
786 abort "net/ftp is not found. You may need to `gem install net-ftp` to install net/ftp."
790 path = path.sub(%r{\A/}, '%2F') # re-encode the beginning slash because uri library decodes it.
791 directories = path.split(%r{/}, -1)
792 directories.each {|d|
793 d.gsub!(/%([0-9A-Fa-f][0-9A-Fa-f])/) { [$1].pack("H2") }
795 unless filename = directories.pop
796 raise ArgumentError, "no filename: #{self.inspect}"
798 directories.each {|d|
800 raise ArgumentError, "invalid directory: #{d.inspect}"
803 if /[\r\n]/ =~ filename
804 raise ArgumentError, "invalid filename: #{filename.inspect}"
806 typecode = self.typecode
807 if typecode && /\A[aid]\z/ !~ typecode
808 raise ArgumentError, "invalid typecode: #{typecode.inspect}"
811 # The access sequence is defined by RFC 1738
813 ftp.connect(self.hostname, self.port)
814 ftp.passive = !options[:ftp_active_mode]
815 # todo: extract user/passwd from .netrc.
818 user, passwd = self.userinfo.split(/:/) if self.userinfo
819 ftp.login(user, passwd)
820 directories.each {|cwd|
821 ftp.voidcmd("CWD #{cwd}")
824 # xxx: typecode D is not handled.
825 ftp.voidcmd("TYPE #{typecode.upcase}")
827 if options[:content_length_proc]
828 options[:content_length_proc].call(ftp.size(filename))
830 ftp.retrbinary("RETR #{filename}", 4096) { |str|
832 options[:progress_proc].call(buf.size) if options[:progress_proc]
838 include OpenURI::OpenRead