1 # frozen_string_literal: true
7 # Allows the opening of various resources including URIs.
9 # If the first argument responds to the 'open' method, 'open' is called on
10 # it with the rest of the arguments.
12 # If the first argument is a string that begins with <code>(protocol)://</code>, it is parsed by
13 # URI.parse. If the parsed object responds to the 'open' method,
14 # 'open' is called on it with the rest of the arguments.
16 # Otherwise, Kernel#open is called.
18 # OpenURI::OpenRead#open provides URI::HTTP#open, URI::HTTPS#open and
19 # URI::FTP#open, Kernel#open.
21 # We can accept URIs and strings that begin with http://, https:// and
22 # ftp://. In these cases, the opened file object is extended by OpenURI::Meta.
# Dispatches +name+ to the appropriate opener.
#
# name::  an object responding to +open+, a string-like object, or anything
#         the inherited +open+ accepts.
# rest::  forwarded unchanged to whichever +open+ ends up being called.
# block:: forwarded unchanged as well.
#
# Returns whatever the selected +open+ returns.
def self.open(name, *rest, &block)
  if name.respond_to?(:open)
    # The object knows how to open itself (e.g. an already parsed URI).
    name.open(*rest, &block)
  elsif name.respond_to?(:to_str) &&
        %r{\A[A-Za-z][A-Za-z0-9+\-\.]*://} =~ name &&
        (uri = URI.parse(name)).respond_to?(:open)
    # A "scheme://..." string whose parsed URI class supports +open+.
    uri.open(*rest, &block)
  else
    # Anything else goes to the inherited open (Kernel#open).
    super
  end
end
# Keep keyword arguments intact through *rest on Rubies that provide
# ruby2_keywords.
singleton_class.send(:ruby2_keywords, :open) if respond_to?(:ruby2_keywords, true)
37 # OpenURI is an easy-to-use wrapper for Net::HTTP, Net::HTTPS and Net::FTP.
41 # It is possible to open an http, https or ftp URL as though it were a file:
43 # URI.open("http://www.ruby-lang.org/") {|f|
44 # f.each_line {|line| p line}
47 # The opened file has several getter methods for its meta-information, as
48 # follows, since it is extended by OpenURI::Meta.
50 # URI.open("http://www.ruby-lang.org/en") {|f|
51 # f.each_line {|line| p line}
52 # p f.base_uri # <URI::HTTP:0x40e6ef2 URL:http://www.ruby-lang.org/en/>
53 # p f.content_type # "text/html"
54 # p f.charset # "iso-8859-1"
55 # p f.content_encoding # []
56 # p f.last_modified # Thu Dec 05 02:45:02 UTC 2002
59 # Additional header fields can be specified by an optional hash argument.
61 # URI.open("http://www.ruby-lang.org/en/",
62 # "User-Agent" => "Ruby/#{RUBY_VERSION}",
63 # "From" => "foo@bar.invalid",
64 # "Referer" => "http://www.ruby-lang.org/") {|f|
68 # The environment variables such as http_proxy, https_proxy and ftp_proxy
69 # are in effect by default. Here we disable proxy:
71 # URI.open("http://www.ruby-lang.org/en/", :proxy => nil) {|f|
75 # See OpenURI::OpenRead.open and URI.open for more on available options.
77 # URI objects can be opened in a similar way.
79 # uri = URI.parse("http://www.ruby-lang.org/en/")
84 # URI objects can be read directly. The returned string is also extended by
90 # Author:: Tanaka Akira <akr@m17n.org>
# Table of recognized Symbol-keyed options. OpenURI.check_options tests Symbol
# keys for membership in Options, and OpenURI.open_loop falls back to
# Options.fetch(:max_redirects) when the caller supplies no :max_redirects.
# NOTE(review): presumably these are entries of that Options hash; its opening
# line and some entries (other code reads :proxy, :encoding, :redirect and
# :ssl_ca_cert from the per-call options) are not visible here -- confirm.
98 :proxy_http_basic_authentication => true,
99 :progress_proc => true,
100 :content_length_proc => true,
101 :http_basic_authentication => true,
102 :read_timeout => true,
103 :open_timeout => true,
105 :ssl_verify_mode => nil,
106 :ssl_min_version => nil,
107 :ssl_max_version => nil,
108 :ftp_active_mode => false,
111 :max_redirects => 64,
112 :request_specific_fields => nil,
115 def OpenURI.check_options(options) # :nodoc:
117 next unless Symbol === k
118 unless Options.include? k
119 raise ArgumentError, "unrecognized option: #{k}"
# Splits the optional leading +mode+ and +perm+ arguments (as accepted by
# Kernel#open) off the front of +rest+.
#
# A leading String or Integer is taken as the mode; an Integer directly
# after it is taken as the permission.
#
# Returns a three-element array: [mode, perm, remaining_arguments]. mode and
# perm are nil when absent.
def OpenURI.scan_open_optional_arguments(*rest) # :nodoc:
  mode = perm = nil
  # rest.first is nil for an empty list, so no separate emptiness check is
  # needed: nil matches neither String nor Integer.
  if String === rest.first || Integer === rest.first
    mode = rest.shift
    perm = rest.shift if Integer === rest.first
  end
  [mode, perm, rest]
end
# Opens the resource named by +name+ for reading.
#
# name:: a URI::Generic, or a value handed to URI.parse.
# rest:: optional mode/perm (split off by scan_open_optional_arguments)
#        followed by an optional Hash of options; anything left over raises
#        ArgumentError ("extra arguments").
#
# NOTE(review): the embedded numbering skips several lines of this method
# (block terminators, the handling of the caller's block and the return value
# are not visible here); the comments describe only the statements shown.
134 def OpenURI.open_uri(name, *rest) # :nodoc:
135 uri = URI::Generic === name ? name : URI.parse(name)
# perm is accepted for Kernel#open compatibility but discarded.
136 mode, _, rest = OpenURI.scan_open_optional_arguments(*rest)
137 options = rest.shift if !rest.empty? && Hash === rest.first
138 raise ArgumentError.new("extra arguments") if !rest.empty?
140 OpenURI.check_options(options)
# A mode of the form "r:ENC" / "rb:ENC" carries an encoding name in $1.
142 if /\Arb?(?:\Z|:([^:]+))/ =~ mode
# Multiple assignment: +encoding+ receives the captured name ($1);
# Encoding.find($1) is evaluated but its result is discarded.
143 encoding, = $1,Encoding.find($1) if $1
146 if options.has_key? :encoding
# NOTE(review): the condition guarding this raise is not visible; presumably
# it fires when the mode string already supplied an encoding.
148 raise ArgumentError, "encoding specified twice"
150 encoding = Encoding.find(options[:encoding])
# :request_specific_fields must be a Hash or a Proc.
152 if options.has_key? :request_specific_fields
153 if !(options[:request_specific_fields].is_a?(Hash) || options[:request_specific_fields].is_a?(Proc))
154 raise ArgumentError, "Invalid request_specific_fields option: #{options[:request_specific_fields].inspect}"
# Only read modes are accepted; one alternative of this condition is not
# visible in this listing.
157 unless mode == nil ||
158 mode == 'r' || mode == 'rb' ||
160 raise ArgumentError.new("invalid access mode #{mode} (#{uri.class} resource is read only.)")
163 io = open_loop(uri, options)
164 io.set_encoding(encoding) if encoding
# NOTE(review): presumably cleanup of +io+ after a caller-supplied block has
# run; the surrounding structure is not visible -- confirm.
169 if io.respond_to? :close!
172 io.close if !io.closed?
# Resolves the proxy configuration, then fetches +uri+, following redirects
# signalled through throw/catch of :open_uri_redirect.
#
# uri::     the URI to fetch; it must respond to buffer_open.
# options:: the option Hash already validated by OpenURI.open_uri.
#
# NOTE(review): the embedded numbering skips many lines here (the case/when
# that selects a proxy strategy, the loop construct, buffer creation and the
# return value are not visible); the comments describe only what is shown.
180 def OpenURI.open_loop(uri, options) # :nodoc:
# At most one of the two proxy options may be given.
182 proxy_opts << :proxy_http_basic_authentication if options.include? :proxy_http_basic_authentication
183 proxy_opts << :proxy if options.include? :proxy
185 if 1 < proxy_opts.length
186 raise ArgumentError, "multiple proxy options specified"
188 case proxy_opts.first
189 when :proxy_http_basic_authentication
# The option value is destructured as [proxy, user, password].
190 opt_proxy, proxy_user, proxy_pass = options.fetch(:proxy_http_basic_authentication)
191 proxy_user = proxy_user.to_str
192 proxy_pass = proxy_pass.to_str
194 raise ArgumentError.new("Invalid authenticated proxy option: #{options[:proxy_http_basic_authentication].inspect}")
197 opt_proxy = options.fetch(:proxy)
# find_proxy maps a URI to nil (no proxy) or [proxy_uri, user, password].
# This variant asks the URI itself via u.find_proxy.
207 find_proxy = lambda {|u| pxy = u.find_proxy; pxy ? [pxy, nil, nil] : nil}
# No proxy at all.
209 find_proxy = lambda {|u| nil}
# Proxy given as text: parsed once, then returned for every URI.
211 opt_proxy = URI.parse(opt_proxy)
212 find_proxy = lambda {|u| [opt_proxy, proxy_user, proxy_pass]}
# NOTE(review): presumably the branch for a proxy that is already a URI.
214 find_proxy = lambda {|u| [opt_proxy, proxy_user, proxy_pass]}
216 raise ArgumentError.new("Invalid proxy option: #{opt_proxy}")
# Caller's limit, else the default stored in the Options table.
220 max_redirects = options[:max_redirects] || Options.fetch(:max_redirects)
223 request_specific_fields = {}
224 if options.has_key? :request_specific_fields
225 request_specific_fields = if options[:request_specific_fields].is_a?(Hash)
226 options[:request_specific_fields]
# NOTE(review): this is `else`, not `elsif` -- the is_a?(Proc) expression is
# evaluated and discarded, so every non-Hash value reaches the .call below.
# open_uri rejects values that are neither Hash nor Proc before this point.
227 else options[:request_specific_fields].is_a?(Proc)
228 options[:request_specific_fields].call(uri)
# open_http throws :open_uri_redirect with the Location URI; that value
# becomes +redirect+.
231 redirect = catch(:open_uri_redirect) {
# The per-request fields are merged over a copy of the options.
233 uri.buffer_open(buf, find_proxy.call(uri), options.merge(request_specific_fields))
237 if redirect.relative?
238 # Although it violates RFC2616, Location: field may have relative
239 # URI. It is converted to absolute URI using uri as a base URI.
240 redirect = uri + redirect
# :redirect => false turns a redirect into an HTTPRedirect exception.
242 if !options.fetch(:redirect, true)
243 raise HTTPRedirect.new(buf.io.status.join(' '), buf.io, redirect)
245 unless OpenURI.redirectable?(uri, redirect)
246 raise "redirection forbidden: #{uri} -> #{redirect}"
248 if options.include? :http_basic_authentication
249 # send authentication only for the URI directly specified.
250 options = options.dup
251 options.delete :http_basic_authentication
# NOTE(review): no dup is visible on this path, so when the previous branch
# did not run this delete appears to act on the Hash that was passed in --
# confirm against the full source.
253 if options.include?(:request_specific_fields) && options[:request_specific_fields].is_a?(Hash)
254 # Send request specific headers only for the initial request.
255 options.delete :request_specific_fields
# uri_set records visited URIs (by to_s) to detect loops and count hops.
258 raise "HTTP redirection loop: #{uri}" if uri_set.include? uri.to_s
259 uri_set[uri.to_s] = true
260 raise TooManyRedirects.new("Too many redirects", buf.io) if max_redirects && uri_set.size > max_redirects
# Decides whether a redirect from +uri1+ to +uri2+ may be followed.
#
# A redirect is allowed when both URIs use the same scheme (compared
# case-insensitively), or when it goes from http/ftp to http/https/ftp.
#
# This forbids redirects such as http://... to file:///etc/passwd or
# file:///dev/zero (CVE-2011-1521). Redirects from https to http are refused
# on purpose, so that secure cookies or referers are not sent over
# non-secure HTTP (RFC 2109 4.3.1, RFC 2965 3.3, RFC 2616 15.1.3).
# The rule is ad hoc; it should be extensible/configurable.
#
# Returns a truthy value when the redirect is allowed, a falsy one otherwise.
def OpenURI.redirectable?(uri1, uri2) # :nodoc:
  from_scheme = uri1.scheme
  to_scheme = uri2.scheme
  return true if from_scheme.downcase == to_scheme.downcase

  /\A(?:http|ftp)\z/i =~ from_scheme && /\A(?:https?|ftp)\z/i =~ to_scheme
end
# Performs one HTTP GET for +target+ and streams the response body into +buf+.
#
# buf::     receives the body chunks; buf.io gets the status and header fields.
# target::  the URI to fetch.
# proxy::   nil, or the triple [proxy_uri, proxy_user, proxy_pass].
# options:: option Hash; String-keyed entries become request header fields.
#
# A redirect response throws :open_uri_redirect with the parsed Location URI;
# other non-success responses raise OpenURI::HTTPError.
#
# NOTE(review): the embedded numbering skips many lines here (guards around
# the first raises, several else/end lines, the body append and the start of
# the connection are not visible); comments describe only what is shown.
281 def OpenURI.open_http(buf, target, proxy, options) # :nodoc:
283 proxy_uri, proxy_user, proxy_pass = proxy
# Exact class comparison: any class other than URI::HTTP is refused.
284 raise "Non-HTTP proxy URI: #{proxy_uri}" if proxy_uri.class != URI::HTTP
# NOTE(review): the condition for this raise is not visible here.
288 raise ArgumentError, "userinfo not supported. [RFC3986]"
# String keys are header fields; Symbol keys are options.
292 options.each {|k, v| header[k] = v if String === k }
296 if URI::HTTP === target
# Credentials fall back to the userinfo part of the proxy URI.
299 unless proxy_user && proxy_pass
300 proxy_user, proxy_pass = proxy_uri.userinfo.split(':') if proxy_uri.userinfo
302 if proxy_user && proxy_pass
303 klass = Net::HTTP::Proxy(proxy_uri.hostname, proxy_uri.port, proxy_user, proxy_pass)
305 klass = Net::HTTP::Proxy(proxy_uri.hostname, proxy_uri.port)
308 target_host = target.hostname
309 target_port = target.port
310 request_uri = target.request_uri
312 # FTP over HTTP proxy
# The connection goes to the proxy and the full target URI is requested.
313 target_host = proxy_uri.hostname
314 target_port = proxy_uri.port
315 request_uri = target.to_s
316 if proxy_user && proxy_pass
317 header["Proxy-Authorization"] =
318 'Basic ' + ["#{proxy_user}:#{proxy_pass}"].pack('m0')
322 http = proxy ? klass.new(target_host, target_port) : klass.new(target_host, target_port, nil)
# TLS settings apply only when target is exactly a URI::HTTPS.
323 if target.class == URI::HTTPS
# Peer verification is the default verify mode.
326 http.verify_mode = options[:ssl_verify_mode] || OpenSSL::SSL::VERIFY_PEER
327 http.min_version = options[:ssl_min_version]
328 http.max_version = options[:ssl_max_version]
329 store = OpenSSL::X509::Store.new
# :ssl_ca_cert may be a single path or a list; each entry is tested for
# being a directory.
330 if options[:ssl_ca_cert]
331 Array(options[:ssl_ca_cert]).each do |cert|
332 if File.directory? cert
339 store.set_default_paths
341 http.cert_store = store
# Timeouts are set only when the key is present, so an explicit nil is
# passed through as well.
343 if options.include? :read_timeout
344 http.read_timeout = options[:read_timeout]
346 if options.include? :open_timeout
347 http.open_timeout = options[:open_timeout]
352 req = Net::HTTP::Get.new(request_uri, header)
353 if options.include? :http_basic_authentication
354 user, pass = options[:http_basic_authentication]
355 req.basic_auth user, pass
357 http.request(req) {|response|
# Callbacks run only for success responses; nil is passed when the response
# has no Content-Length field.
359 if options[:content_length_proc] && Net::HTTPSuccess === resp
360 if resp.key?('Content-Length')
361 options[:content_length_proc].call(resp['Content-Length'].to_i)
363 options[:content_length_proc].call(nil)
366 resp.read_body {|str|
368 if options[:progress_proc] && Net::HTTPSuccess === resp
369 options[:progress_proc].call(buf.size)
377 io.status = [resp.code, resp.message]
378 resp.each_name {|name| buf.io.meta_add_field2 name, resp.get_fields(name) }
380 when Net::HTTPSuccess
381 when Net::HTTPMovedPermanently, # 301
382 Net::HTTPFound, # 302
383 Net::HTTPSeeOther, # 303
384 Net::HTTPTemporaryRedirect, # 307
385 Net::HTTPPermanentRedirect # 308
387 loc_uri = URI.parse(resp['location'])
388 rescue URI::InvalidURIError
389 raise OpenURI::HTTPError.new(io.status.join(' ') + ' (Invalid Location URI)', io)
# Hands the redirect target to the catch(:open_uri_redirect) in open_loop.
391 throw :open_uri_redirect, loc_uri
393 raise OpenURI::HTTPError.new(io.status.join(' '), io)
# Error raised by OpenURI.open_http for a response that is neither a success
# nor a followable redirect, or whose Location field is not a valid URI.
# It is constructed with a message and the +io+ object that carries the
# response status and header fields.
# NOTE(review): the body of initialize is not visible in this listing.
397 class HTTPError < StandardError
398 def initialize(message, io)
405 # Raised on redirection,
406 # only occurs when +redirect+ option for HTTP is +false+.
# OpenURI.open_loop constructs it with the status line, the response io and
# the redirect target URI.
# NOTE(review): the body of initialize is not visible in this listing.
407 class HTTPRedirect < HTTPError
408 def initialize(message, io, uri)
# Raised by OpenURI.open_loop when the number of visited URIs exceeds the
# :max_redirects limit.
415 class TooManyRedirects < HTTPError
# Holds the body of a download; OpenURI.open_http reads buf.size and buf.io.
# NOTE(review): most of this class is not visible in this listing. The lines
# shown create a Tempfile named 'open-uri' once a StringIO-backed buffer has
# grown past StringMax, carry the Meta information over to it, and make sure
# @io has been initialized as a Meta object.
418 class Buffer # :nodoc: all
429 if StringIO === @io && StringMax < @size
431 io = Tempfile.new('open-uri')
# Copy status and header fields from the old io to the new one.
433 Meta.init io, @io if Meta === @io
440 Meta.init @io unless Meta === @io
# Building blocks for parsing header field values; they are interpolated into
# the patterns used for the Content-Type and Content-Encoding fields.
# RE_LWS: a run of CR, LF, tab or space characters.
446 RE_LWS = /[\r\n\t ]+/n
# RE_TOKEN: a run of characters excluding \x00 through space, \x7f and the
# separator characters ( ) < > @ , ; : \ " / [ ] ? = { }.
447 RE_TOKEN = %r{[^\x00- ()<>@,;:\\"/\[\]?={}\x7f]+}n
# RE_QUOTED_STRING: a double-quoted string that may contain backslash escapes.
448 RE_QUOTED_STRING = %r{"(?:[\r\n\t !#-\[\]-~\x80-\xff]|\\[\x00-\x7f])*"}n
# RE_PARAMETERS: zero or more ";name=value" pairs, where value is a token or
# a quoted string and optional whitespace is allowed around each part.
449 RE_PARAMETERS = %r{(?:;#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?=#{RE_LWS}?(?:#{RE_TOKEN}|#{RE_QUOTED_STRING})#{RE_LWS}?)*}n
452 # Mixin for holding meta-information.
# Prepares +obj+ to hold meta-information and, when +src+ is given, copies
# the status, base URI and header fields from +src+ to +obj+.
# NOTE(review): the lines that extend +obj+ and establish the scope in which
# @meta/@metas are assigned are not visible in this listing.
454 def Meta.init(obj, src=nil) # :nodoc:
458 @meta = {} # name to string. legacy.
459 @metas = {} # name to array of strings.
462 obj.status = src.status
463 obj.base_uri = src.base_uri
# Re-adding through meta_add_field2 rebuilds both tables on obj.
464 src.metas.each {|name, values|
465 obj.meta_add_field2(name, values)
470 # returns an Array that consists of status code and message.
471 attr_accessor :status
473 # returns a URI that is the base of relative URIs in the data.
474 # It may differ from the URI supplied by a user due to redirection.
475 attr_accessor :base_uri
477 # returns a Hash that represents header fields.
478 # The Hash keys are downcased for canonicalization.
479 # The Hash values are a field body.
480 # If there are multiple fields with the same field name,
481 # the field values are concatenated with a comma.
484 # returns a Hash that represents header fields.
485 # The Hash keys are downcased for canonicalization.
486 # The Hash values are arrays of field values.
# Applies the encoding named by the charset to the receiver; it is called
# from meta_add_field2 whenever a content-type field is stored.
# NOTE(review): the handling of a missing or unknown charset between the
# visible lines is not shown in this listing.
489 def meta_setup_encoding # :nodoc:
490 charset = self.charset
494 enc = Encoding.find(charset)
# Binary is the fallback when no encoding was determined.
498 enc = Encoding::ASCII_8BIT unless enc
# The receiver may be string-like (force_encoding), wrap a string
# (responds to string), or otherwise accept set_encoding.
499 if self.respond_to? :force_encoding
500 self.force_encoding(enc)
501 elsif self.respond_to? :string
502 self.string.force_encoding(enc)
504 self.set_encoding enc
# Stores a header field: the list of values goes into @metas and the values
# joined with ', ' go into the legacy @meta table.
# NOTE(review): one line before the assignments is not visible here;
# presumably it normalizes +name+ -- confirm.
508 def meta_add_field2(name, values) # :nodoc:
510 @metas[name] = values
511 @meta[name] = values.join(', ')
# A new content-type may carry a charset, so the encoding is refreshed.
512 meta_setup_encoding if name == 'content-type'
# Stores a header field that has exactly one value, by wrapping +value+ in a
# one-element array and delegating to meta_add_field2.
def meta_add_field(name, value) # :nodoc:
  single_value = [value]
  meta_add_field2(name, single_value)
end
519 # returns a Time that represents the Last-Modified field.
521 if vs = @metas['last-modified']
# Parses the stored content-type field into
# ["type/subtype", [attribute, value], ...], with the subtype and attribute
# names downcased.
# NOTE(review): the assignment of +type+, the initialization of +parameters+
# and the non-matching branch are not visible in this listing.
529 def content_type_parse # :nodoc:
530 vs = @metas['content-type']
531 # The last (?:;#{RE_LWS}?)? matches extra ";" which violates RFC2045.
532 if vs && %r{\A#{RE_LWS}?(#{RE_TOKEN})#{RE_LWS}?/(#{RE_TOKEN})#{RE_LWS}?(#{RE_PARAMETERS})(?:;#{RE_LWS}?)?\z}no =~ vs.join(', ')
534 subtype = $2.downcase
# $3 is the raw parameter list; each value is a bare token (val) or a
# quoted string (qval).
536 $3.scan(/;#{RE_LWS}?(#{RE_TOKEN})#{RE_LWS}?=#{RE_LWS}?(?:(#{RE_TOKEN})|(#{RE_QUOTED_STRING}))/no) {|att, val, qval|
# Strip the surrounding quotes and turn each backslash escape into the
# escaped character itself.
538 val = qval[1...-1].gsub(/[\r\n\t !#-\[\]-~\x80-\xff]+|(\\[\x00-\x7f])/n) { $1 ? $1[1,1] : $& }
540 parameters << [att.downcase, val]
542 ["#{type}/#{subtype}", *parameters]
548 # returns "type/subtype" which is MIME Content-Type.
549 # It is downcased for canonicalization.
550 # Content-Type parameters are stripped.
552 type, *_ = content_type_parse
553 type || 'application/octet-stream'
556 # returns a charset parameter in Content-Type field.
557 # It is downcased for canonicalization.
559 # If charset parameter is not given but a block is given,
560 # the block is called and its result is returned.
561 # It can be used to guess charset.
563 # If neither a charset parameter nor a block is given,
564 # nil is returned, except for text types.
565 # In that case, "utf-8" is returned as defined by RFC6838 4.2.1
567 type, *parameters = content_type_parse
568 if pair = parameters.assoc('charset')
572 elsif type && %r{\Atext/} =~ type
573 "utf-8" # RFC6838 4.2.1
579 # Returns a list of encodings in Content-Encoding field as an array of
582 # The encodings are downcased for canonicalization.
584 vs = @metas['content-encoding']
585 if vs && %r{\A#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?(?:,#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?)*}o =~ (v = vs.join(', '))
586 v.scan(RE_TOKEN).map {|content_coding| content_coding.downcase}
593 # Mixin for HTTP and FTP URIs.
595 # OpenURI::OpenRead#open provides `open' for URI::HTTP and URI::FTP.
597 # OpenURI::OpenRead#open takes optional 3 arguments as:
599 # OpenURI::OpenRead#open([mode [, perm]] [, options]) [{|io| ... }]
601 # OpenURI::OpenRead#open returns an IO-like object if block is not given.
602 # Otherwise it yields the IO object and returns the value of the block.
603 # The IO object is extended with OpenURI::Meta.
605 # +mode+ and +perm+ are the same as Kernel#open.
607 # However, +mode+ must be read mode because OpenURI::OpenRead#open doesn't
608 # support write mode (yet).
609 # Also +perm+ is ignored because it is meaningful only for file creation.
611 # +options+ must be a hash.
613 # Each option with a string key specifies an extra header field for HTTP.
614 # I.e., it is ignored for FTP without HTTP proxy.
616 # The hash may include other options, where keys are symbols:
620 # :proxy => "http://proxy.foo.com:8000/"
621 # :proxy => URI.parse("http://proxy.foo.com:8000/")
626 # If :proxy option is specified, the value should be String, URI,
629 # When String or URI is given, it is treated as proxy URI.
631 # When true is given or the option itself is not specified,
632 # environment variable `scheme_proxy' is examined.
633 # `scheme' is replaced by `http', `https' or `ftp'.
635 # When false or nil is given, the environment variables are ignored and
636 # connection will be made to a server directly.
638 # [:proxy_http_basic_authentication]
640 # :proxy_http_basic_authentication =>
641 # ["http://proxy.foo.com:8000/", "proxy-user", "proxy-password"]
642 # :proxy_http_basic_authentication =>
643 # [URI.parse("http://proxy.foo.com:8000/"),
644 # "proxy-user", "proxy-password"]
646 # If :proxy_http_basic_authentication option is specified, the value should be an Array with 3
647 # elements. It should contain a proxy URI, a proxy user name and a proxy
648 # password. The proxy URI should be a String, an URI or nil. The proxy
649 # user name and password should be a String.
651 # If nil is given for the proxy URI, this option is just ignored.
653 # If both :proxy and :proxy_http_basic_authentication are specified,
654 # ArgumentError is raised.
656 # [:http_basic_authentication]
658 # :http_basic_authentication=>[user, password]
660 # If :http_basic_authentication is specified,
661 # the value should be an array which contains 2 strings:
662 # username and password.
663 # It is used for HTTP Basic authentication defined by RFC 2617.
665 # [:content_length_proc]
667 # :content_length_proc => lambda {|content_length| ... }
669 # If :content_length_proc option is specified, the option value procedure
670 # is called before actual transfer is started.
671 # It takes one argument, which is expected content length in bytes.
673 # If two or more transfers are performed by HTTP redirection, the
674 # procedure is called only once for the last transfer.
676 # When expected content length is unknown, the procedure is called with
677 # nil. This happens when the HTTP response has no Content-Length header.
681 # :progress_proc => lambda {|size| ...}
683 # If :progress_proc option is specified, the proc is called with one
684 # argument each time when `open' gets content fragment from network.
685 # The argument +size+ is the accumulated transferred size in bytes.
687 # If two or more transfers are performed by HTTP redirection, the procedure
688 # is called only once for the last transfer.
690 # :progress_proc and :content_length_proc are intended to be used for
692 # For example, it can be implemented as follows using Ruby/ProgressBar.
696 # :content_length_proc => lambda {|t|
698 # pbar = ProgressBar.new("...", t)
699 # pbar.file_transfer_mode
702 # :progress_proc => lambda {|s|
708 # :read_timeout=>nil (no timeout)
709 # :read_timeout=>10 (10 second)
711 # :read_timeout option specifies a timeout of read for http connections.
715 # :open_timeout=>nil (no timeout)
716 # :open_timeout=>10 (10 second)
718 # :open_timeout option specifies a timeout of open for http connections.
722 # :ssl_ca_cert=>filename or an Array of filenames
724 # :ssl_ca_cert is used to specify CA certificate for SSL.
725 # If it is given, default certificates are not used.
729 # :ssl_verify_mode=>mode
731 # :ssl_verify_mode is used to specify openssl verify mode.
735 # :ssl_min_version=>:TLS1_2
737 # :ssl_min_version option specifies the minimum allowed SSL/TLS protocol
738 # version. See also OpenSSL::SSL::SSLContext#min_version=.
742 # :ssl_max_version=>:TLS1_2
744 # :ssl_max_version option specifies the maximum allowed SSL/TLS protocol
745 # version. See also OpenSSL::SSL::SSLContext#max_version=.
749 # :ftp_active_mode=>bool
751 # <tt>:ftp_active_mode => true</tt> is used to make ftp active mode.
752 # Ruby 1.9 uses passive mode by default.
753 # Note that the active mode is default in Ruby 1.8 or prior.
759 # +:redirect+ is true by default. <tt>:redirect => false</tt> is used to
760 # disable all HTTP redirects.
762 # OpenURI::HTTPRedirect exception is raised on redirection.
763 # Using +true+ also means that redirections between http and ftp are
768 # :max_redirects=>int
770 # Number of HTTP redirects allowed before OpenURI::TooManyRedirects is raised.
773 # [:request_specific_fields]
775 # :request_specific_fields => {}
776 # :request_specific_fields => lambda {|url| ...}
778 # :request_specific_fields option allows specifying custom header fields that
779 # are sent with the HTTP request. It can be passed as a Hash or a Proc that
780 # gets evaluated on each request and returns a Hash of header fields.
782 # If a Hash is provided, it specifies the headers only for the initial
783 # request and these headers will not be sent on redirects.
785 # If a Proc is provided, it will be executed for each request including
786 # redirects, allowing dynamic header customization based on the request URL.
787 # It is important that the Proc returns a Hash. And this Hash specifies the
788 # headers to be sent with the request.
790 # For Example with Hash
791 # URI.open("http://...",
792 # request_specific_fields: {"Authorization" => "token dummy"}) {|f| ... }
794 # For Example with Proc:
795 # URI.open("http://...",
796 # request_specific_fields: lambda { |uri|
797 # if uri.host == "example.com"
798 # {"Authorization" => "token dummy"}
# Opens the resource identified by the receiver by handing the receiver,
# every argument and the block to OpenURI.open_uri; returns its result.
def open(*rest, &block)
  resource = self
  OpenURI.open_uri(resource, *rest, &block)
end
808 # OpenURI::OpenRead#read([ options ]) reads a content referenced by self and
809 # returns the content as string.
810 # The string is extended with OpenURI::Meta.
811 # The argument +options+ is the same as for OpenURI::OpenRead#open.
813 self.open(options) {|f|
# Fetches the receiver over HTTP into +buf+ by delegating to
# OpenURI.open_http with the receiver as the target; returns its result.
def buffer_open(buf, proxy, options) # :nodoc:
  target = self
  OpenURI.open_http(buf, target, proxy, options)
end
828 include OpenURI::OpenRead
# Downloads the file named by the receiver into +buf+ over FTP; the first
# visible statement instead delegates to OpenURI.open_http.
#
# buf::     receives the downloaded data.
# proxy::   passed through to OpenURI.open_http.
# options:: reads :ftp_active_mode, :content_length_proc and :progress_proc.
#
# NOTE(review): the embedded numbering skips several lines here (the guard
# around the open_http call, the loading of net/ftp, creation of the ftp
# object and block terminators are not visible); comments describe only
# the statements shown.
832 def buffer_open(buf, proxy, options) # :nodoc:
# NOTE(review): presumably taken only when a proxy is configured -- confirm.
834 OpenURI.open_http(buf, self, proxy, options)
841 abort "net/ftp is not found. You may need to `gem install net-ftp` to install net/ftp."
845 path = path.sub(%r{\A/}, '%2F') # re-encode the beginning slash because uri library decodes it.
# Split on "/" keeping empty components, then percent-decode each one.
846 directories = path.split(%r{/}, -1)
847 directories.each {|d|
848 d.gsub!(/%([0-9A-Fa-f][0-9A-Fa-f])/) { [$1].pack("H2") }
# The last component is the file to retrieve; the rest are directories.
850 unless filename = directories.pop
851 raise ArgumentError, "no filename: #{self.inspect}"
853 directories.each {|d|
855 raise ArgumentError, "invalid directory: #{d.inspect}"
# The filename is interpolated into the RETR command below, so CR/LF in it
# is rejected.
858 if /[\r\n]/ =~ filename
859 raise ArgumentError, "invalid filename: #{filename.inspect}"
861 typecode = self.typecode
862 if typecode && /\A[aid]\z/ !~ typecode
863 raise ArgumentError, "invalid typecode: #{typecode.inspect}"
866 # The access sequence is defined by RFC 1738
868 ftp.connect(self.hostname, self.port)
# Passive mode unless :ftp_active_mode is truthy.
869 ftp.passive = !options[:ftp_active_mode]
870 # todo: extract user/passwd from .netrc.
873 user, passwd = self.userinfo.split(/:/) if self.userinfo
874 ftp.login(user, passwd)
# One CWD command per directory component.
875 directories.each {|cwd|
876 ftp.voidcmd("CWD #{cwd}")
879 # xxx: typecode D is not handled.
880 ftp.voidcmd("TYPE #{typecode.upcase}")
882 if options[:content_length_proc]
883 options[:content_length_proc].call(ftp.size(filename))
# Data arrives in 4096-byte blocks; progress is reported as buf.size.
885 ftp.retrbinary("RETR #{filename}", 4096) { |str|
887 options[:progress_proc].call(buf.size) if options[:progress_proc]
893 include OpenURI::OpenRead