Improve RFC 3986 conformance of url package.
[emacs.git] / lisp / url / url-parse.el
blob40183a4f5336efa23a68f36e3206b1d552be49bd
1 ;;; url-parse.el --- Uniform Resource Locator parser
3 ;; Copyright (C) 1996-1999, 2004-2012 Free Software Foundation, Inc.
5 ;; Keywords: comm, data, processes
7 ;; This file is part of GNU Emacs.
8 ;;
9 ;; GNU Emacs is free software: you can redistribute it and/or modify
10 ;; it under the terms of the GNU General Public License as published by
11 ;; the Free Software Foundation, either version 3 of the License, or
12 ;; (at your option) any later version.
14 ;; GNU Emacs is distributed in the hope that it will be useful,
15 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 ;; GNU General Public License for more details.
19 ;; You should have received a copy of the GNU General Public License
20 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
22 ;;; Commentary:
24 ;;; Code:
26 (require 'url-vars)
27 (require 'auth-source)
28 (eval-when-compile (require 'cl))
30 (autoload 'url-scheme-get-property "url-methods")
32 (defstruct (url
33 (:constructor nil)
34 (:constructor url-parse-make-urlobj
35 (&optional type user password host portspec filename
36 target attributes fullness))
37 (:copier nil))
38 type user password host portspec filename target attributes fullness
39 silent (use-cookies t))
41 (defsubst url-port (urlobj)
42 (or (url-portspec urlobj)
43 (if (url-fullness urlobj)
44 (url-scheme-get-property (url-type urlobj) 'default-port))))
46 (defsetf url-port (urlobj) (port) `(setf (url-portspec ,urlobj) ,port))
48 ;;;###autoload
49 (defun url-recreate-url (urlobj)
50 "Recreate a URL string from the parsed URLOBJ."
51 (let ((type (url-type urlobj))
52 (user (url-user urlobj))
53 (pass (url-password urlobj))
54 (host (url-host urlobj))
55 (port (url-portspec urlobj))
56 (file (url-filename urlobj))
57 (frag (url-target urlobj)))
58 (concat (if type (concat type ":"))
59 (if (url-fullness urlobj) "//")
60 (if (or user pass)
61 (concat user
62 (if pass (concat ":" pass))
63 "@"))
64 host
65 ;; RFC 3986: "omit the port component and its : delimiter
66 ;; if port is empty or if its value would be the same as
67 ;; that of the scheme's default."
68 (and port
69 (or (null type)
70 (not (equal port
71 (url-scheme-get-property type
72 'default-port))))
73 (format ":%d" (url-port urlobj)))
74 (or file "/")
75 (if frag (concat "#" frag)))))
77 (defun url-recreate-url-attributes (urlobj)
78 "Recreate the attributes of an URL string from the parsed URLOBJ."
79 (when (url-attributes urlobj)
80 (concat ";"
81 (mapconcat (lambda (x)
82 (if (cdr x)
83 (concat (car x) "=" (cdr x))
84 (car x)))
85 (url-attributes urlobj) ";"))))
86 (make-obsolete 'url-recreate-url-attributes nil "24.2")
88 ;;;###autoload
89 (defun url-generic-parse-url (url)
90 "Return an URL-struct of the parts of URL.
91 The CL-style struct contains the following fields:
93 TYPE is the URI scheme (string or nil).
94 USER is the user name (string or nil).
95 PASSWORD is the password (string [deprecated] or nil).
96 HOST is the host (a registered name, IP literal in square
97 brackets, or IPv4 address in dotted-decimal form).
98 PORTSPEC is the specified port (a number), or nil.
99 FILENAME is the path AND the query component of the URI.
100 TARGET is the fragment identifier component (used to refer to a
101 subordinate resource, e.g. a part of a webpage).
102 ATTRIBUTES is nil; this slot originally stored the attribute and
103 value alists for IMAP URIs, but this feature was removed
104 since it conflicts with RFC 3986.
105 FULLNESS is non-nil iff the authority component of the URI is
106 present.
108 The parser follows RFC 3986, except that it also tries to handle
109 URIs that are not fully specified (e.g. lacking TYPE), and it
110 does not check for or perform %-encoding.
112 Here is an example. The URL
114 foo://bob:pass@example.com:42/a/b/c.dtb?type=animal&name=narwhal#nose
116 parses to
118 TYPE = \"foo\"
119 USER = \"bob\"
120 PASSWORD = \"pass\"
121 HOST = \"example.com\"
122 PORTSPEC = 42
123 FILENAME = \"/a/b/c.dtb?type=animal&name=narwhal\"
124 TARGET = \"nose\"
125 ATTRIBUTES = nil
126 FULLNESS = t"
127 (if (null url)
128 (url-parse-make-urlobj)
129 (with-temp-buffer
130 ;; Don't let those temp-buffer modifications accidentally
131 ;; deactivate the mark of the current-buffer.
132 (let ((deactivate-mark nil))
133 (set-syntax-table url-parse-syntax-table)
134 (erase-buffer)
135 (insert url)
136 (goto-char (point-min))
137 (let ((save-pos (point))
138 scheme user pass host port file fragment full
139 (inhibit-read-only t))
141 ;; 3.1. Scheme
142 ;; This is nil for a URI that is not fully specified.
143 (when (looking-at "\\([a-zA-Z][-a-zA-Z0-9+.]*\\):")
144 (goto-char (match-end 0))
145 (setq save-pos (point))
146 (setq scheme (downcase (match-string 1))))
148 ;; 3.2. Authority
149 (when (looking-at "//")
150 (setq full t)
151 (forward-char 2)
152 (setq save-pos (point))
153 (skip-chars-forward "^/?#")
154 (setq host (buffer-substring save-pos (point)))
155 ;; 3.2.1 User Information
156 (if (string-match "^\\([^@]+\\)@" host)
157 (setq user (match-string 1 host)
158 host (substring host (match-end 0))))
159 (if (and user (string-match "\\`\\([^:]*\\):\\(.*\\)" user))
160 (setq pass (match-string 2 user)
161 user (match-string 1 user)))
162 (cond
163 ;; IPv6 literal address.
164 ((string-match "^\\(\\[[^]]+\\]\\)\\(?::\\([0-9]*\\)\\)?$" host)
165 (setq port (match-string 2 host)
166 host (match-string 1 host)))
167 ;; Registered name or IPv4 address.
168 ((string-match ":\\([0-9]*\\)$" host)
169 (setq port (match-string 1 host)
170 host (substring host 0 (match-beginning 0)))))
171 (cond ((equal port "")
172 (setq port nil))
173 (port
174 (setq port (string-to-number port))))
175 (setq host (downcase host)))
177 (and (null port)
178 scheme
179 (setq port (url-scheme-get-property scheme 'default-port)))
181 ;; Now point is on the / ? or # which terminates the
182 ;; authority, or at the end of the URI, or (if there is no
183 ;; authority) at the beginning of the absolute path.
185 (setq save-pos (point))
186 (if (string= "data" scheme)
187 ;; For the "data" URI scheme, all the rest is the FILE.
188 (setq file (buffer-substring save-pos (point-max)))
189 ;; For hysterical raisins, our data structure returns the
190 ;; path and query components together in one slot.
191 ;; 3.3. Path
192 (skip-chars-forward "^?#")
193 ;; 3.4. Query
194 (when (looking-at "?")
195 (skip-chars-forward "^#"))
196 (setq file (buffer-substring save-pos (point)))
197 ;; 3.5 Fragment
198 (when (looking-at "#")
199 (let ((opoint (point)))
200 (forward-char 1)
201 (unless (eobp)
202 (setq fragment (buffer-substring (point) (point-max))))
203 (delete-region opoint (point-max)))))
205 (if (and host (string-match "%[0-9][0-9]" host))
206 (setq host (url-unhex-string host)))
207 (url-parse-make-urlobj scheme user pass host port file
208 fragment nil full))))))
210 (defmacro url-bit-for-url (method lookfor url)
211 `(let* ((urlobj (url-generic-parse-url url))
212 (bit (funcall ,method urlobj))
213 (methods (list 'url-recreate-url
214 'url-host))
215 auth-info)
216 (while (and (not bit) (> (length methods) 0))
217 (setq auth-info (auth-source-search
218 :max 1
219 :host (funcall (pop methods) urlobj)
220 :port (url-type urlobj)))
221 (setq bit (plist-get (nth 0 auth-info) ,lookfor))
222 (when (functionp bit)
223 (setq bit (funcall bit))))
224 bit))
226 (defun url-user-for-url (url)
227 "Attempt to use .authinfo to find a user for this URL."
228 (url-bit-for-url 'url-user :user url))
230 (defun url-password-for-url (url)
231 "Attempt to use .authinfo to find a password for this URL."
232 (url-bit-for-url 'url-password :secret url))
234 (provide 'url-parse)
236 ;;; url-parse.el ends here