Update copyright year to 2014 by running admin/update-copyright.
[emacs.git] / lisp / url / url-parse.el
blob50de84f5167e9be850c8c7e74e37eb23f7ffddd0
1 ;;; url-parse.el --- Uniform Resource Locator parser
3 ;; Copyright (C) 1996-1999, 2004-2014 Free Software Foundation, Inc.
5 ;; Keywords: comm, data, processes
7 ;; This file is part of GNU Emacs.
8 ;;
9 ;; GNU Emacs is free software: you can redistribute it and/or modify
10 ;; it under the terms of the GNU General Public License as published by
11 ;; the Free Software Foundation, either version 3 of the License, or
12 ;; (at your option) any later version.
14 ;; GNU Emacs is distributed in the hope that it will be useful,
15 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 ;; GNU General Public License for more details.
19 ;; You should have received a copy of the GNU General Public License
20 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
22 ;;; Commentary:
24 ;;; Code:
26 (require 'url-vars)
27 (require 'auth-source)
28 (eval-when-compile (require 'cl-lib))
30 (autoload 'url-scheme-get-property "url-methods")
32 (cl-defstruct (url
33 (:constructor nil)
34 (:constructor url-parse-make-urlobj
35 (&optional type user password host portspec filename
36 target attributes fullness))
37 (:copier nil))
38 type user password host portspec filename target attributes fullness
39 silent (use-cookies t))
41 (defsubst url-port (urlobj)
42 "Return the port number for the URL specified by URLOBJ.
43 If the port spec is nil (i.e. URLOBJ specifies no port number),
44 return the default port number for URLOBJ's scheme."
45 (declare (gv-setter (lambda (port) `(setf (url-portspec ,urlobj) ,port))))
46 (or (url-portspec urlobj)
47 (if (url-type urlobj)
48 (url-scheme-get-property (url-type urlobj) 'default-port))))
50 (defun url-path-and-query (urlobj)
51 "Return the path and query components of URLOBJ.
52 These two components are stored together in the FILENAME slot of
53 the object. The return value of this function is (PATH . QUERY),
54 where each of PATH and QUERY are strings or nil."
55 (let ((name (url-filename urlobj))
56 path query)
57 (when name
58 (if (string-match "\\?" name)
59 (setq path (substring name 0 (match-beginning 0))
60 query (substring name (match-end 0)))
61 (setq path name)))
62 (if (equal path "") (setq path nil))
63 (if (equal query "") (setq query nil))
64 (cons path query)))
66 (defun url-port-if-non-default (urlobj)
67 "Return the port number specified by URLOBJ, if it is not the default.
68 If the specified port number is the default, return nil."
69 (let ((port (url-portspec urlobj))
70 type)
71 (and port
72 (or (null (setq type (url-type urlobj)))
73 (not (equal port (url-scheme-get-property type 'default-port))))
74 port)))
76 ;;;###autoload
77 (defun url-recreate-url (urlobj)
78 "Recreate a URL string from the parsed URLOBJ."
79 (let* ((type (url-type urlobj))
80 (user (url-user urlobj))
81 (pass (url-password urlobj))
82 (host (url-host urlobj))
83 ;; RFC 3986: "omit the port component and its : delimiter if
84 ;; port is empty or if its value would be the same as that of
85 ;; the scheme's default."
86 (port (url-port-if-non-default urlobj))
87 (file (url-filename urlobj))
88 (frag (url-target urlobj)))
89 (concat (if type (concat type ":"))
90 (if (url-fullness urlobj) "//")
91 (if (or user pass)
92 (concat user
93 (if pass (concat ":" pass))
94 "@"))
95 host
96 (if port (format ":%d" (url-port urlobj)))
97 (or file "/")
98 (if frag (concat "#" frag)))))
100 (defun url-recreate-url-attributes (urlobj)
101 "Recreate the attributes of an URL string from the parsed URLOBJ."
102 (declare (obsolete nil "24.3"))
103 (when (url-attributes urlobj)
104 (concat ";"
105 (mapconcat (lambda (x)
106 (if (cdr x)
107 (concat (car x) "=" (cdr x))
108 (car x)))
109 (url-attributes urlobj) ";"))))
111 ;;;###autoload
112 (defun url-generic-parse-url (url)
113 "Return an URL-struct of the parts of URL.
114 The CL-style struct contains the following fields:
116 TYPE is the URI scheme (string or nil).
117 USER is the user name (string or nil).
118 PASSWORD is the password (string [deprecated] or nil).
119 HOST is the host (a registered name, IP literal in square
120 brackets, or IPv4 address in dotted-decimal form).
121 PORTSPEC is the specified port (a number), or nil.
122 FILENAME is the path AND the query component of the URI.
123 TARGET is the fragment identifier component (used to refer to a
124 subordinate resource, e.g. a part of a webpage).
125 ATTRIBUTES is nil; this slot originally stored the attribute and
126 value alists for IMAP URIs, but this feature was removed
127 since it conflicts with RFC 3986.
128 FULLNESS is non-nil iff the hierarchical sequence component of
129 the URL starts with two slashes, \"//\".
131 The parser follows RFC 3986, except that it also tries to handle
132 URIs that are not fully specified (e.g. lacking TYPE), and it
133 does not check for or perform %-encoding.
135 Here is an example. The URL
137 foo://bob:pass@example.com:42/a/b/c.dtb?type=animal&name=narwhal#nose
139 parses to
141 TYPE = \"foo\"
142 USER = \"bob\"
143 PASSWORD = \"pass\"
144 HOST = \"example.com\"
145 PORTSPEC = 42
146 FILENAME = \"/a/b/c.dtb?type=animal&name=narwhal\"
147 TARGET = \"nose\"
148 ATTRIBUTES = nil
149 FULLNESS = t"
150 (if (null url)
151 (url-parse-make-urlobj)
152 (with-temp-buffer
153 ;; Don't let those temp-buffer modifications accidentally
154 ;; deactivate the mark of the current-buffer.
155 (let ((deactivate-mark nil))
156 (set-syntax-table url-parse-syntax-table)
157 (erase-buffer)
158 (insert url)
159 (goto-char (point-min))
160 (let ((save-pos (point))
161 scheme user pass host port file fragment full
162 (inhibit-read-only t))
164 ;; 3.1. Scheme
165 ;; This is nil for a URI that is not fully specified.
166 (when (looking-at "\\([a-zA-Z][-a-zA-Z0-9+.]*\\):")
167 (goto-char (match-end 0))
168 (setq save-pos (point))
169 (setq scheme (downcase (match-string 1))))
171 ;; 3.2. Authority
172 (when (looking-at "//")
173 (setq full t)
174 (forward-char 2)
175 (setq save-pos (point))
176 (skip-chars-forward "^/?#")
177 (setq host (buffer-substring save-pos (point)))
178 ;; 3.2.1 User Information
179 (if (string-match "^\\([^@]+\\)@" host)
180 (setq user (match-string 1 host)
181 host (substring host (match-end 0))))
182 (if (and user (string-match "\\`\\([^:]*\\):\\(.*\\)" user))
183 (setq pass (match-string 2 user)
184 user (match-string 1 user)))
185 (cond
186 ;; IPv6 literal address.
187 ((string-match "^\\(\\[[^]]+\\]\\)\\(?::\\([0-9]*\\)\\)?$" host)
188 (setq port (match-string 2 host)
189 host (match-string 1 host)))
190 ;; Registered name or IPv4 address.
191 ((string-match ":\\([0-9]*\\)$" host)
192 (setq port (match-string 1 host)
193 host (substring host 0 (match-beginning 0)))))
194 (cond ((equal port "")
195 (setq port nil))
196 (port
197 (setq port (string-to-number port))))
198 (setq host (downcase host)))
200 ;; Now point is on the / ? or # which terminates the
201 ;; authority, or at the end of the URI, or (if there is no
202 ;; authority) at the beginning of the absolute path.
204 (setq save-pos (point))
205 (if (string= "data" scheme)
206 ;; For the "data" URI scheme, all the rest is the FILE.
207 (setq file (buffer-substring save-pos (point-max)))
208 ;; For hysterical raisins, our data structure returns the
209 ;; path and query components together in one slot.
210 ;; 3.3. Path
211 (skip-chars-forward "^?#")
212 ;; 3.4. Query
213 (when (looking-at "?")
214 (skip-chars-forward "^#"))
215 (setq file (buffer-substring save-pos (point)))
216 ;; 3.5 Fragment
217 (when (looking-at "#")
218 (let ((opoint (point)))
219 (forward-char 1)
220 (unless (eobp)
221 (setq fragment (buffer-substring (point) (point-max))))
222 (delete-region opoint (point-max)))))
224 (if (and host (string-match "%[0-9][0-9]" host))
225 (setq host (url-unhex-string host)))
226 (url-parse-make-urlobj scheme user pass host port file
227 fragment nil full))))))
229 (defmacro url-bit-for-url (method lookfor url)
230 `(let* ((urlobj (url-generic-parse-url url))
231 (bit (funcall ,method urlobj))
232 (methods (list 'url-recreate-url
233 'url-host))
234 auth-info)
235 (while (and (not bit) (> (length methods) 0))
236 (setq auth-info (auth-source-search
237 :max 1
238 :host (funcall (pop methods) urlobj)
239 :port (url-type urlobj)))
240 (setq bit (plist-get (nth 0 auth-info) ,lookfor))
241 (when (functionp bit)
242 (setq bit (funcall bit))))
243 bit))
245 (defun url-user-for-url (url)
246 "Attempt to use .authinfo to find a user for this URL."
247 (url-bit-for-url 'url-user :user url))
249 (defun url-password-for-url (url)
250 "Attempt to use .authinfo to find a password for this URL."
251 (url-bit-for-url 'url-password :secret url))
253 (provide 'url-parse)
255 ;;; url-parse.el ends here