1 ;;; rng-uri.el --- URI parsing and manipulation
3 ;; Copyright (C) 2003, 2007-2011 Free Software Foundation, Inc.
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software: you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation, either version 3 of the License, or
13 ;; (at your option) any later version.
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
27 (defun rng-file-name-uri (f)
28 "Return a URI for the filename F.
29 Multibyte characters are left as is. Use `rng-uri-escape-multibyte' to
30 escape them using %HH."
31 (setq f
(expand-file-name f
))
33 (replace-regexp-in-string "[\000-\032\177<>#%\"{}|\\^[]`%?;]"
37 (if (and (> (length url
) 0)
43 (defun rng-uri-escape-multibyte (uri)
44 "Escape multibyte characters in URI."
45 (replace-regexp-in-string "[:nonascii:]"
47 (encode-coding-string uri
'utf-8
)))
49 (defun rng-percent-encode (str)
52 (format "%%%x%x" (/ ch
16) (% ch
16)))
53 (string-to-list str
))))
56 (defun rng-uri-file-name (uri)
57 "Return the filename represented by a URI.
58 Signal an error if URI is not a valid file URL."
59 (rng-uri-file-name-1 uri nil
))
61 (defun rng-uri-pattern-file-name-regexp (pattern)
62 "Return a regexp for filenames represented by URIs that match PATTERN."
63 (rng-uri-file-name-1 pattern
'match
))
65 (defun rng-uri-pattern-file-name-replace-match (pattern)
66 (rng-uri-file-name-1 pattern
'replace
))
68 ;; pattern is either nil or match or replace
69 (defun rng-uri-file-name-1 (uri pattern
)
70 (unless (string-match "\\`\\(?:[^%]\\|%[0-9a-fA-F]{2}\\)*\\'" uri
)
71 (rng-uri-error "Bad escapes in URI `%s'" uri
))
72 (setq uri
(rng-uri-unescape-multibyte uri
))
74 (or (rng-uri-split uri
)
75 (rng-uri-error "Cannot split URI `%s' into its components" uri
)))
76 (scheme (nth 0 components
))
77 (authority (nth 1 components
))
78 (path (nth 2 components
))
79 (absolutep (string-match "\\`/" path
))
80 (query (nth 3 components
))
81 (fragment-id (nth 4 components
)))
84 (rng-uri-error "URI `%s' does not have a scheme" uri
)))
85 ((not (string= (downcase scheme
) "file"))
86 (rng-uri-error "URI `%s' does not use the `file:' scheme" uri
)))
87 (when (not (member authority
88 (cons system-name
'(nil "" "localhost"))))
89 (rng-uri-error "URI `%s' does not start with `file:///' or `file://localhost/'"
92 (rng-uri-error "`?' not escaped in file URI `%s'" uri
))
94 (rng-uri-error "URI `%s' has a fragment identifier" uri
))
95 (when (string-match ";" path
)
96 (rng-uri-error "`;' not escaped in URI `%s'" uri
))
97 (when (string-match "%2[fF]" path
) ;; 2f is hex code of slash
98 (rng-uri-error "Escaped slash in URI `%s'" uri
))
99 (when (and (eq system-type
'windows-nt
)
101 (file-name-absolute-p (substring path
1)))
102 (setq path
(substring path
1)))
103 (when (and pattern
(string-match "\\`\\./" path
))
104 (setq path
(substring path
2)))
106 (cond ((eq pattern
'match
)
107 (rng-uri-unescape-unibyte-match path
))
108 ((eq pattern
'replace
)
109 (rng-uri-unescape-unibyte-replace path
2))
111 (rng-uri-unescape-unibyte path
))))
112 (when (string-match "\000" path
)
113 (rng-uri-error "URI `%s' has NUL character in path" uri
))
114 (when (eq pattern
'match
)
116 (concat (if absolutep
118 "\\(\\(?:[^/]*/\\)*\\)")
120 (cond ((eq pattern
'match
)
121 (concat "\\`" path
"\\'"))
122 ((and (eq pattern
'replace
)
127 (defun rng-uri-error (&rest args
)
128 (signal 'rng-uri-error
(list (apply 'format args
))))
130 (put 'rng-uri-error
'error-conditions
'(error rng-uri-error
))
131 (put 'rng-uri-error
'error-message
"Invalid URI")
133 (defun rng-uri-split (str)
134 (and (string-match "\\`\\(?:\\([^:/?#]+\\):\\)?\
135 \\(?://\\([^/?#]*\\)\\)?\
137 \\(?:\\?\\([^#]*\\)\\)?\
138 \\(?:#\\(\\(?:.\\|\n\\)*\\)\\)?\\'"
140 (list (match-string 1 str
)
144 (match-string 5 str
))))
146 (defun rng-uri-join (scheme authority path
&optional query fragment-id
)
150 (setq parts
(list "#" fragment-id
)))
154 (cons query parts
))))
155 (setq parts
(cons path parts
))
159 (cons authority parts
))))
164 (apply 'concat parts
))))
166 (defun rng-uri-resolve (uri-ref base-uri
)
167 "Resolve a possibly relative URI reference into absolute form.
168 URI-REF is the URI reference to be resolved.
169 BASE-URI is the base URI to use for resolving it.
170 The algorithm is specified by RFC 2396.
171 If there is some problem with URI-REF or BASE-URI, then
172 URI-REF will be returned."
173 (let* ((components (rng-uri-split uri-ref
))
174 (scheme (nth 0 components
))
175 (authority (nth 1 components
))
176 (path (nth 2 components
))
177 (query (nth 3 components
))
178 (fragment-id (nth 4 components
))
179 (base-components (rng-uri-split base-uri
)))
180 (if (or (not components
)
182 (not base-components
)
183 (not (nth 0 base-components
)))
185 (setq scheme
(nth 0 base-components
))
186 (when (not authority
)
187 (setq authority
(nth 1 base-components
))
188 (if (and (equal path
"") (not query
))
189 ;; Handle same document reference by returning
190 ;; same URI (RFC 2396bis does this too).
191 (setq path
(nth 2 base-components
)
192 query
(nth 3 base-components
))
193 (setq path
(rng-resolve-path path
(nth 2 base-components
)))))
200 ;; See RFC 2396 5.2, steps 5 and 6
201 (defun rng-resolve-path (path base-path
)
203 (if (or (string-match "\\`/" path
)
204 (not (string-match "\\`/" base-path
)))
208 (let ((segments (rng-split-path path
))
209 (base-segments (rng-split-path base-path
)))
210 (if (> (length base-segments
) 1)
211 (setq segments
(nconc (nbutlast base-segments
)
214 (concat (car base-segments
) (car segments
))))
216 (let ((last-segment (last segments
)))
217 (when (equal (car last-segment
) ".")
218 (setcar last-segment
"")))
220 (setq segments
(delete "." segments
))
225 (setq iter
(cdr segments
))
226 (while (and iter
(not matched
))
227 (if (or (not (equal (cadr iter
) ".."))
228 (equal (car iter
) ".."))
229 (setq iter
(cdr iter
))
233 (if (cddr iter
) nil
""))
235 (setq segments
(delq nil segments
))))
237 (rng-join-path segments
))))
239 (defun rng-relative-uri (full base
)
240 "Return a URI that relative to BASE is equivalent to FULL.
241 The returned URI will be relative if possible.
242 Both FULL and BASE must be absolute URIs."
243 (let* ((components (rng-uri-split full
))
244 (scheme (nth 0 components
))
245 (authority (nth 1 components
))
246 (path (nth 2 components
))
247 (query (nth 3 components
))
248 (fragment-id (nth 4 components
))
249 (base-components (rng-uri-split base
)))
254 (nth 0 base-components
)))
259 (nth 1 base-components
)))
261 (setq path
(rng-relative-path path
(nth 2 base-components
))))
262 (rng-uri-join scheme authority path query fragment-id
))
265 (defun rng-relative-path (path base-path
)
266 (let ((segments (rng-split-path path
))
267 (base-segments (rng-split-path base-path
)))
268 (when (> (length base-segments
) 1)
269 (setq base-segments
(nbutlast base-segments
)))
270 (if (or (member "." segments
)
271 (member ".." segments
)
272 (member "." base-segments
)
273 (member ".." base-segments
))
277 (string= (car segments
)
278 (car base-segments
)))
279 (setq segments
(cdr segments
))
280 (setq base-segments
(cdr base-segments
)))
282 (setq base-segments
(cdr base-segments
))
283 (setq segments
(cons ".." segments
)))
284 (when (equal (car segments
) "")
285 (setq segments
(cons "." segments
)))
286 (rng-join-path segments
))))
288 (defun rng-split-path (path)
291 (while (string-match "/" path start
)
292 (setq segments
(cons (substring path start
(match-beginning 0))
294 (setq start
(match-end 0)))
295 (nreverse (cons (substring path start
) segments
))))
297 (defun rng-join-path (segments)
299 (mapconcat 'identity segments
"/")))
301 (defun rng-uri-unescape-multibyte (str)
302 (replace-regexp-in-string "\\(?:%[89a-fA-F][0-9a-fA-F]\\)+"
303 'rng-multibyte-percent-decode
306 (defun rng-multibyte-percent-decode (str)
307 (decode-coding-string (apply 'string
308 (mapcar (lambda (h) (string-to-number h
16))
309 (split-string str
"%")))
312 (defun rng-uri-unescape-unibyte (str)
313 (replace-regexp-in-string "%[0-7][0-9a-fA-F]"
315 (string-to-number (substring h
1) 16))
320 (defun rng-uri-unescape-unibyte-match (str)
321 (replace-regexp-in-string "%[0-7][0-9a-fA-F]\\|[^%]"
323 (if (string= match
"*")
326 (if (= (length match
) 1)
328 (string-to-number (substring match
1)
334 (defun rng-uri-unescape-unibyte-replace (str next-match-index
)
335 (replace-regexp-in-string
336 "%[0-7][0-9a-fA-F]\\|[^%]"
338 (if (string= match
"*")
339 (let ((n next-match-index
))
340 (setq next-match-index
(1+ n
))
342 (let ((ch (if (= (length match
) 1)
344 (string-to-number (substring match
1)
355 ;;; rng-uri.el ends here