1 ;;; rng-uri.el --- URI parsing and manipulation
3 ;; Copyright (C) 2003, 2007-2014 Free Software Foundation, Inc.
6 ;; Keywords: wp, hypermedia, languages, XML
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software: you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation, either version 3 of the License, or
13 ;; (at your option) any later version.
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
27 (defun rng-file-name-uri (f)
28 "Return a URI for the filename F.
29 Multibyte characters are left as is. Use `rng-uri-escape-multibyte' to
30 escape them using %HH."
31 (setq f
(expand-file-name f
))
33 (replace-regexp-in-string "[\000-\032\177<>#%\"{}|\\^[]`%?;]"
37 (if (and (> (length url
) 0)
43 (defun rng-uri-escape-multibyte (uri)
44 "Escape multibyte characters in URI."
45 (replace-regexp-in-string "[:nonascii:]"
47 (encode-coding-string uri
'utf-8
)))
49 (defun rng-percent-encode (str)
52 (format "%%%x%x" (/ ch
16) (% ch
16)))
53 (string-to-list str
))))
56 (defun rng-uri-file-name (uri)
57 "Return the filename represented by a URI.
58 Signal an error if URI is not a valid file URL."
59 (rng-uri-file-name-1 uri nil
))
61 (defun rng-uri-pattern-file-name-regexp (pattern)
62 "Return a regexp for filenames represented by URIs that match PATTERN."
63 (rng-uri-file-name-1 pattern
'match
))
65 (defun rng-uri-pattern-file-name-replace-match (pattern)
66 (rng-uri-file-name-1 pattern
'replace
))
68 ;; pattern is either nil or match or replace
69 (defun rng-uri-file-name-1 (uri pattern
)
70 (unless (string-match "\\`\\(?:[^%]\\|%[0-9a-fA-F]{2}\\)*\\'" uri
)
71 (rng-uri-error "Bad escapes in URI `%s'" uri
))
72 (setq uri
(rng-uri-unescape-multibyte uri
))
74 (or (rng-uri-split uri
)
75 (rng-uri-error "Cannot split URI `%s' into its components" uri
)))
76 (scheme (nth 0 components
))
77 (authority (nth 1 components
))
78 (path (nth 2 components
))
79 (absolutep (string-match "\\`/" path
))
80 (query (nth 3 components
))
81 (fragment-id (nth 4 components
)))
84 (rng-uri-error "URI `%s' does not have a scheme" uri
)))
85 ((not (string= (downcase scheme
) "file"))
86 (rng-uri-error "URI `%s' does not use the `file:' scheme" uri
)))
87 (when (not (member authority
88 (cons system-name
'(nil "" "localhost"))))
89 (rng-uri-error "URI `%s' does not start with `file:///' or `file://localhost/'"
92 (rng-uri-error "`?' not escaped in file URI `%s'" uri
))
94 (rng-uri-error "URI `%s' has a fragment identifier" uri
))
95 (when (string-match ";" path
)
96 (rng-uri-error "`;' not escaped in URI `%s'" uri
))
97 (when (string-match "%2[fF]" path
) ;; 2f is hex code of slash
98 (rng-uri-error "Escaped slash in URI `%s'" uri
))
99 (when (and (eq system-type
'windows-nt
)
101 (file-name-absolute-p (substring path
1)))
102 (setq path
(substring path
1)))
103 (when (and pattern
(string-match "\\`\\./" path
))
104 (setq path
(substring path
2)))
106 (cond ((eq pattern
'match
)
107 (rng-uri-unescape-unibyte-match path
))
108 ((eq pattern
'replace
)
109 (rng-uri-unescape-unibyte-replace path
2))
111 (rng-uri-unescape-unibyte path
))))
112 (when (string-match "\000" path
)
113 (rng-uri-error "URI `%s' has NUL character in path" uri
))
114 (when (eq pattern
'match
)
116 (concat (if absolutep
118 "\\(\\(?:[^/]*/\\)*\\)")
120 (cond ((eq pattern
'match
)
121 (concat "\\`" path
"\\'"))
122 ((and (eq pattern
'replace
)
127 (defun rng-uri-error (&rest args
)
128 (signal 'rng-uri-error
(list (apply 'format args
))))
130 (define-error 'rng-uri-error
"Invalid URI")
132 (defun rng-uri-split (str)
133 (and (string-match "\\`\\(?:\\([^:/?#]+\\):\\)?\
134 \\(?://\\([^/?#]*\\)\\)?\
136 \\(?:\\?\\([^#]*\\)\\)?\
137 \\(?:#\\(\\(?:.\\|\n\\)*\\)\\)?\\'"
139 (list (match-string 1 str
)
143 (match-string 5 str
))))
145 (defun rng-uri-join (scheme authority path
&optional query fragment-id
)
149 (setq parts
(list "#" fragment-id
)))
153 (cons query parts
))))
154 (setq parts
(cons path parts
))
158 (cons authority parts
))))
163 (apply 'concat parts
))))
165 (defun rng-uri-resolve (uri-ref base-uri
)
166 "Resolve a possibly relative URI reference into absolute form.
167 URI-REF is the URI reference to be resolved.
168 BASE-URI is the base URI to use for resolving it.
169 The algorithm is specified by RFC 2396.
170 If there is some problem with URI-REF or BASE-URI, then
171 URI-REF will be returned."
172 (let* ((components (rng-uri-split uri-ref
))
173 (scheme (nth 0 components
))
174 (authority (nth 1 components
))
175 (path (nth 2 components
))
176 (query (nth 3 components
))
177 (fragment-id (nth 4 components
))
178 (base-components (rng-uri-split base-uri
)))
179 (if (or (not components
)
181 (not base-components
)
182 (not (nth 0 base-components
)))
184 (setq scheme
(nth 0 base-components
))
185 (when (not authority
)
186 (setq authority
(nth 1 base-components
))
187 (if (and (equal path
"") (not query
))
188 ;; Handle same document reference by returning
189 ;; same URI (RFC 2396bis does this too).
190 (setq path
(nth 2 base-components
)
191 query
(nth 3 base-components
))
192 (setq path
(rng-resolve-path path
(nth 2 base-components
)))))
199 ;; See RFC 2396 5.2, steps 5 and 6
200 (defun rng-resolve-path (path base-path
)
202 (if (or (string-match "\\`/" path
)
203 (not (string-match "\\`/" base-path
)))
207 (let ((segments (rng-split-path path
))
208 (base-segments (rng-split-path base-path
)))
209 (if (> (length base-segments
) 1)
210 (setq segments
(nconc (nbutlast base-segments
)
213 (concat (car base-segments
) (car segments
))))
215 (let ((last-segment (last segments
)))
216 (when (equal (car last-segment
) ".")
217 (setcar last-segment
"")))
219 (setq segments
(delete "." segments
))
224 (setq iter
(cdr segments
))
225 (while (and iter
(not matched
))
226 (if (or (not (equal (cadr iter
) ".."))
227 (equal (car iter
) ".."))
228 (setq iter
(cdr iter
))
232 (if (cddr iter
) nil
""))
234 (setq segments
(delq nil segments
))))
236 (rng-join-path segments
))))
238 (defun rng-relative-uri (full base
)
239 "Return a URI that relative to BASE is equivalent to FULL.
240 The returned URI will be relative if possible.
241 Both FULL and BASE must be absolute URIs."
242 (let* ((components (rng-uri-split full
))
243 (scheme (nth 0 components
))
244 (authority (nth 1 components
))
245 (path (nth 2 components
))
246 (query (nth 3 components
))
247 (fragment-id (nth 4 components
))
248 (base-components (rng-uri-split base
)))
253 (nth 0 base-components
)))
258 (nth 1 base-components
)))
260 (setq path
(rng-relative-path path
(nth 2 base-components
))))
261 (rng-uri-join scheme authority path query fragment-id
))
264 (defun rng-relative-path (path base-path
)
265 (let ((segments (rng-split-path path
))
266 (base-segments (rng-split-path base-path
)))
267 (when (> (length base-segments
) 1)
268 (setq base-segments
(nbutlast base-segments
)))
269 (if (or (member "." segments
)
270 (member ".." segments
)
271 (member "." base-segments
)
272 (member ".." base-segments
))
276 (string= (car segments
)
277 (car base-segments
)))
278 (setq segments
(cdr segments
))
279 (setq base-segments
(cdr base-segments
)))
281 (setq base-segments
(cdr base-segments
))
282 (setq segments
(cons ".." segments
)))
283 (when (equal (car segments
) "")
284 (setq segments
(cons "." segments
)))
285 (rng-join-path segments
))))
287 (defun rng-split-path (path)
290 (while (string-match "/" path start
)
291 (setq segments
(cons (substring path start
(match-beginning 0))
293 (setq start
(match-end 0)))
294 (nreverse (cons (substring path start
) segments
))))
296 (defun rng-join-path (segments)
298 (mapconcat 'identity segments
"/")))
300 (defun rng-uri-unescape-multibyte (str)
301 (replace-regexp-in-string "\\(?:%[89a-fA-F][0-9a-fA-F]\\)+"
302 'rng-multibyte-percent-decode
305 (defun rng-multibyte-percent-decode (str)
306 (decode-coding-string (apply 'string
307 (mapcar (lambda (h) (string-to-number h
16))
308 (split-string str
"%")))
311 (defun rng-uri-unescape-unibyte (str)
312 (replace-regexp-in-string "%[0-7][0-9a-fA-F]"
314 (string-to-number (substring h
1) 16))
319 (defun rng-uri-unescape-unibyte-match (str)
320 (replace-regexp-in-string "%[0-7][0-9a-fA-F]\\|[^%]"
322 (if (string= match
"*")
325 (if (= (length match
) 1)
327 (string-to-number (substring match
1)
333 (defun rng-uri-unescape-unibyte-replace (str next-match-index
)
334 (replace-regexp-in-string
335 "%[0-7][0-9a-fA-F]\\|[^%]"
337 (if (string= match
"*")
338 (let ((n next-match-index
))
339 (setq next-match-index
(1+ n
))
341 (let ((ch (if (= (length match
) 1)
343 (string-to-number (substring match
1)
354 ;;; rng-uri.el ends here