1 " ------------------------------------------------------------------------------
2 " File: utluri.vim -- module for parsing URIs
3 " Part of the Utl plugin, see ./utl.vim
4 " Author: Stefan Bittner <stb@bf-consulting.de>
5 " Licence: This program is free software; you can redistribute it and/or
6 " modify it under the terms of the GNU General Public License.
7 " See http://www.gnu.org/copyleft/gpl.txt
8 " Version: utl 2.0, $Revision: 1.1.1.1 $
9 " ------------------------------------------------------------------------------
11 " Parses URI-References.
12 " (Can be used independently of Utl.)
13 " (A URI-Reference is a URI + fragment: myUri#myFragment.
14 " See also <URL:vimhelp:utl-uri-refs>.)
15 " Aims to be compliant with <URL:http://www.ietf.org/rfc/rfc2396.txt>
17 " NOTE: The distinction between URI and URI-Reference is not strictly
18 " maintained throughout this file. It should be clear from the context.
19 " The fragment goes sometimes with the URI, sometimes not.
24 " let uri = 'http://www.google.com/search?q=vim#tn=ubiquitous'
26 " let scheme = UtlUri_scheme(uri)
27 " let authority = UtlUri_authority(uri)
28 " let path = UtlUri_path(uri)
29 " let query = UtlUri_query(uri)
30 " let fragment = UtlUri_fragment(uri)
33 " let uriRebuilt = UtlUri_build(scheme, authority, path, query, fragment)
35 " " UtlUri_build a new URI
36 " let uriNew = UtlUri_build('file', 'localhost', 'path/to/file', '<undef>', 'myFrag')
38 " let unesc = UtlUri_unescape('a%20b%3f') " -> unesc==`a b?'
41 " Authority, query and fragment can have the <undef> value (literally!)
42 " (similar to undef-value in Perl). That's distinguished from
43 " _empty_ values! Example: http:/// yields UtlUri_authority=='' whereas
44 " http:/path/to/file yields UtlUri_authority=='<undef>'.
46 " <URL:http://www.ietf.org/rfc/rfc2396.txt#Note that we must be careful>
49 " It is not very performant in typical usage (but clear).
50 " s:UtlUri_parse executed n times for getting n components of same uri
" Load guard and bookkeeping for this script.
" NOTE(review): the body and the closing `endif' of this `if' are not visible
" in this view; presumably it stops sourcing when loaded_utl_uri already
" exists -- confirm.
52 if exists("loaded_utl_uri")
" Mark the script as loaded.
55 let loaded_utl_uri = 1
" Remember the expansion of <sfile> (the file being sourced) in a global.
58 let g:utl_uri_vim = expand("<sfile>")
61 "------------------------------------------------------------------------------
62 " Parses `uri'. Used by ``public'' functions like UtlUri_path().
63 " - idx selects the component (see below)
64 fu! s:UtlUri_parse(uri, idx)
" Returns the text of capture group number `idx' of the URI-reference regex
" applied to `uri'.
" Capture groups, as given by the pattern below:
"   1 = scheme incl. trailing `:'    2 = scheme
"   3 = authority incl. leading `//' 4 = authority
"   5 = path
"   6 = query incl. leading `?'      7 = query
"   8 = fragment incl. leading `#'   9 = fragment
66 " See <URL:http://www.ietf.org/rfc/rfc2396.txt#^B. Parsing a URI Reference>
68 " ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
77 " (don't touch! ;-) id=_regexparse
" The matched text is replaced by the back-reference '\'.idx, so the result
" is just the selected group. Every part of the pattern is optional, so a
" component that is absent from `uri' yields an empty string.
78 return substitute(a:uri, '^\(\([^:/?#]\+\):\)\=\(//\([^/?#]*\)\)\=\([^?#]*\)\(?\([^#]*\)\)\=\(#\(.*\)\)\=', '\'.a:idx, '')
82 "-------------------------------------------------------------------------------
83 fu! UtlUri_scheme(uri)
" Returns the scheme component of `uri', converted to lower case.
" Group 2 of the parse regex is the scheme without its trailing `:'.
84 let scheme = s:UtlUri_parse(a:uri, 2)
85 " empty scheme impossible (an uri like `://a/b' is interpreted as path = `://a/b').
" NOTE(review): some lines of this function are not visible in this view;
" callers compare the result against '<undef>', so presumably a missing
" scheme is reported as '<undef>' here -- confirm.
90 " <URL:http://www.ietf.org/rfc/rfc2396.txt#resiliency>
91 return tolower( scheme )
94 "-------------------------------------------------------------------------------
95 fu! UtlUri_opaque(uri)
" Returns the concatenation of parse groups 3, 5 and 6 of `uri': the
" `//authority' part, the path and the `?query' part, each including its
" leading delimiter. Scheme and fragment are not part of the result.
96 return s:UtlUri_parse(a:uri, 3) . s:UtlUri_parse(a:uri, 5) . s:UtlUri_parse(a:uri, 6)
99 "-------------------------------------------------------------------------------
100 fu! UtlUri_authority(uri)
" Returns the authority component of `uri'.
" Group 3 is the authority including the leading `//', group 4 is the
" authority alone; the two can only be equal when there is no `//' at all,
" i.e. when the authority is absent rather than empty.
" NOTE(review): the body of this `if' is not visible in this view; per the
" file header it presumably returns '<undef>' -- confirm.
101 if s:UtlUri_parse(a:uri, 3) == s:UtlUri_parse(a:uri, 4)
" Authority present (possibly empty): return it without the `//'.
104 return s:UtlUri_parse(a:uri, 4)
108 "-------------------------------------------------------------------------------
" NOTE(review): the enclosing function header is not visible in this view;
" presumably this is the body of UtlUri_path(uri) -- confirm.
" Group 5 of the parse regex is the path component.
110 return s:UtlUri_parse(a:uri, 5)
113 "-------------------------------------------------------------------------------
114 fu! UtlUri_query(uri)
" Returns the query component of `uri'.
" Group 6 is the query including the leading `?', group 7 is the query
" alone; the two can only be equal when there is no `?' part at all.
" NOTE(review): the body of this `if' is not visible in this view; per the
" file header it presumably returns '<undef>' -- confirm.
115 if s:UtlUri_parse(a:uri, 6) == s:UtlUri_parse(a:uri, 7)
" Query present (possibly empty): return it without the `?'.
118 return s:UtlUri_parse(a:uri, 7)
122 "-------------------------------------------------------------------------------
123 fu! UtlUri_fragment(uri)
" Returns the fragment component of `uri'.
" Group 8 is the fragment including the leading `#', group 9 is the
" fragment alone; the two can only be equal when there is no `#' part.
" NOTE(review): the body of this `if' is not visible in this view; per the
" file header it presumably returns '<undef>' -- confirm.
124 if s:UtlUri_parse(a:uri, 8) == s:UtlUri_parse(a:uri, 9)
" Fragment present (possibly empty): return it without the `#'.
127 return s:UtlUri_parse(a:uri, 9)
132 "------------------------------------------------------------------------------
133 " Concatenate uri components into an uri -- opposite of s:UtlUri_parse
134 " see <URL:http://www.ietf.org/rfc/rfc2396.txt#are recombined>
136 " - it should hold: s:UtlUri_parse + UtlUri_build = exactly the original Uri
138 fu! UtlUri_build(scheme, authority, path, query, fragment)
" Assembles a URI string from its components. A component whose value is the
" literal string '<undef>' is left out together with its delimiter; the path
" is always appended.
" NOTE(review): the initialisation of `result', the closing `endif's and the
" final return are not visible in this view.
" NOTE(review): plain `!=' honours the 'ignorecase' option in Vim script;
" `!=#' would force a case-sensitive comparison -- confirm whether that
" matters for the '<undef>' sentinel.
" scheme -> `scheme:'
142 if a:scheme != '<undef>'
143 let result = result . a:scheme . ':'
" authority -> `//authority' (an empty authority still produces `//')
146 if a:authority != '<undef>'
147 let result = result . '//' . a:authority
" path is appended as is, without a delimiter
150 let result = result . a:path
" query -> `?query'
152 if a:query != '<undef>'
153 let result = result . '?' . a:query
" fragment -> `#fragment'
156 if a:fragment != '<undef>'
157 let result = result . '#' . a:fragment
164 "------------------------------------------------------------------------------
165 " Constructs an absolute URI from a relative URI `uri' by the help of given
166 " `base' uri and returns it.
169 " <URL:http://www.ietf.org/rfc/rfc2396.txt#^5.2. Resolving Relative References>
170 " - `uri' may already be absolute (i.e. has scheme), is then returned
172 " - `base' should really be absolute! Otherwise the returned Uri will not be
173 " absolute (scheme <undef>). Furthermore `base' should be reasonable (e.g.
174 " have an absolute Path in the case of hierarchical Uri)
176 fu! UtlUri_abs(uri, base)
" Resolves `uri' against `base' and returns the combined URI built with
" UtlUri_build().
" NOTE(review): several lines of this function (branch bodies, `endif's and
" at least one condition) are not visible in this view.
178 " see <URL:http://www.ietf.org/rfc/rfc2396.txt#If the scheme component>
" Branch taken when `uri' has a scheme of its own; per the function header
" the uri is then returned -- the branch body is not visible here.
179 if UtlUri_scheme(a:uri) != '<undef>'
" From here on the scheme is taken from `base'.
183 let scheme = UtlUri_scheme(a:base)
185 " query, fragment never inherited from base, wether defined or not,
186 " see <URL:http://www.ietf.org/rfc/rfc2396.txt#not inherited from the base URI>
187 let query = UtlUri_query(a:uri)
188 let fragment = UtlUri_fragment(a:uri)
190 " see <URL:http://www.ietf.org/rfc/rfc2396.txt#If the authority component is defined>
191 let authority = UtlUri_authority(a:uri)
" `uri' has its own authority: keep its authority and path, only the scheme
" comes from `base'.
192 if authority != '<undef>'
193 return UtlUri_build(scheme, authority, UtlUri_path(a:uri), query, fragment)
" Otherwise the authority is inherited from `base'.
196 let authority = UtlUri_authority(a:base)
198 " see <URL:http://www.ietf.org/rfc/rfc2396.txt#If the path component begins>
199 let path = UtlUri_path(a:uri)
" NOTE(review): the condition guarding this return is not visible here;
" judging by the reference above it presumably tests for a path that begins
" with `/' -- confirm.
201 return UtlUri_build(scheme, authority, path, query, fragment)
204 " see <URL:http://www.ietf.org/rfc/rfc2396.txt#needs to be merged>
" Remove everything after the last `/' of the base path (its last segment)...
207 let new_path = substitute( UtlUri_path(a:base), '[^/]*$', '', '')
" ...and append the path of `uri' to what remains.
209 let new_path = new_path . path
211 " Possible Enhancement: implement the missing steps (purge a/b/../c/ into
214 return UtlUri_build(scheme, authority, new_path, query, fragment)
219 "------------------------------------------------------------------------------
220 " Strip a trailing #myfrag (fragment), if any, from a URI reference.
221 " Returns the URI part; it can be empty.
223 fu! UriRef_getUri(uriref)
" idx is the byte index of the first `#' in `uriref'; match() yields -1 when
" there is none.
224 let idx = match(a:uriref, '#')
" NOTE(review): the handling of the `no # found' case is not visible in this
" view.
" Return the text in front of the `#'.
228 return strpart(a:uriref, 0, idx)
231 "------------------------------------------------------------------------------
232 " Extract the fragment (the text after the first `#') of a URI reference.
233 " Returns the fragment; it can be empty or <undef>.
235 fu! UriRef_getFragment(uriref)
" idx is the byte index of the first `#' in `uriref'; match() yields -1 when
" there is none.
236 let idx = match(a:uriref, '#')
" NOTE(review): the handling of the `no # found' case is not visible in this
" view; per the header comment it presumably returns '<undef>' -- confirm.
" Return the text following the `#' (at most 9999 bytes of it).
240 return strpart(a:uriref, idx+1, 9999)
244 "------------------------------------------------------------------------------
245 " Unescape unsafe characters in given string,
246 " e.g. transform `10%25%20is%20enough' to `10% is enough'.
248 " - typically string is an uri component (path or fragment)
250 " (see <URL:http://www.ietf.org/rfc/rfc2396.txt#2. URI Characters and Escape Sequences>)
252 fu! UtlUri_unescape(esc)
" Decodes %XX escape sequences (two hex digits) in the given string.
" NOTE(review): the initialisation of the locals `esc' and `unesc', the loop
" construct and the final return are not visible in this view.
253 " perl: $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg
" ibeg is the index of the next `%' followed by two hex digits (-1 if none).
257 let ibeg = match(esc, '%[0-9A-Fa-f]\{2}')
" Build the string `0xXX' from the two hex digits and turn it into the
" character with that code (the string is converted to a number by Vim).
261 let chr = nr2char( "0x". esc[ibeg+1] . esc[ibeg+2] )
" Append the text in front of the escape plus the decoded character.
262 let unesc = unesc . strpart(esc, 0, ibeg) . chr
" Continue with the remainder after the 3-character escape sequence.
263 let esc = strpart(esc, ibeg+3, 9999)
" Restore the 'cpoptions' option from s:save_cpo (the place where it is saved
" is not visible in this view).
268 let &cpo = s:save_cpo