1 -----------------------------------------------------------------------------
2 -- URI parsing, composition and relative URL resolution
5 -----------------------------------------------------------------------------
7 -----------------------------------------------------------------------------
9 -----------------------------------------------------------------------------
10 local string = require("string")
12 local table = require("table")
15 -----------------------------------------------------------------------------
17 -----------------------------------------------------------------------------
-- Version string exported by this module
_VERSION = "URL 1.0.2"
20 -----------------------------------------------------------------------------
21 -- Encodes a string into its escaped hexadecimal representation
23 -- s: binary string to be encoded
25 -- escaped representation of string binary
26 -----------------------------------------------------------------------------
-- Percent-encodes a string: every byte that is not an ASCII letter, a digit
-- or "_" is replaced by "%" followed by its two-digit lowercase hex code.
--   s: string to be encoded
-- Returns only the encoded string (gsub's match count is discarded).
function escape(s)
    local function to_hex(char)
        return string.format("%%%02x", string.byte(char))
    end
    local encoded = string.gsub(s, "([^A-Za-z0-9_])", to_hex)
    return encoded
end
33 -----------------------------------------------------------------------------
-- Protects a path segment, to prevent it from interfering with URL parsing
37 -- s: binary string to be encoded
39 -- escaped representation of string binary
40 -----------------------------------------------------------------------------
-- Turns an array into a lookup table.
--   t: array of values
-- Returns a table that maps every element of t to 1.
local function make_set(t)
    local set = {}
    for _, member in base.ipairs(t) do
        set[member] = 1
    end
    return set
end
-- these are allowed within a path segment, along with alphanumerics;
-- all other characters must be escaped
-- lookup table of the punctuation characters that protect_segment leaves
-- unescaped
local segment_set = make_set({
    "-", "_", ".", "!", "~", "*", "'", "(",
    ")", ":", "@", "&", "=", "+", "$", ",",
})
-- Escapes a single path segment so that it cannot interfere with URL parsing.
-- ASCII letters, digits, "_" and the characters listed in segment_set are
-- kept as they are; every other byte becomes "%" followed by its two-digit
-- lowercase hex code.
--   s: path segment to protect
-- Returns the protected segment as a single value.
local function protect_segment(s)
    -- the outer parentheses drop gsub's second result (the match count), so
    -- a caller that forwards the result to another call passes one value only
    return (string.gsub(s, "([^A-Za-z0-9_])", function (c)
        if segment_set[c] then return c end
        return string.format("%%%02x", string.byte(c))
    end))
end
63 -----------------------------------------------------------------------------
-- Decodes an escaped hexadecimal string into its binary representation
-- s: escaped string to be decoded
-- unescaped binary representation of the string
69 -----------------------------------------------------------------------------
-- Decodes percent-escapes: each "%" followed by two hex digits is replaced by
-- the byte with that code; all other text is left untouched.
--   s: escaped string
-- Returns only the decoded string (gsub's match count is discarded).
function unescape(s)
    local function from_hex(digits)
        return string.char(base.tonumber(digits, 16))
    end
    local decoded = string.gsub(s, "%%(%x%x)", from_hex)
    return decoded
end
76 -----------------------------------------------------------------------------
77 -- Builds a path from a base path and a relative path
82 -- corresponding absolute path
83 -----------------------------------------------------------------------------
-- Resolves a relative path against a base path.
--   base_path: path of the base URL
--   relative_path: path to resolve
-- Returns the combined path with "." and "segment/.." sequences collapsed.
local function absolute_path(base_path, relative_path)
    -- a relative path that starts at the root replaces the base entirely
    if string.sub(relative_path, 1, 1) == "/" then
        return relative_path
    end

    -- gsub callbacks: return "" to delete the match, or the match to keep it
    local function drop_current_dir(segment)
        if segment == "./" then return "" end
        return segment
    end
    local function drop_parent_pair(segment)
        if segment == "../../" then return segment end
        return ""
    end
    local function drop_trailing_parent(segment)
        if segment == "../.." then return segment end
        return ""
    end

    -- strip the last segment of the base, then append the relative path
    local directory = string.gsub(base_path, "[^/]*$", "")
    local path = directory .. relative_path
    path = string.gsub(path, "([^/]*%./)", drop_current_dir)
    path = string.gsub(path, "/%.$", "/")
    -- keep collapsing "segment/../" until a pass changes nothing
    local previous
    repeat
        previous = path
        path = string.gsub(previous, "([^/]*/%.%./)", drop_parent_pair)
    until previous == path
    path = string.gsub(previous, "([^/]*/%.%.)$", drop_trailing_parent)
    return path
end
105 -----------------------------------------------------------------------------
106 -- Parses a url and returns a table with all its parts according to RFC 2396
107 -- The following grammar describes the names given to the URL parts
108 -- <url> ::= <scheme>://<authority>/<path>;<params>?<query>#<fragment>
109 -- <authority> ::= <userinfo>@<host>:<port>
110 -- <userinfo> ::= <user>[:<password>]
-- <path> ::= {<segment>/}<segment>
113 -- url: uniform resource locator of request
114 -- default: table with default values for each field
116 -- table with the following fields, where RFC naming conventions have
118 -- scheme, authority, userinfo, user, password, host, port,
119 -- path, params, query, fragment
121 -- the leading '/' in {/<path>} is considered part of <path>
122 -----------------------------------------------------------------------------
-- Splits a URL into its components.
--   url: URL string to parse
--   default: optional table whose fields are copied into the result first
-- Returns a table that may contain scheme, authority, userinfo, user,
-- password, host, port, path, params, query and fragment; or nil plus the
-- message "invalid url" when url is nil or empty.
function parse(url, default)
    -- start from a copy of the caller-supplied defaults, if any
    local parsed = {}
    for key, value in base.pairs(default or parsed) do
        parsed[key] = value
    end
    -- empty url is parsed to nil
    if not url or url == "" then return nil, "invalid url" end
    -- url = string.gsub(url, "%s", "")

    -- removes whatever `pattern` matches from `text`, storing the capture in
    -- parsed[field]; returns what is left of `text`
    local function take(text, pattern, field)
        local rest = string.gsub(text, pattern, function(capture)
            parsed[field] = capture
            return ""
        end)
        return rest
    end

    -- peel the components off one at a time; the order matters
    url = take(url, "#(.*)$", "fragment")
    url = take(url, "^([%w][%w%+%-%.]*)%:", "scheme")
    url = take(url, "^//([^/]*)", "authority")
    url = take(url, "%?(.*)", "query")
    url = take(url, "%;(.*)", "params")
    -- path is whatever was left
    if url ~= "" then parsed.path = url end

    local authority = parsed.authority
    if not authority then return parsed end
    authority = take(authority, "^([^@]*)@", "userinfo")
    authority = take(authority, ":([^:%]]*)$", "port")
    if authority ~= "" then
        -- a bracketed (IPv6) host is stored without its brackets
        parsed.host = string.match(authority, "^%[(.+)%]$") or authority
    end

    local userinfo = parsed.userinfo
    if not userinfo then return parsed end
    parsed.user = take(userinfo, ":([^:]*)$", "password")
    return parsed
end
174 -----------------------------------------------------------------------------
175 -- Rebuilds a parsed URL from its components.
176 -- Components are protected if any reserved or unallowed characters are found
178 -- parsed: parsed URL, as returned by parse
-- a string with the corresponding URL
181 -----------------------------------------------------------------------------
-- Reassembles a URL string from a table of components.
--   parsed: table of URL components, as returned by parse
-- Returns the URL string. When parsed.host is set, the authority is rebuilt
-- from host, port, user and password (falling back to userinfo when user is
-- absent); otherwise parsed.authority is used as it is.
function build(parsed)
    local segments = parse_path(parsed.path or "")
    local url = build_path(segments)
    if parsed.params then url = url .. ";" .. parsed.params end
    if parsed.query then url = url .. "?" .. parsed.query end

    local authority = parsed.authority
    if parsed.host then
        local host = parsed.host
        if string.find(host, ":") then -- IPv6?
            host = "[" .. host .. "]"
        end
        local credentials = parsed.userinfo
        if parsed.user then
            credentials = parsed.user
            if parsed.password then
                credentials = credentials .. ":" .. parsed.password
            end
        end
        authority = host
        if parsed.port then authority = authority .. ":" .. parsed.port end
        if credentials then authority = credentials .. "@" .. authority end
    end

    if authority then url = "//" .. authority .. url end
    if parsed.scheme then url = parsed.scheme .. ":" .. url end
    if parsed.fragment then url = url .. "#" .. parsed.fragment end
    -- url = string.gsub(url, "%s", "")
    return url
end
210 -----------------------------------------------------------------------------
-- Builds an absolute URL from a base and a relative URL according to RFC 2396
216 -- corresponding absolute url
217 -----------------------------------------------------------------------------
-- Resolves a relative URL against a base URL.
--   base_url: base URL, either a string or a table of parsed components
--   relative_url: URL string to resolve
-- Returns the resulting URL string. relative_url is returned unchanged when
-- the base cannot be parsed or when relative_url has its own scheme; base_url
-- (as a string) is returned when relative_url cannot be parsed.
function absolute(base_url, relative_url)
    -- declared local so that a call does not create or overwrite a
    -- `base_parsed` variable outside this function
    local base_parsed
    if base.type(base_url) == "table" then
        base_parsed = base_url
        base_url = build(base_parsed)
    else
        base_parsed = parse(base_url)
    end
    local relative_parsed = parse(relative_url)
    if not base_parsed then return relative_url
    elseif not relative_parsed then return base_url
    elseif relative_parsed.scheme then return relative_url
    else
        -- inherit from the base every leading component the relative URL
        -- lacks, stopping at the first one it provides
        relative_parsed.scheme = base_parsed.scheme
        if not relative_parsed.authority then
            relative_parsed.authority = base_parsed.authority
            if not relative_parsed.path then
                relative_parsed.path = base_parsed.path
                if not relative_parsed.params then
                    relative_parsed.params = base_parsed.params
                    if not relative_parsed.query then
                        relative_parsed.query = base_parsed.query
                    end
                end
            else
                relative_parsed.path = absolute_path(base_parsed.path or "",
                    relative_parsed.path)
            end
        end
        return build(relative_parsed)
    end
end
250 -----------------------------------------------------------------------------
251 -- Breaks a path into its segments, unescaping the segments
255 -- segment: a table with one entry per segment
256 -----------------------------------------------------------------------------
-- Splits a path into its unescaped segments.
--   path: path string
-- Returns an array with one entry per non-empty segment; the field
-- is_absolute is set to 1 when the path starts with "/" and is_directory is
-- set to 1 when it ends with "/".
function parse_path(path)
    path = path or ""
    local parsed = {}
    --path = string.gsub(path, "%s", "")
    for segment in string.gmatch(path, "([^/]+)") do
        parsed[#parsed + 1] = unescape(segment)
    end
    if string.sub(path, 1, 1) == "/" then parsed.is_absolute = 1 end
    if string.sub(path, -1, -1) == "/" then parsed.is_directory = 1 end
    return parsed
end
270 -----------------------------------------------------------------------------
271 -- Builds a path component from its segments, escaping protected characters.
273 -- parsed: path segments
274 -- unsafe: if true, segments are not protected before path is built
-- path: corresponding path string
277 -----------------------------------------------------------------------------
-- Joins path segments back into a path string.
--   parsed: array of segments, optionally carrying the is_absolute and
--           is_directory fields
--   unsafe: if true, segments are concatenated as they are; otherwise each
--           one goes through protect_segment first
-- Returns the path string.
function build_path(parsed, unsafe)
    local count = #parsed
    local path = ""
    for i = 1, count do
        local segment = parsed[i]
        if not unsafe then segment = protect_segment(segment) end
        path = path .. segment
        -- segments are separated by "/"; the last one is followed by "/"
        -- only when is_directory is set
        if i < count or parsed.is_directory then path = path .. "/" end
    end
    if parsed.is_absolute then path = "/" .. path end
    return path
end