beta-0.89.2
[luatex.git] / source / texk / web2c / luatexdir / luasocket / src / url.lua
blob6ca6d68449911df893762b4b330e044703cd34dd
1 -----------------------------------------------------------------------------
2 -- URI parsing, composition and relative URL resolution
3 -- LuaSocket toolkit.
4 -- Author: Diego Nehab
5 -----------------------------------------------------------------------------
7 -----------------------------------------------------------------------------
8 -- Declare module
9 -----------------------------------------------------------------------------
10 local string = require("string")
11 local base = _G
12 local table = require("table")
13 module("socket.url")
15 -----------------------------------------------------------------------------
16 -- Module version
17 -----------------------------------------------------------------------------
18 _VERSION = "URL 1.0.2"
-----------------------------------------------------------------------------
-- Percent-encodes a string: every byte that is not an ASCII letter, a digit
-- or an underscore is replaced by "%" followed by its two-digit lowercase
-- hexadecimal code.
-- Input
--   s: binary string to be encoded
-- Returns
--   escaped representation of the string (a single value)
-----------------------------------------------------------------------------
function escape(s)
    local function to_hex(ch)
        return string.format("%%%02x", string.byte(ch))
    end
    -- bind to a local so that gsub's substitution count is not returned
    local encoded = string.gsub(s, "([^A-Za-z0-9_])", to_hex)
    return encoded
end
-----------------------------------------------------------------------------
-- Turns a list of values into a lookup set.
-- Input
--   t: array of values
-- Returns
--   table that maps each listed value to 1
-----------------------------------------------------------------------------
local function make_set(t)
    local set = {}
    for _, member in base.ipairs(t) do
        set[member] = 1
    end
    return set
end
-- Characters that may appear unescaped within a path segment, in addition
-- to alphanumerics; every other character must be percent-escaped.
local segment_set = make_set {
    "-", "_", ".", "!", "~", "*", "'", "(",
    ")", ":", "@", "&", "=", "+", "$", ",",
}
-----------------------------------------------------------------------------
-- Protects a path segment, to prevent it from interfering with the
-- url parsing. Alphanumerics, "_" and the characters listed in segment_set
-- are kept; any other byte becomes "%" plus its two-digit lowercase hex code.
-- Input
--   s: binary string to be encoded
-- Returns
--   escaped representation of the segment (a single value)
-----------------------------------------------------------------------------
local function protect_segment(s)
    -- bind to a local so that gsub's substitution count is not passed on
    -- to the caller as a stray second return value
    local protected = string.gsub(s, "([^A-Za-z0-9_])", function (c)
        if segment_set[c] then return c end
        return string.format("%%%02x", string.byte(c))
    end)
    return protected
end
-----------------------------------------------------------------------------
-- Decodes a percent-escaped string: each "%" followed by two hexadecimal
-- digits is replaced by the byte with that code. Any other text, including
-- a "%" not followed by two hex digits, is left untouched.
-- Input
--   s: escaped string to be decoded
-- Returns
--   decoded binary string (a single value)
-----------------------------------------------------------------------------
function unescape(s)
    local function from_hex(hex)
        return string.char(base.tonumber(hex, 16))
    end
    -- bind to a local so that gsub's substitution count is not returned
    local decoded = string.gsub(s, "%%(%x%x)", from_hex)
    return decoded
end
-----------------------------------------------------------------------------
-- Builds a path from a base path and a relative path
-- Input
--   base_path: path that the relative path is resolved against
--   relative_path: path to resolve; returned unchanged if it starts with "/"
-- Returns
--   corresponding absolute path, with "." and ".." segments reduced
-----------------------------------------------------------------------------
local function absolute_path(base_path, relative_path)
    if string.sub(relative_path, 1, 1) == "/" then return relative_path end
    -- drop whatever follows the last "/" of the base, then append
    local path = string.gsub(base_path, "[^/]*$", "")
    path = path .. relative_path
    -- remove "./" segments; segments that merely end in a dot are kept
    path = string.gsub(path, "([^/]*%./)", function (s)
        if s ~= "./" then return s else return "" end
    end)
    -- a trailing "/." collapses to "/"
    path = string.gsub(path, "/%.$", "/")
    -- repeatedly cancel "<segment>/../" pairs until nothing changes;
    -- "../../" is left alone so that excess ".." segments survive
    local reduced
    while reduced ~= path do
        reduced = path
        path = string.gsub(reduced, "([^/]*/%.%./)", function (s)
            if s ~= "../../" then return "" else return s end
        end)
    end
    -- cancel a trailing "<segment>/.." in the same way
    path = string.gsub(reduced, "([^/]*/%.%.)$", function (s)
        if s ~= "../.." then return "" else return s end
    end)
    return path
end
-----------------------------------------------------------------------------
-- Parses a url and returns a table with all its parts according to RFC 2396
-- The following grammar describes the names given to the URL parts
-- <url> ::= <scheme>://<authority>/<path>;<params>?<query>#<fragment>
-- <authority> ::= <userinfo>@<host>:<port>
-- <userinfo> ::= <user>[:<password>]
-- <path> ::= {<segment>/}<segment>
-- Input
--   url: uniform resource locator of request
--   default: table with default values for each field
-- Returns
--   table with the following fields, where RFC naming conventions have
--   been preserved:
--     scheme, authority, userinfo, user, password, host, port,
--     path, params, query, fragment
--   or nil followed by the message "invalid url" if url is nil or empty
-- Obs:
--   the leading '/' in {/<path>} is considered part of <path>
--   an IPv6 literal host is returned without its enclosing brackets
-----------------------------------------------------------------------------
function parse(url, default)
    -- start from a copy of the defaults, if any were given
    local parsed = {}
    if default then
        for k, v in base.pairs(default) do parsed[k] = v end
    end
    -- empty url is parsed to nil
    if not url or url == "" then return nil, "invalid url" end
    -- each step below strips one component from the url string
    -- get fragment
    url = string.gsub(url, "#(.*)$", function(f)
        parsed.fragment = f
        return ""
    end)
    -- get scheme
    url = string.gsub(url, "^([%w][%w%+%-%.]*)%:", function(s)
        parsed.scheme = s
        return ""
    end)
    -- get authority; it ends at the first "/" or "?", so a query that
    -- directly follows the host is not absorbed into the authority
    url = string.gsub(url, "^//([^/%?]*)", function(n)
        parsed.authority = n
        return ""
    end)
    -- get query string
    url = string.gsub(url, "%?(.*)", function(q)
        parsed.query = q
        return ""
    end)
    -- get params
    url = string.gsub(url, "%;(.*)", function(p)
        parsed.params = p
        return ""
    end)
    -- path is whatever was left
    if url ~= "" then parsed.path = url end
    -- split the authority into userinfo, host and port
    local authority = parsed.authority
    if not authority then return parsed end
    authority = string.gsub(authority, "^([^@]*)@", function(u)
        parsed.userinfo = u
        return ""
    end)
    -- the port may not contain "]", so the colons inside a bracketed
    -- IPv6 literal are never mistaken for the port separator
    authority = string.gsub(authority, ":([^:%]]*)$", function(p)
        parsed.port = p
        return ""
    end)
    if authority ~= "" then
        -- IPv6?
        parsed.host = string.match(authority, "^%[(.+)%]$") or authority
    end
    -- split the userinfo into user and password
    local userinfo = parsed.userinfo
    if not userinfo then return parsed end
    userinfo = string.gsub(userinfo, ":([^:]*)$", function(p)
        parsed.password = p
        return ""
    end)
    parsed.user = userinfo
    return parsed
end
-----------------------------------------------------------------------------
-- Rebuilds a parsed URL from its components.
-- Path segments are protected if any reserved or unallowed characters are
-- found; all other components are copied as given.
-- Input
--   parsed: parsed URL, as returned by parse
-- Returns
--   a string with the corresponding URL
-- Obs:
--   when parsed.host is set, the authority is rebuilt from host, port and
--   user/password (or userinfo); otherwise parsed.authority is used as is
-----------------------------------------------------------------------------
function build(parsed)
    local ppath = parse_path(parsed.path or "")
    local url = build_path(ppath)
    if parsed.params then url = url .. ";" .. parsed.params end
    if parsed.query then url = url .. "?" .. parsed.query end
    local authority = parsed.authority
    if parsed.host then
        authority = parsed.host
        -- IPv6? a literal containing ":" must be wrapped in brackets
        if string.find(authority, ":", 1, true) then
            authority = "[" .. authority .. "]"
        end
        if parsed.port then authority = authority .. ":" .. parsed.port end
        -- user (plus optional password) takes precedence over userinfo
        local userinfo = parsed.userinfo
        if parsed.user then
            userinfo = parsed.user
            if parsed.password then
                userinfo = userinfo .. ":" .. parsed.password
            end
        end
        if userinfo then authority = userinfo .. "@" .. authority end
    end
    if authority then url = "//" .. authority .. url end
    if parsed.scheme then url = parsed.scheme .. ":" .. url end
    if parsed.fragment then url = url .. "#" .. parsed.fragment end
    -- url = string.gsub(url, "%s", "")
    return url
end
-----------------------------------------------------------------------------
-- Builds an absolute URL from a base and a relative URL according to RFC 2396
-- Input
--   base_url: base URL, either a string or a table as returned by parse
--   relative_url: URL string to resolve against the base
-- Returns
--   corresponding absolute url; relative_url itself if the base cannot be
--   parsed or relative_url already has a scheme; the base URL (as a string)
--   if relative_url cannot be parsed
-----------------------------------------------------------------------------
function absolute(base_url, relative_url)
    -- must be local: a bare assignment would create a field in the module
    -- environment that is shared between calls
    local base_parsed
    if base.type(base_url) == "table" then
        base_parsed = base_url
        base_url = build(base_parsed)
    else
        base_parsed = parse(base_url)
    end
    local relative_parsed = parse(relative_url)
    if not base_parsed then return relative_url end
    if not relative_parsed then return base_url end
    if relative_parsed.scheme then return relative_url end
    -- inherit from the base each leading component the relative URL lacks
    relative_parsed.scheme = base_parsed.scheme
    if not relative_parsed.authority then
        relative_parsed.authority = base_parsed.authority
        if not relative_parsed.path then
            relative_parsed.path = base_parsed.path
            if not relative_parsed.params then
                relative_parsed.params = base_parsed.params
                if not relative_parsed.query then
                    relative_parsed.query = base_parsed.query
                end
            end
        else
            relative_parsed.path = absolute_path(base_parsed.path or "",
                relative_parsed.path)
        end
    end
    return build(relative_parsed)
end
-----------------------------------------------------------------------------
-- Breaks a path into its segments, unescaping the segments
-- Input
--   path: path string; nil is treated as the empty path
-- Returns
--   segment: a table with one entry per non-empty segment; the field
--   is_absolute is set to 1 if the path starts with "/" and is_directory
--   is set to 1 if it ends with "/"
-----------------------------------------------------------------------------
function parse_path(path)
    local parsed = {}
    path = path or ""
    --path = string.gsub(path, "%s", "")
    -- empty segments (consecutive slashes) are skipped by the pattern
    for segment in string.gmatch(path, "[^/]+") do
        parsed[#parsed + 1] = unescape(segment)
    end
    if string.sub(path, 1, 1) == "/" then parsed.is_absolute = 1 end
    if string.sub(path, -1, -1) == "/" then parsed.is_directory = 1 end
    return parsed
end
-----------------------------------------------------------------------------
-- Builds a path component from its segments, escaping protected characters.
-- Input
--   parsed: path segments, as returned by parse_path
--   unsafe: if true, segments are not protected before path is built
-- Returns
--   path: corresponding path string
-- Obs:
--   a trailing "/" is added for parsed.is_directory only when there is at
--   least one segment; a leading "/" is added whenever parsed.is_absolute
--   is set
-----------------------------------------------------------------------------
function build_path(parsed, unsafe)
    local n = #parsed
    -- collect the (optionally protected) segments and join them once
    local pieces = {}
    for i = 1, n do
        local segment = parsed[i]
        if not unsafe then segment = protect_segment(segment) end
        pieces[i] = segment
    end
    local path = table.concat(pieces, "/")
    if n > 0 and parsed.is_directory then path = path .. "/" end
    if parsed.is_absolute then path = "/" .. path end
    return path
end