from test import test_support
import unittest
import urlparse
# Base URLs that the urljoin() tests resolve relative references against.
# The two differ only in that the RFC 1808 base carries a "#f" fragment.
RFC1808_BASE = "http://a/b/c/d;p?q#f"
RFC2396_BASE = "http://a/b/c/d;p?q"
class UrlParseTestCase(unittest.TestCase):
    """Tests for the urlparse module.

    Covers urlparse()/urlsplit() and their inverses, urljoin(), urldefrag()
    and the attributes exposed by the parse results.
    """

    def checkRoundtrips(self, url, parsed, split):
        """Check that *url* parses as expected and survives a round trip.

        url    -- the URL string to parse
        parsed -- the 6-tuple expected from urlparse.urlparse(url)
        split  -- the 5-tuple expected from urlparse.urlsplit(url)

        For both urlparse() and urlsplit() this verifies the result tuple,
        the named attributes, that unparsing yields *url* again, and that
        re-parsing geturl() gives an identical result.
        """
        result = urlparse.urlparse(url)
        self.assertEqual(result, parsed)
        t = (result.scheme, result.netloc, result.path,
             result.params, result.query, result.fragment)
        self.assertEqual(t, parsed)
        # put it back together and it should be the same
        result2 = urlparse.urlunparse(result)
        self.assertEqual(result2, url)
        self.assertEqual(result2, result.geturl())

        # the result of geturl() is a fixpoint; we can always parse it
        # again to get the same result:
        result3 = urlparse.urlparse(result.geturl())
        self.assertEqual(result3.geturl(), result.geturl())
        self.assertEqual(result3, result)
        self.assertEqual(result3.scheme, result.scheme)
        self.assertEqual(result3.netloc, result.netloc)
        self.assertEqual(result3.path, result.path)
        self.assertEqual(result3.params, result.params)
        self.assertEqual(result3.query, result.query)
        self.assertEqual(result3.fragment, result.fragment)
        self.assertEqual(result3.username, result.username)
        self.assertEqual(result3.password, result.password)
        self.assertEqual(result3.hostname, result.hostname)
        self.assertEqual(result3.port, result.port)

        # check the roundtrip using urlsplit() as well
        result = urlparse.urlsplit(url)
        self.assertEqual(result, split)
        t = (result.scheme, result.netloc, result.path,
             result.query, result.fragment)
        self.assertEqual(t, split)
        result2 = urlparse.urlunsplit(result)
        self.assertEqual(result2, url)
        self.assertEqual(result2, result.geturl())

        # check the fixpoint property of re-parsing the result of geturl()
        result3 = urlparse.urlsplit(result.geturl())
        self.assertEqual(result3.geturl(), result.geturl())
        self.assertEqual(result3, result)
        self.assertEqual(result3.scheme, result.scheme)
        self.assertEqual(result3.netloc, result.netloc)
        self.assertEqual(result3.path, result.path)
        self.assertEqual(result3.query, result.query)
        self.assertEqual(result3.fragment, result.fragment)
        self.assertEqual(result3.username, result.username)
        self.assertEqual(result3.password, result.password)
        self.assertEqual(result3.hostname, result.hostname)
        self.assertEqual(result3.port, result.port)

    def test_roundtrips(self):
        # Each entry is (url, expected urlparse 6-tuple, expected urlsplit
        # 5-tuple).
        # NOTE(review): the list delimiters and the trailing empty fields of
        # the mms and svn+ssh entries were missing from the damaged source
        # and were restored from the tuple shapes of the intact entries --
        # confirm against the upstream file.
        testcases = [
            ('file:///tmp/junk.txt',
             ('file', '', '/tmp/junk.txt', '', '', ''),
             ('file', '', '/tmp/junk.txt', '', '')),
            ('imap://mail.python.org/mbox1',
             ('imap', 'mail.python.org', '/mbox1', '', '', ''),
             ('imap', 'mail.python.org', '/mbox1', '', '')),
            ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
             ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
              '', '', ''),
             ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
              '', '')),
            ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
             ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
              '', '', ''),
             ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
              '', '')),
            ]
        for url, parsed, split in testcases:
            self.checkRoundtrips(url, parsed, split)

    def test_http_roundtrips(self):
        # urlparse.urlsplit treats 'http:' as an optimized special case,
        # so we test both 'http:' and 'https:' in all the following.
        # Three cheers for white box knowledge!
        #
        # The table entries omit the scheme; it is prepended to the URL and
        # to both expected tuples inside the loop below.
        # NOTE(review): the list delimiters, the URL strings of the first
        # and last entries, and the "url = scheme + url" line were missing
        # from the damaged source; they were restored from the expected
        # tuples -- confirm against the upstream file.
        testcases = [
            ('://www.python.org',
             ('www.python.org', '', '', '', ''),
             ('www.python.org', '', '', '')),
            ('://www.python.org#abc',
             ('www.python.org', '', '', '', 'abc'),
             ('www.python.org', '', '', 'abc')),
            ('://www.python.org?q=abc',
             ('www.python.org', '', '', 'q=abc', ''),
             ('www.python.org', '', 'q=abc', '')),
            ('://www.python.org/#abc',
             ('www.python.org', '/', '', '', 'abc'),
             ('www.python.org', '/', '', 'abc')),
            ('://a/b/c/d;p?q#f',
             ('a', '/b/c/d', 'p', 'q', 'f'),
             ('a', '/b/c/d;p', 'q', 'f')),
            ]
        for scheme in ('http', 'https'):
            for url, parsed, split in testcases:
                url = scheme + url
                parsed = (scheme,) + parsed
                split = (scheme,) + split
                self.checkRoundtrips(url, parsed, split)

    def checkJoin(self, base, relurl, expected):
        """Assert that urlparse.urljoin(base, relurl) equals *expected*.

        The (base, relurl, expected) triple is passed as the failure
        message so a failing case can be identified.
        """
        self.assertEqual(urlparse.urljoin(base, relurl), expected,
                         (base, relurl, expected))

    def test_unparse_parse(self):
        # Scheme-less relative references must survive split/unsplit and
        # parse/unparse unchanged.
        for u in ['Python', './Python']:
            self.assertEqual(urlparse.urlunsplit(urlparse.urlsplit(u)), u)
            self.assertEqual(urlparse.urlunparse(urlparse.urlparse(u)), u)

    def test_RFC1808(self):
        # "normal" cases from RFC 1808:
        self.checkJoin(RFC1808_BASE, 'g:h', 'g:h')
        self.checkJoin(RFC1808_BASE, 'g', 'http://a/b/c/g')
        self.checkJoin(RFC1808_BASE, './g', 'http://a/b/c/g')
        self.checkJoin(RFC1808_BASE, 'g/', 'http://a/b/c/g/')
        self.checkJoin(RFC1808_BASE, '/g', 'http://a/g')
        self.checkJoin(RFC1808_BASE, '//g', 'http://g')
        self.checkJoin(RFC1808_BASE, 'g?y', 'http://a/b/c/g?y')
        self.checkJoin(RFC1808_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin(RFC1808_BASE, '#s', 'http://a/b/c/d;p?q#s')
        self.checkJoin(RFC1808_BASE, 'g#s', 'http://a/b/c/g#s')
        self.checkJoin(RFC1808_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
        self.checkJoin(RFC1808_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
        self.checkJoin(RFC1808_BASE, 'g;x', 'http://a/b/c/g;x')
        self.checkJoin(RFC1808_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
        self.checkJoin(RFC1808_BASE, '.', 'http://a/b/c/')
        self.checkJoin(RFC1808_BASE, './', 'http://a/b/c/')
        self.checkJoin(RFC1808_BASE, '..', 'http://a/b/')
        self.checkJoin(RFC1808_BASE, '../', 'http://a/b/')
        self.checkJoin(RFC1808_BASE, '../g', 'http://a/b/g')
        self.checkJoin(RFC1808_BASE, '../..', 'http://a/')
        self.checkJoin(RFC1808_BASE, '../../', 'http://a/')
        self.checkJoin(RFC1808_BASE, '../../g', 'http://a/g')

        # "abnormal" cases from RFC 1808:
        self.checkJoin(RFC1808_BASE, '', 'http://a/b/c/d;p?q#f')
        self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g')
        self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g')
        self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g')
        self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g')
        self.checkJoin(RFC1808_BASE, 'g.', 'http://a/b/c/g.')
        self.checkJoin(RFC1808_BASE, '.g', 'http://a/b/c/.g')
        self.checkJoin(RFC1808_BASE, 'g..', 'http://a/b/c/g..')
        self.checkJoin(RFC1808_BASE, '..g', 'http://a/b/c/..g')
        self.checkJoin(RFC1808_BASE, './../g', 'http://a/b/g')
        self.checkJoin(RFC1808_BASE, './g/.', 'http://a/b/c/g/')
        self.checkJoin(RFC1808_BASE, 'g/./h', 'http://a/b/c/g/h')
        self.checkJoin(RFC1808_BASE, 'g/../h', 'http://a/b/c/h')

        # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
        # so we'll not actually run these tests (which expect 1808 behavior).
        #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g')
        #self.checkJoin(RFC1808_BASE, 'http:', 'http:')

    def test_RFC2396(self):
        # cases from RFC 2396

        self.checkJoin(RFC2396_BASE, '?y', 'http://a/b/c/?y')
        self.checkJoin(RFC2396_BASE, ';x', 'http://a/b/c/;x')

        self.checkJoin(RFC2396_BASE, 'g:h', 'g:h')
        self.checkJoin(RFC2396_BASE, 'g', 'http://a/b/c/g')
        self.checkJoin(RFC2396_BASE, './g', 'http://a/b/c/g')
        self.checkJoin(RFC2396_BASE, 'g/', 'http://a/b/c/g/')
        self.checkJoin(RFC2396_BASE, '/g', 'http://a/g')
        self.checkJoin(RFC2396_BASE, '//g', 'http://g')
        self.checkJoin(RFC2396_BASE, 'g?y', 'http://a/b/c/g?y')
        self.checkJoin(RFC2396_BASE, '#s', 'http://a/b/c/d;p?q#s')
        self.checkJoin(RFC2396_BASE, 'g#s', 'http://a/b/c/g#s')
        self.checkJoin(RFC2396_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
        self.checkJoin(RFC2396_BASE, 'g;x', 'http://a/b/c/g;x')
        self.checkJoin(RFC2396_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
        self.checkJoin(RFC2396_BASE, '.', 'http://a/b/c/')
        self.checkJoin(RFC2396_BASE, './', 'http://a/b/c/')
        self.checkJoin(RFC2396_BASE, '..', 'http://a/b/')
        self.checkJoin(RFC2396_BASE, '../', 'http://a/b/')
        self.checkJoin(RFC2396_BASE, '../g', 'http://a/b/g')
        self.checkJoin(RFC2396_BASE, '../..', 'http://a/')
        self.checkJoin(RFC2396_BASE, '../../', 'http://a/')
        self.checkJoin(RFC2396_BASE, '../../g', 'http://a/g')
        self.checkJoin(RFC2396_BASE, '', RFC2396_BASE)
        self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g')
        self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g')
        self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g')
        self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g')
        self.checkJoin(RFC2396_BASE, 'g.', 'http://a/b/c/g.')
        self.checkJoin(RFC2396_BASE, '.g', 'http://a/b/c/.g')
        self.checkJoin(RFC2396_BASE, 'g..', 'http://a/b/c/g..')
        self.checkJoin(RFC2396_BASE, '..g', 'http://a/b/c/..g')
        self.checkJoin(RFC2396_BASE, './../g', 'http://a/b/g')
        self.checkJoin(RFC2396_BASE, './g/.', 'http://a/b/c/g/')
        self.checkJoin(RFC2396_BASE, 'g/./h', 'http://a/b/c/g/h')
        self.checkJoin(RFC2396_BASE, 'g/../h', 'http://a/b/c/h')
        self.checkJoin(RFC2396_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y')
        self.checkJoin(RFC2396_BASE, 'g;x=1/../y', 'http://a/b/c/y')
        self.checkJoin(RFC2396_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin(RFC2396_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x')
        self.checkJoin(RFC2396_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
        self.checkJoin(RFC2396_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x')

    def test_urldefrag(self):
        # Each entry is (url, expected URL without fragment, expected
        # fragment); a URL with no fragment yields an empty fragment string.
        for url, defrag, frag in [
            ('http://python.org#frag', 'http://python.org', 'frag'),
            ('http://python.org', 'http://python.org', ''),
            ('http://python.org/#frag', 'http://python.org/', 'frag'),
            ('http://python.org/', 'http://python.org/', ''),
            ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
            ('http://python.org/?q', 'http://python.org/?q', ''),
            ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
            ('http://python.org/p?q', 'http://python.org/p?q', ''),
            (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
            (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
            ]:
            self.assertEqual(urlparse.urldefrag(url), (defrag, frag))

    def test_urlsplit_attributes(self):
        # Upper-case input: scheme and hostname are expected lower-cased,
        # while netloc keeps its original case.
        url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
        p = urlparse.urlsplit(url)
        self.assertEqual(p.scheme, "http")
        self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
        self.assertEqual(p.path, "/doc/")
        self.assertEqual(p.query, "")
        self.assertEqual(p.fragment, "frag")
        self.assertEqual(p.username, None)
        self.assertEqual(p.password, None)
        self.assertEqual(p.hostname, "www.python.org")
        self.assertEqual(p.port, None)
        # geturl() won't return exactly the original URL in this case
        # since the scheme is always case-normalized
        #self.assertEqual(p.geturl(), url)

        # Credentials and a zero-padded port: port is expected as the
        # integer 80 while netloc keeps the literal ":080".
        url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
        p = urlparse.urlsplit(url)
        self.assertEqual(p.scheme, "http")
        self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
        self.assertEqual(p.path, "/doc/")
        self.assertEqual(p.query, "query=yes")
        self.assertEqual(p.fragment, "frag")
        self.assertEqual(p.username, "User")
        self.assertEqual(p.password, "Pass")
        self.assertEqual(p.hostname, "www.python.org")
        self.assertEqual(p.port, 80)
        self.assertEqual(p.geturl(), url)

        # Addressing issue1698, which suggests Username can contain
        # "@" characters.  Though not RFC compliant, many ftp sites allow
        # and request email addresses as usernames.

        url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
        p = urlparse.urlsplit(url)
        self.assertEqual(p.scheme, "http")
        self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
        self.assertEqual(p.path, "/doc/")
        self.assertEqual(p.query, "query=yes")
        self.assertEqual(p.fragment, "frag")
        self.assertEqual(p.username, "User@example.com")
        self.assertEqual(p.password, "Pass")
        self.assertEqual(p.hostname, "www.python.org")
        self.assertEqual(p.port, 80)
        self.assertEqual(p.geturl(), url)

    def test_attributes_bad_port(self):
        """Check handling of non-integer ports."""
        # The lambda defers the attribute access so that assertRaises can
        # observe the ValueError raised by reading .port.
        p = urlparse.urlsplit("http://www.example.net:foo")
        self.assertEqual(p.netloc, "www.example.net:foo")
        self.assertRaises(ValueError, lambda: p.port)

        p = urlparse.urlparse("http://www.example.net:foo")
        self.assertEqual(p.netloc, "www.example.net:foo")
        self.assertRaises(ValueError, lambda: p.port)

    def test_attributes_without_netloc(self):
        # This example is straight from RFC 3261.  It looks like it
        # should allow the username, hostname, and port to be filled
        # in, but doesn't.  Since it's a URI and doesn't use the
        # scheme://netloc syntax, the netloc and related attributes
        # should be left empty.
        uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
        p = urlparse.urlsplit(uri)
        self.assertEqual(p.netloc, "")
        self.assertEqual(p.username, None)
        self.assertEqual(p.password, None)
        self.assertEqual(p.hostname, None)
        self.assertEqual(p.port, None)
        self.assertEqual(p.geturl(), uri)

        p = urlparse.urlparse(uri)
        self.assertEqual(p.netloc, "")
        self.assertEqual(p.username, None)
        self.assertEqual(p.password, None)
        self.assertEqual(p.hostname, None)
        self.assertEqual(p.port, None)
        self.assertEqual(p.geturl(), uri)

    def test_caching(self):
        # Test case for bug #1313119
        # Parse the unicode form first, then the str form of the same URL;
        # the fields of the second result must have the type of the str
        # input.  (Presumably this guards against a parse cache returning
        # the unicode result for the str key -- verify against the bug.)
        uri = "http://example.com/doc/"
        unicode_uri = unicode(uri)

        urlparse.urlparse(unicode_uri)
        p = urlparse.urlparse(uri)
        self.assertEqual(type(p.scheme), type(uri))
        self.assertEqual(type(p.hostname), type(uri))
        self.assertEqual(type(p.path), type(uri))

    def test_noslash(self):
        # Issue 1637: http://foo.com?query is legal
        self.assertEqual(urlparse.urlparse("http://example.com?blahblah=/foo"),
                         ('http', 'example.com', '', '', 'blahblah=/foo', ''))
# NOTE(review): the damaged source showed only the run_unittest() call and a
# body-less "if __name__" guard; the test_main() wrapper and the guard body
# were reconstructed -- confirm against the upstream file.
def test_main():
    """Run every test in UrlParseTestCase via test_support.run_unittest()."""
    test_support.run_unittest(UrlParseTestCase)


if __name__ == "__main__":
    test_main()