1 /*****************************************************************************
2 * url.c: URL related functions
3 *****************************************************************************
4 * Copyright (C) 2006 VLC authors and VideoLAN
5 * Copyright (C) 2008-2012 Rémi Denis-Courmont
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU Lesser General Public License as published by
9 * the Free Software Foundation; either version 2.1 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this program; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
20 *****************************************************************************/
37 #include <vlc_common.h>
38 #include <vlc_memstream.h>
/* Returns a decoded copy of a percent-encoded string: the input is
 * duplicated with strdup() and the copy is decoded in place by
 * vlc_uri_decode(). What happens when vlc_uri_decode() returns NULL is not
 * visible in this listing (presumably the copy is released - confirm). */
43 char *vlc_uri_decode_duplicate (const char *str
)
45 char *buf
= strdup (str
);
46 if (vlc_uri_decode (buf
) == NULL
)
/* Decodes a string in place. A read cursor (in) and a write cursor (out)
 * both start at str; bytes are consumed through `in` until the terminating
 * NUL. For an escape sequence the next two characters are copied into `hex`
 * (hitting a NUL at either position trips the check on line 67) and the
 * value parsed by strtoul() in base 16 is stored through `out`.
 * NOTE(review): the test that recognises the '%' introducer and the
 * return statements are missing from this listing. */
54 char *vlc_uri_decode (char *str
)
56 char *in
= str
, *out
= str
;
61 while ((c
= *(in
++)) != '\0')
67 if (!(hex
[0] = *(in
++)) || !(hex
[1] = *(in
++)))
70 *(out
++) = strtoul (hex
, NULL
, 0x10);
/**
 * Checks whether a character is an ASCII letter or an ASCII decimal digit.
 *
 * The test is independent of the locale and of the signedness of char.
 * The comparisons are carried out in unsigned arithmetic so that:
 *  - negative values (e.g. sign-extended non-ASCII bytes) are rejected,
 *  - values above 255 cannot alias onto a letter or digit,
 *  - no signed overflow can occur, whatever the value of c.
 *
 * \param c character code to test
 * \return true if c is within [a-z], [A-Z] or [0-9], false otherwise
 */
static bool isurialnum(int c)
{
    const unsigned u = (unsigned)c;

    return (u - 'a') < 26u
        || (u - 'A') < 26u
        || (u - '0') < 10u;
}
/**
 * Checks whether a character is one of the _unreserved_ URI characters
 * (RFC3986 §2.3): an ASCII letter, a decimal digit, or one of "-._~".
 *
 * \param c character code to test
 * \return true if c is an unreserved character, false otherwise
 */
static bool isurisafe(int c)
{
    if (isurialnum(c))
        return true;

    /* strchr() treats the terminating NUL as part of the searched string,
     * so a NUL byte has to be rejected explicitly. */
    return c != '\0' && strchr("-._~", c) != NULL;
}
/**
 * Checks whether a character is one of the URI sub-delimiters
 * "!$&'()*+,;=".
 *
 * \param c character code to test
 * \return true if c is a sub-delimiter, false otherwise
 */
static bool isurisubdelim(int c)
{
    /* strchr() treats the terminating NUL as part of the searched string,
     * so a NUL byte has to be rejected explicitly. */
    if (c == '\0')
        return false;

    return strchr("!$&'()*+,;=", c) != NULL;
}
/**
 * Checks whether a character is an ASCII hexadecimal digit.
 *
 * Same as isxdigit() but does not depend on locale and unsignedness.
 * Unsigned arithmetic is used so that negative values and values above
 * 255 are rejected rather than aliased, and so that no signed overflow
 * can occur.
 *
 * \param c character code to test
 * \return true if c is within [0-9], [A-F] or [a-f], false otherwise
 */
static bool isurihex(int c)
{
    const unsigned u = (unsigned)c;

    return (u - '0') < 10u
        || (u - 'A') < 6u
        || (u - 'a') < 6u;
}
/* Upper-case hexadecimal digits, indexed by nibble value (0..15). Used by
 * the encoders in this file as urihex[c >> 4] and urihex[c & 0xf]. */
104 static const char urihex
[] = "0123456789ABCDEF";
/* Percent-encodes *lenp bytes starting at str into a new heap buffer.
 *
 * The buffer is first sized for the worst case (3 output characters per
 * input byte, plus one extra byte). Escaped bytes are written as two
 * upper-case hexadecimal digits taken from urihex[]. Finally the buffer is
 * shrunk with realloc() to *lenp + 1 bytes; if shrinking fails, the
 * original (larger) buffer is returned instead.
 *
 * NOTE(review): the lines that decide which bytes are copied verbatim, that
 * emit the '%' and that update *lenp to the encoded length are missing
 * from this listing - confirm against the full source.
 * NOTE(review): 3 * *lenp + 1 is not checked for size_t overflow here. */
106 static char *encode_URI_bytes (const char *str
, size_t *restrict lenp
)
108 char *buf
= malloc (3 * *lenp
+ 1);
109 if (unlikely(buf
== NULL
))
113 for (size_t i
= 0; i
< *lenp
; i
++)
115 unsigned char c
= str
[i
];
119 /* This is URI encoding, not HTTP forms:
120 * Space is encoded as '%20', not '+'. */
124 *(out
++) = urihex
[c
>> 4];
125 *(out
++) = urihex
[c
& 0xf];
130 out
= realloc (buf
, *lenp
+ 1);
131 return likely(out
!= NULL
) ? out
: buf
;
/* Percent-encodes a NUL-terminated string: its length is measured with
 * strlen() and the bytes are handed to encode_URI_bytes().
 * NOTE(review): what is done with a non-NULL result (line 138 onwards) is
 * not visible in this listing; presumably the result is NUL-terminated and
 * returned - confirm. */
134 char *vlc_uri_encode (const char *str
)
136 size_t len
= strlen (str
);
137 char *ret
= encode_URI_bytes (str
, &len
);
138 if (likely(ret
!= NULL
))
/* Builds a URI from a file system path. `scheme` selects the URI scheme;
 * "file" is used where the code falls back on a NULL scheme.
 *
 * Behaviour visible in this listing:
 *  - path "-" with a NULL scheme yields "fd://0" (standard input);
 *  - forward slashes are rewritten to DIR_SEP_CHAR into a stack array of
 *    strlen(path) + 1 bytes (the enclosing condition is not shown);
 *  - under _WIN32/__OS2__, a leading "X:" drive letter is turned into
 *    "<scheme>:///X:", and a UNC path \\host\share\path becomes
 *    file://host/share/path;
 *  - a path not starting with DIR_SEP_CHAR is relative: the current working
 *    directory from vlc_getcwd() is prepended and the function calls itself
 *    on the result;
 *  - otherwise the path is asserted to start with DIR_SEP_CHAR, and each
 *    component (delimited with strcspn() on DIR_SEP) is percent-encoded by
 *    encode_URI_bytes() and appended as "/<component>".
 *
 * NOTE(review): many lines of the body (error paths, frees, the loop
 * construct, the final return) are missing from this listing. */
143 char *vlc_path2uri (const char *path
, const char *scheme
)
150 if (scheme
== NULL
&& !strcmp (path
, "-"))
151 return strdup ("fd://0"); // standard input
152 /* Note: VLC cannot handle URI schemes without double slash after the
153 * scheme name (such as mailto: or news:). */
158 char p
[strlen (path
) + 1];
160 for (buf
= p
; *path
; buf
++, path
++)
161 *buf
= (*path
== '/') ? DIR_SEP_CHAR
: *path
;
167 #if defined (_WIN32) || defined (__OS2__)
169 if (isalpha ((unsigned char)path
[0]) && (path
[1] == ':'))
171 if (asprintf (&buf
, "%s:///%c:", scheme
? scheme
: "file",
175 # warning Drive letter-relative path not implemented!
176 if (path
[0] != DIR_SEP_CHAR
)
183 if (!strncmp (path
, "\\\\", 2))
184 { /* Windows UNC paths */
185 /* \\host\share\path -> file://host/share/path */
186 int hostlen
= strcspn (path
+ 2, DIR_SEP
);
188 if (asprintf (&buf
, "file://%.*s", hostlen
, path
+ 2) == -1)
193 return buf
; /* Hostname without path */
197 if (path
[0] != DIR_SEP_CHAR
)
198 { /* Relative path: prepend the current working directory */
201 if ((cwd
= vlc_getcwd ()) == NULL
)
203 if (asprintf (&buf
, "%s"DIR_SEP
"%s", cwd
, path
) == -1)
207 ret
= (buf
!= NULL
) ? vlc_path2uri (buf
, scheme
) : NULL
;
212 if (asprintf (&buf
, "%s://", scheme
? scheme
: "file") == -1)
217 /* Absolute file path */
218 assert (path
[0] == DIR_SEP_CHAR
);
221 size_t len
= strcspn (++path
, DIR_SEP
);
224 char *component
= encode_URI_bytes (path
- len
, &len
);
225 if (unlikely(component
== NULL
))
230 component
[len
] = '\0';
233 int val
= asprintf (&uri
, "%s/%s", buf
, component
);
236 if (unlikely(val
== -1))
/* Converts a URL into a local file path, for the "file" and "fd" schemes.
 *
 * Steps visible in this listing:
 *  - the "://" separator is located; the scheme length is the distance to
 *    it, or to an earlier '/' if memchr() finds one;
 *  - request parameters and anchor (anything from the first '?' or '#') are
 *    dropped by duplicating only the leading part with strndup();
 *  - the copy is percent-decoded in place with vlc_uri_decode();
 *  - "file" (case-insensitive): a "localhost/" prefix is removed by
 *    shifting the string with memmove(); Windows/OS2 builds additionally
 *    walk every '/' in the path and handle "localhost\" and "\\host" forms;
 *  - "fd": the descriptor number is parsed with strtol() (base 0) and
 *    mapped to /dev/stdin, /dev/stdout, /dev/stderr or /dev/fd/<n> on
 *    non-Windows builds, or to "CON" otherwise.
 *
 * Returns NULL for unsupported schemes or invalid syntax, and on allocation
 * failure of the working copy.
 * NOTE(review): the conditions selecting between the /dev/... names, and
 * several other lines, are missing from this listing. */
245 char *vlc_uri2path (const char *url
)
250 char *path
= strstr (url
, "://");
252 return NULL
; /* unsupported scheme or invalid syntax */
254 end
= memchr (url
, '/', path
- url
);
255 size_t schemelen
= ((end
!= NULL
) ? end
: path
) - url
;
256 path
+= 3; /* skip "://" */
258 /* Remove request parameters and/or HTML anchor if present */
259 end
= path
+ strcspn (path
, "?#");
260 path
= strndup (path
, end
- path
);
261 if (unlikely(path
== NULL
))
262 return NULL
; /* boom! */
265 vlc_uri_decode (path
);
267 if (schemelen
== 4 && !strncasecmp (url
, "file", 4))
269 #if !defined (_WIN32) && !defined (__OS2__)
270 /* Leading slash => local path */
273 /* Local path disguised as a remote one */
274 if (!strncasecmp (path
, "localhost/", 10))
275 return memmove (path
, path
+ 9, strlen (path
+ 9) + 1);
277 /* cannot start with a space */
280 for (char *p
= strchr (path
, '/'); p
; p
= strchr (p
+ 1, '/'))
283 /* Leading backslash => local path */
285 return memmove (path
, path
+ 1, strlen (path
+ 1) + 1);
286 /* Local path disguised as a remote one */
287 if (!strncasecmp (path
, "localhost\\", 10))
288 return memmove (path
, path
+ 10, strlen (path
+ 10) + 1);
290 if (*path
&& asprintf (&ret
, "\\\\%s", path
) == -1)
293 /* non-local path :-( */
296 if (schemelen
== 2 && !strncasecmp (url
, "fd", 2))
298 int fd
= strtol (path
, &end
, 0);
303 #if !defined( _WIN32 ) && !defined( __OS2__ )
307 ret
= strdup ("/dev/stdin");
310 ret
= strdup ("/dev/stdout");
313 ret
= strdup ("/dev/stderr");
316 if (asprintf (&ret
, "/dev/fd/%d", fd
) == -1)
320 /* XXX: Does this work on WinCE? */
322 ret
= strdup ("CON");
328 return ret
; /* unknown scheme */
/* Forward declaration: the function is defined at the end of this file and
 * is used by vlc_UrlParseInner() to convert the host name. */
331 static char *vlc_idna_to_ascii (const char *);
/* Produces a heap-allocated URI from an IRI in two passes.
 *
 * The first pass walks the input and maintains two counters, `a` and `u`
 * (presumably bytes copied as-is versus bytes to be escaped - the counting
 * lines are not shown). The sum is rejected when it exceeds SIZE_MAX / 4,
 * which keeps the size computation a + 3 * u + 1 from overflowing. The
 * second pass fills the buffer; escaped bytes are written as upper-case
 * hexadecimal digits from urihex[].
 *
 * NOTE(review): the byte classification, the '%' emission, the termination
 * and the return statements are missing from this listing. */
334 static char *vlc_iri2uri(const char *iri
)
338 for (size_t i
= 0; iri
[i
] != '\0'; i
++)
340 unsigned char c
= iri
[i
];
348 if (unlikely((a
+ u
) > (SIZE_MAX
/ 4)))
354 char *uri
= malloc(a
+ 3 * u
+ 1), *p
;
355 if (unlikely(uri
== NULL
))
358 for (p
= uri
; *iri
!= '\0'; iri
++)
360 unsigned char c
= *iri
;
367 *(p
++) = urihex
[c
>> 4];
368 *(p
++) = urihex
[c
& 0xf];
/* Validates one URI component character by character. A character is
 * accepted when it is unreserved (isurisafe), a sub-delimiter
 * (isurisubdelim), listed in `extras`, or a '%' followed by two hexadecimal
 * digits. Reading str[i + 1] and str[i + 2] is safe because the && chain
 * stops at the first non-hex character, which includes the terminator.
 * NOTE(review): the statements executed on each match and the final return
 * values are missing from this listing; presumably any other character
 * makes the function return false - confirm. */
376 static bool vlc_uri_component_validate(const char *str
, const char *extras
)
380 for (size_t i
= 0; str
[i
] != '\0'; i
++)
384 if (isurisafe(c
) || isurisubdelim(c
))
386 if (strchr(extras
, c
) != NULL
)
388 if (c
== '%' && isurihex(str
[i
+ 1]) && isurihex(str
[i
+ 2]))
/**
 * Validates a host name component.
 *
 * On top of the characters vlc_uri_component_validate() always accepts,
 * a colon is allowed.
 *
 * \param str nul-terminated host name
 * \return the verdict of vlc_uri_component_validate()
 */
static bool vlc_uri_host_validate(const char *str)
{
    static const char host_extras[] = ":";

    return vlc_uri_component_validate(str, host_extras);
}
/**
 * Validates a path component.
 *
 * On top of the characters vlc_uri_component_validate() always accepts,
 * the slash, at sign and colon are allowed.
 *
 * \param str nul-terminated path
 * \return the verdict of vlc_uri_component_validate()
 */
static bool vlc_uri_path_validate(const char *str)
{
    static const char path_extras[] = "/@:";

    return vlc_uri_component_validate(str, path_extras);
}
/* Splits a URL string into the fields of *url.
 *
 * All pointer fields are first reset to NULL. The input is converted with
 * vlc_iri2uri() and the result is kept in url->psz_buffer; most fields
 * point into that buffer. The visible parsing order is:
 *  - scheme: a run of alphanumerics and "+-." stored as psz_protocol;
 *  - fragment: located with strchr(cur, '#') and kept only if it validates
 *    with the extra characters "/?";
 *  - query: located with strchr(cur, '?'), stored as psz_option;
 *  - authority (only when the remainder starts with "//"):
 *      - the path starts at the first '/', which is temporarily replaced
 *        by a NUL and restored at the end of the function;
 *      - user info ends at the last '@'; an optional ':' separates the
 *        (obsolete) password; both are percent-decoded in place;
 *      - a host within brackets is duplicated with strdup() (IPv6 numeral),
 *        otherwise the host is percent-decoded and passed through
 *        vlc_idna_to_ascii(); a host failing vlc_uri_host_validate() is
 *        reset to NULL;
 *      - the port is parsed with strtoul() in base 10 and rejected if it
 *        does not start with a digit, has trailing characters, or exceeds
 *        UINT_MAX.
 *
 * psz_host is a separate heap allocation; see vlc_UrlClean().
 * NOTE(review): many lines (NUL terminations, error codes, the return
 * value) are missing from this listing. */
408 static int vlc_UrlParseInner(vlc_url_t
*restrict url
, const char *str
)
410 url
->psz_protocol
= NULL
;
411 url
->psz_username
= NULL
;
412 url
->psz_password
= NULL
;
413 url
->psz_host
= NULL
;
415 url
->psz_path
= NULL
;
416 url
->psz_option
= NULL
;
417 url
->psz_fragment
= NULL
;
418 url
->psz_buffer
= NULL
;
419 url
->psz_pathbuffer
= NULL
;
427 char *buf
= vlc_iri2uri(str
);
428 if (unlikely(buf
== NULL
))
430 url
->psz_buffer
= buf
;
432 char *cur
= buf
, *next
;
437 while (isurialnum(*next
) || memchr ("+-.", *next
, 3) != NULL
)
443 url
->psz_protocol
= cur
;
448 next
= strchr(cur
, '#');
452 if (vlc_uri_component_validate(next
, "/?"))
453 url
->psz_fragment
= next
;
456 /* Query parameters */
457 next
= strchr(cur
, '?');
461 url
->psz_option
= next
;
465 if (strncmp(cur
, "//", 2) == 0)
470 next
= strchr(cur
, '/');
473 *next
= '\0'; /* temporary nul, reset to slash later */
474 url
->psz_path
= next
;
477 url->psz_path = "/";*/
480 next
= strrchr(cur
, '@');
484 url
->psz_username
= cur
;
487 /* Password (obsolete) */
488 next
= strchr(url
->psz_username
, ':');
492 url
->psz_password
= next
;
493 vlc_uri_decode(url
->psz_password
);
495 vlc_uri_decode(url
->psz_username
);
499 if (*cur
== '[' && (next
= strrchr(cur
, ']')) != NULL
)
500 { /* Try IPv6 numeral within brackets */
502 url
->psz_host
= strdup(cur
+ 1);
511 next
= strchr(cur
, ':');
515 url
->psz_host
= vlc_idna_to_ascii(vlc_uri_decode(cur
));
518 if (url
->psz_host
== NULL
)
521 if (!vlc_uri_host_validate(url
->psz_host
))
524 url
->psz_host
= NULL
;
530 if (next
!= NULL
&& *next
)
533 unsigned long port
= strtoul(next
, &end
, 10);
535 if (strchr("0123456789", *next
) == NULL
|| *end
|| port
> UINT_MAX
)
544 if (url
->psz_path
!= NULL
)
545 *url
->psz_path
= '/'; /* restore leading slash */
/* Parses a URL strictly: vlc_UrlParseInner() does the splitting, then the
 * path (if any) must pass vlc_uri_path_validate(); a path that fails is
 * reset to NULL.
 * NOTE(review): the return value handling is missing from this listing;
 * presumably an invalid path also turns the result into an error. */
555 int vlc_UrlParse(vlc_url_t
*url
, const char *str
)
557 int ret
= vlc_UrlParseInner(url
, str
);
559 if (url
->psz_path
!= NULL
&& !vlc_uri_path_validate(url
->psz_path
))
561 url
->psz_path
= NULL
;
/* Forward declaration: used by vlc_UrlParseFixup() and defined further down
 * in this file. */
568 static char *vlc_uri_fixup_inner(const char *str
, const char *extras
);
/* Parses a URL leniently. After vlc_UrlParseInner(), a path that does not
 * validate with the extra characters "/@:" is re-encoded by
 * vlc_uri_fixup_inner(); the result is owned by url->psz_pathbuffer.
 * If the fix-up fails, psz_path is reset to NULL; otherwise psz_path is
 * pointed at the fixed copy, which is asserted to be a valid path.
 * NOTE(review): the return value handling is missing from this listing. */
570 int vlc_UrlParseFixup(vlc_url_t
*url
, const char *str
)
572 int ret
= vlc_UrlParseInner(url
, str
);
574 static const char pathextras
[] = "/@:";
576 if (url
->psz_path
!= NULL
577 && !vlc_uri_component_validate(url
->psz_path
, pathextras
))
579 url
->psz_pathbuffer
= vlc_uri_fixup_inner(url
->psz_path
, pathextras
);
580 if (url
->psz_pathbuffer
== NULL
)
582 url
->psz_path
= NULL
;
588 url
->psz_path
= url
->psz_pathbuffer
;
589 assert(vlc_uri_path_validate(url
->psz_path
));
595 void vlc_UrlClean (vlc_url_t
*restrict url
)
597 free (url
->psz_host
);
598 free (url
->psz_buffer
);
599 free (url
->psz_pathbuffer
);
605 * See IETF RFC3986 section 5.2.3 for details.
/* Merges a base path with a relative reference path into a new heap string
 * built by asprintf(). Two forms are visible:
 *  - "/%s": the reference is placed directly under the root;
 *  - "%.*s%s": the base is cut at a length derived from its last '/'
 *    (found with strrchr) and the reference is appended.
 * A return of -1 from asprintf() is detected afterwards.
 * NOTE(review): the conditions choosing between the two forms, any
 * adjustment of `end`, and the return statements are missing from this
 * listing. */
607 static char *vlc_uri_merge_paths(const char *base
, const char *ref
)
613 len
= asprintf(&str
, "/%s", ref
);
616 const char *end
= strrchr(base
, '/');
623 len
= asprintf(&str
, "%.*s%s", (int)(end
- base
), base
, ref
);
626 if (unlikely(len
== -1))
632 * Remove dot segments
634 * See IETF RFC3986 section 5.2.4 for details.
/* Removes "." and ".." segments from a path, in place. An input cursor and
 * an output cursor both start at str, and the output never overtakes the
 * input (asserted on every iteration). The prefixes tested, in order, are
 * "../", "./", "/./", "/." (whole remainder), "/../", "/.." (whole
 * remainder), "." and ".." (whole remainder). For the two "/.." cases the
 * output is rewound to the last '/' already written, using memrchr().
 * Anything else is an ordinary segment: one character is copied, then the
 * run up to the next '/' is moved with memmove().
 * NOTE(review): the cursor updates inside each branch, the handling of a
 * NULL result from memrchr(), the termination and the return value are
 * missing from this listing. */
636 static char *vlc_uri_remove_dot_segments(char *str
)
638 char *input
= str
, *output
= str
;
640 while (input
[0] != '\0')
642 assert(output
<= input
);
644 if (strncmp(input
, "../", 3) == 0)
649 if (strncmp(input
, "./", 2) == 0)
654 if (strncmp(input
, "/./", 3) == 0)
659 if (strcmp(input
, "/.") == 0)
664 if (strncmp(input
, "/../", 4) == 0)
667 output
= memrchr(str
, '/', output
- str
);
672 if (strcmp(input
, "/..") == 0)
675 output
= memrchr(str
, '/', output
- str
);
680 if (strcmp(input
, ".") == 0)
685 if (strcmp(input
, "..") == 0)
692 *(output
++) = *(input
++);
694 size_t len
= strcspn(input
, "/");
697 memmove(output
, input
, len
);
/* Serialises the fields of *uri into a string using a memory stream.
 *
 * Output order, each part emitted only if the field is non-NULL:
 *   "<protocol>:", then - if a host is set - "//", the percent-encoded user
 *   name, ":<percent-encoded password>", '@', and the host; then the path,
 *   "?<option>" and "#<fragment>".
 * A host containing ':' is wrapped in brackets; the port is appended as
 * ":<port>" when i_port is not zero.
 *
 * The stream is closed on two distinct paths (lines 759 and 764); the
 * second one looks like the error path.
 * NOTE(review): the lines between those two closes, including any release
 * of the buffer, and the conditions around the '@' separator are missing
 * from this listing. */
707 char *vlc_uri_compose(const vlc_url_t
*uri
)
709 struct vlc_memstream stream
;
712 vlc_memstream_open(&stream
);
714 if (uri
->psz_protocol
!= NULL
)
715 vlc_memstream_printf(&stream
, "%s:", uri
->psz_protocol
);
717 if (uri
->psz_host
!= NULL
)
719 vlc_memstream_write(&stream
, "//", 2);
721 if (uri
->psz_username
!= NULL
)
723 enc
= vlc_uri_encode(uri
->psz_username
);
727 vlc_memstream_puts(&stream
, enc
);
730 if (uri
->psz_password
!= NULL
)
732 enc
= vlc_uri_encode(uri
->psz_password
);
733 if (unlikely(enc
== NULL
))
736 vlc_memstream_printf(&stream
, ":%s", enc
);
739 vlc_memstream_putc(&stream
, '@');
744 if (strchr(uri
->psz_host
, ':') != NULL
)
745 fmt
= (uri
->i_port
!= 0) ? "[%s]:%d" : "[%s]";
747 fmt
= (uri
->i_port
!= 0) ? "%s:%d" : "%s";
748 /* No IDNA decoding here. Seems unnecessary, dangerous even. */
749 vlc_memstream_printf(&stream
, fmt
, uri
->psz_host
, uri
->i_port
);
752 if (uri
->psz_path
!= NULL
)
753 vlc_memstream_puts(&stream
, uri
->psz_path
);
754 if (uri
->psz_option
!= NULL
)
755 vlc_memstream_printf(&stream
, "?%s", uri
->psz_option
);
756 if (uri
->psz_fragment
!= NULL
)
757 vlc_memstream_printf(&stream
, "#%s", uri
->psz_fragment
);
759 if (vlc_memstream_close(&stream
))
764 if (vlc_memstream_close(&stream
) == 0)
/* Resolves a reference against a base URI (RFC3986 section 5.2.2) and
 * returns the composed result.
 *
 * The reference is parsed first; a parse failure cleans it up, and a
 * reference that carries its own scheme is absolute and short-circuits the
 * resolution. Otherwise the base is parsed and a target URI is assembled:
 *  - the scheme always comes from the base;
 *  - user name, password, host and port are taken from the base on the
 *    branch following the test of the reference's host;
 *  - an absent or empty reference path inherits the base path, and the base
 *    query too when the reference has none;
 *  - a reference path that does not start with '/' is merged with the base
 *    path by vlc_uri_merge_paths();
 *  - dot segments are removed from the resulting path.
 * The target is serialised by vlc_uri_compose(), then both parsed URIs are
 * cleaned.
 * NOTE(review): the else branches, the declaration of tgt_uri, the release
 * of pathbuf and the return statements are missing from this listing. */
769 char *vlc_uri_resolve(const char *base
, const char *ref
)
771 vlc_url_t base_uri
, rel_uri
;
773 char *pathbuf
= NULL
, *ret
= NULL
;
775 if (vlc_UrlParse(&rel_uri
, ref
))
777 vlc_UrlClean(&rel_uri
);
781 if (rel_uri
.psz_protocol
!= NULL
)
782 { /* Short circuit in case of absolute URI */
783 vlc_UrlClean(&rel_uri
);
787 vlc_UrlParse(&base_uri
, base
);
789 /* RFC3986 section 5.2.2 */
793 tgt_uri
.psz_protocol
= base_uri
.psz_protocol
;
795 if (rel_uri
.psz_host
!= NULL
)
798 tgt_uri
.psz_username
= base_uri
.psz_username
;
799 tgt_uri
.psz_password
= base_uri
.psz_password
;
800 tgt_uri
.psz_host
= base_uri
.psz_host
;
801 tgt_uri
.i_port
= base_uri
.i_port
;
803 if (rel_uri
.psz_path
== NULL
|| rel_uri
.psz_path
[0] == '\0')
805 tgt_uri
.psz_path
= base_uri
.psz_path
;
806 if (rel_uri
.psz_option
== NULL
)
807 tgt_uri
.psz_option
= base_uri
.psz_option
;
811 if (rel_uri
.psz_path
[0] == '/')
814 pathbuf
= vlc_uri_merge_paths(base_uri
.psz_path
, rel_uri
.psz_path
);
815 if (unlikely(pathbuf
== NULL
))
818 tgt_uri
.psz_path
= pathbuf
;
822 if (tgt_uri
.psz_path
!= NULL
)
823 vlc_uri_remove_dot_segments(tgt_uri
.psz_path
);
825 ret
= vlc_uri_compose(&tgt_uri
);
828 vlc_UrlClean(&base_uri
);
829 vlc_UrlClean(&rel_uri
);
/* Re-encodes a URI component so that only permitted characters remain
 * unescaped. Both arguments must be non-NULL (asserted).
 *
 * A first scan sets encode_percent as soon as a '%' is found that is not
 * followed by two hexadecimal digits: the input is then treated as not
 * percent-encoded and '%' itself gets escaped. In the second scan, a
 * character is copied verbatim when it is unreserved, a sub-delimiter,
 * listed in `extras`, or a '%' while encode_percent is false; any other
 * character is written as "%XX" in upper-case hexadecimal.
 *
 * NOTE(review): the return statements (after closing the stream) are
 * missing from this listing; presumably NULL is returned when
 * vlc_memstream_close() reports an error. */
833 static char *vlc_uri_fixup_inner(const char *str
, const char *extras
)
835 assert(str
&& extras
);
837 bool encode_percent
= false;
838 for (size_t i
= 0; str
[i
] != '\0'; i
++)
839 if (str
[i
] == '%' && !(isurihex(str
[i
+1]) && isurihex(str
[i
+2])))
841 encode_percent
= true;
845 struct vlc_memstream stream
;
847 vlc_memstream_open(&stream
);
849 for (size_t i
= 0; str
[i
] != '\0'; i
++)
851 unsigned char c
= str
[i
];
853 if (isurisafe(c
) || isurisubdelim(c
) || (strchr(extras
, c
) != NULL
)
854 || (c
== '%' && !encode_percent
))
855 vlc_memstream_putc(&stream
, c
);
857 vlc_memstream_printf(&stream
, "%%%02hhX", c
);
860 if (vlc_memstream_close(&stream
))
/**
 * Appends one character to a memory stream, escaping it if needed.
 *
 * The character is written verbatim when it is unreserved, a
 * sub-delimiter, or listed in extras. Otherwise it is written as a
 * percent sign followed by two upper-case hexadecimal digits.
 *
 * \param s memory stream to append to
 * \param c character to write
 * \param extras nul-terminated set of additional characters to keep as-is
 */
static void vlc_uri_putc(struct vlc_memstream *s, int c, const char *extras)
{
    const bool verbatim = isurisafe(c)
                       || isurisubdelim(c)
                       || strchr(extras, c) != NULL;

    if (!verbatim)
    {
        vlc_memstream_printf(s, "%%%02hhX", c);
        return;
    }

    vlc_memstream_putc(s, c);
}
/* Fixes up a possibly malformed URI string and returns a new string, or
 * NULL if closing the memory stream reports an error.
 *
 * A first scan sets encode_percent when some '%' is not followed by two
 * hexadecimal digits. The output is then produced in three stages:
 *  - scheme: a leading run of alphanumerics and "+-." is copied; if it is
 *    non-empty and followed by ':', the colon is copied too and brackets
 *    no longer need encoding;
 *  - authority: when the remainder starts with "//", the slashes are
 *    copied and characters are emitted through vlc_uri_putc() up to the
 *    next '/', '?', '#' or the end of the string (memchr() is given a
 *    length of 4 so that the terminating NUL of "/?#" matches as well);
 *  - path and the rest: emitted through vlc_uri_putc() with an extras set
 *    that depends on encode_brackets.
 * The extras literals all start with '%'; adding encode_percent (0 or 1)
 * to the pointer skips that first character, so '%' stops being an allowed
 * character exactly when it has to be escaped.
 *
 * NOTE(review): several lines (declaration of p, the assignment of
 * `absolute`, the final loop header) are missing from this listing, and
 * the comment opened on line 877 has lost its terminator here. */
873 char *vlc_uri_fixup(const char *str
)
877 /* If percent sign is consistently followed by two hexadecimal digits,
878 * then URL encoding must be assumed.
879 * Otherwise, the percent sign itself must be URL-encoded.
881 bool encode_percent
= false;
883 for (const char *p
= str
; *p
!= '\0'; p
++)
884 if (p
[0] == '%' && !(isurihex(p
[1]) && isurihex(p
[2])))
886 encode_percent
= true;
890 struct vlc_memstream stream
;
891 vlc_memstream_open(&stream
);
893 /* Handle URI scheme */
895 bool absolute
= false;
896 bool encode_brackets
= true;
898 while (isurialnum(*p
) || memchr("+-.", *p
, 3) != NULL
)
899 vlc_memstream_putc(&stream
, *(p
++));
901 if (p
> str
&& *p
== ':')
902 { /* There is an URI scheme, assume an absolute URI. */
903 vlc_memstream_putc(&stream
, *(p
++));
905 encode_brackets
= false;
908 /* Handle URI authority */
909 if ((absolute
|| p
== str
) && strncmp(p
, "//", 2) == 0)
911 vlc_memstream_write(&stream
, p
, 2);
913 encode_brackets
= true;
915 while (memchr("/?#", *p
, 4) == NULL
)
916 vlc_uri_putc(&stream
, *(p
++), "%:[]@" + encode_percent
);
919 /* Handle URI path and what follows */
920 const char *extras
= encode_brackets
? "%/?#@" : "%:/?#[]@";
923 vlc_uri_putc(&stream
, *(p
++), extras
+ encode_percent
);
925 return vlc_memstream_close(&stream
) ? NULL
: stream
.ptr
;
928 #if defined (HAVE_IDN)
930 #elif defined (_WIN32)
931 # include <windows.h>
932 # include <vlc_charset.h>
936 * Converts a UTF-8 nul-terminated IDN to nul-terminated ASCII domain name.
937 * \param idn UTF-8 Internationalized Domain Name to convert
938 * \return a heap-allocated string or NULL on error.
/* Converts a host name to its ASCII form, using whichever back end is
 * available at build time:
 *  - HAVE_IDN: idna_to_ascii_8z() with IDNA_ALLOW_UNASSIGNED; the
 *    IDNA_MALLOC_ERROR and IDNA_DLOPEN_ERROR results get dedicated cases;
 *  - _WIN32: the name is converted with ToWide(), IdnToAscii() is called
 *    once with a NULL buffer to obtain the required length and once more
 *    to fill a buffer from vlc_alloc(); the result is converted back with
 *    FromWide();
 *  - otherwise: there is no IDN support, so the name is scanned for bytes
 *    >= 0x80 and a plain strdup() copy is returned.
 * NOTE(review): the statements executed in each switch case, on a failed
 * IdnToAscii() call and on finding a non-ASCII byte are missing from this
 * listing. */
940 static char *vlc_idna_to_ascii (const char *idn
)
942 #if defined (HAVE_IDN)
945 switch (idna_to_ascii_8z(idn
, &adn
, IDNA_ALLOW_UNASSIGNED
))
949 case IDNA_MALLOC_ERROR
:
952 case IDNA_DLOPEN_ERROR
:
960 #elif defined (_WIN32)
966 wchar_t *wide
= ToWide (idn
);
970 int len
= IdnToAscii (IDN_ALLOW_UNASSIGNED
, wide
, -1, NULL
, 0);
977 wchar_t *buf
= vlc_alloc (len
, sizeof (*buf
));
978 if (unlikely(buf
== NULL
))
980 if (!IdnToAscii (IDN_ALLOW_UNASSIGNED
, wide
, -1, buf
, len
))
986 ret
= FromWide (buf
);
993 /* No IDN support, filter out non-ASCII domain names */
994 for (const char *p
= idn
; *p
; p
++)
995 if (((unsigned char)*p
) >= 0x80)
1001 return strdup (idn
);