Revert "contrib: Allow a minimum version to be required"
[vlc.git] / src / text / url.c
blob1c86bedb58b8d89577866ad00a69d75c1ec27223
1 /*****************************************************************************
2 * url.c: URL related functions
3 *****************************************************************************
4 * Copyright (C) 2006 VLC authors and VideoLAN
5 * Copyright (C) 2008-2012 Rémi Denis-Courmont
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU Lesser General Public License as published by
9 * the Free Software Foundation; either version 2.1 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this program; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
20 *****************************************************************************/
22 #ifdef HAVE_CONFIG_H
23 # include "config.h"
24 #endif
26 #include <errno.h>
27 #include <limits.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33 #ifdef _WIN32
34 # include <io.h>
35 #endif
37 #include <vlc_common.h>
38 #include <vlc_memstream.h>
39 #include <vlc_url.h>
40 #include <vlc_fs.h>
41 #include <ctype.h>
/**
 * Decodes a percent-encoded string into a freshly allocated copy.
 *
 * \param str nul-terminated encoded string (left untouched)
 * \return a heap-allocated decoded copy, or NULL if the copy could not be
 *         allocated or the string could not be decoded
 */
char *vlc_uri_decode_duplicate (const char *str)
{
    char *copy = strdup (str);

    /* vlc_uri_decode() itself rejects a NULL copy (allocation failure) */
    if (vlc_uri_decode (copy) != NULL)
        return copy;

    free (copy);
    return NULL;
}
/**
 * Decodes percent-escapes ("%XX") in place.
 *
 * The decoded text is never longer than the encoded one, so the result is
 * written over the input.
 *
 * \param str nul-terminated string to decode in place (may be NULL)
 * \return str on success, or NULL if str is NULL or if a '%' is followed by
 *         fewer than two characters before the end of the string
 */
char *vlc_uri_decode (char *str)
{
    if (str == NULL)
        return NULL;

    const char *rd = str;
    char *wr = str;

    for (;;)
    {
        const char c = *(rd++);

        if (c == '\0')
            break;

        if (c != '%')
        {
            *(wr++) = c;
            continue;
        }

        /* Collect the two characters following the percent sign */
        char digits[3];

        digits[0] = *(rd++);
        if (digits[0] == '\0')
            return NULL;
        digits[1] = *(rd++);
        if (digits[1] == '\0')
            return NULL;
        digits[2] = '\0';

        *(wr++) = strtoul (digits, NULL, 0x10);
    }

    *wr = '\0';
    return str;
}
/**
 * Checks whether a byte is an _unreserved_ URI character (RFC3986 §2.3):
 * ASCII letters, ASCII digits, '-', '.', '_' and '~'.
 *
 * The test does not depend on the locale. The nul byte is NOT considered
 * safe: the punctuation set is searched with memchr() over its exact length,
 * since strchr() would also match the string terminator.
 *
 * \param c byte value to test
 * \return true if c is an unreserved character, false otherwise
 */
static bool isurisafe (int c)
{
    static const char marks[] = "-._~";

    return ((unsigned char)(c - 'a') < 26)
        || ((unsigned char)(c - 'A') < 26)
        || ((unsigned char)(c - '0') < 10)
        || (memchr (marks, c, sizeof (marks) - 1) != NULL);
}
/**
 * Checks whether a byte is one of the URI sub-delimiter characters
 * "!$&'()*+,;=".
 *
 * The nul byte is not a sub-delimiter: the set is searched with memchr()
 * over its exact length, since strchr() would also match the terminator.
 *
 * \param c byte value to test
 * \return true if c is a sub-delimiter, false otherwise
 */
static bool isurisubdelim(int c)
{
    static const char subdelims[] = "!$&'()*+,;=";

    return memchr(subdelims, c, sizeof (subdelims) - 1) != NULL;
}
/**
 * Checks whether a byte is an ASCII hexadecimal digit.
 *
 * Same as isxdigit() but does not depend on locale and unsignedness.
 *
 * \param c byte value to test
 * \return true for '0'-'9', 'A'-'F' and 'a'-'f', false otherwise
 */
static bool isurihex(int c)
{
    /* Reduce to a byte first; each range test then relies on unsigned
     * wrap-around to reject values below the range start. */
    const unsigned char byte = (unsigned char)c;

    if ((unsigned char)(byte - '0') < 10)
        return true;
    if ((unsigned char)(byte - 'A') < 6)
        return true;
    return (unsigned char)(byte - 'a') < 6;
}
/* Upper-case hexadecimal digits, indexed by nibble value (0..15) */
static const char urihex[] = "0123456789" "ABCDEF";
102 static char *encode_URI_bytes (const char *str, size_t *restrict lenp)
104 char *buf = malloc (3 * *lenp + 1);
105 if (unlikely(buf == NULL))
106 return NULL;
108 char *out = buf;
109 for (size_t i = 0; i < *lenp; i++)
111 unsigned char c = str[i];
113 if (isurisafe (c))
114 *(out++) = c;
115 /* This is URI encoding, not HTTP forms:
116 * Space is encoded as '%20', not '+'. */
117 else
119 *(out++) = '%';
120 *(out++) = urihex[c >> 4];
121 *(out++) = urihex[c & 0xf];
125 *lenp = out - buf;
126 out = realloc (buf, *lenp + 1);
127 return likely(out != NULL) ? out : buf;
/**
 * Percent-encodes a nul-terminated string.
 *
 * \param str nul-terminated string to encode
 * \return a heap-allocated nul-terminated encoded string, or NULL on error
 */
char *vlc_uri_encode (const char *str)
{
    size_t size = strlen (str);
    char *encoded = encode_URI_bytes (str, &size);

    if (unlikely(encoded == NULL))
        return NULL;

    /* encode_URI_bytes() leaves room for, but does not write, the nul */
    encoded[size] = '\0';
    return encoded;
}
139 char *vlc_path2uri (const char *path, const char *scheme)
141 if (path == NULL)
143 errno = EINVAL;
144 return NULL;
146 if (scheme == NULL && !strcmp (path, "-"))
147 return strdup ("fd://0"); // standard input
148 /* Note: VLC cannot handle URI schemes without double slash after the
149 * scheme name (such as mailto: or news:). */
151 char *buf;
153 #ifdef __OS2__
154 char p[strlen (path) + 1];
156 for (buf = p; *path; buf++, path++)
157 *buf = (*path == '/') ? DIR_SEP_CHAR : *path;
158 *buf = '\0';
160 path = p;
161 #endif
163 #if defined (_WIN32) || defined (__OS2__)
164 /* Drive letter */
165 if (isalpha ((unsigned char)path[0]) && (path[1] == ':'))
167 if (asprintf (&buf, "%s:///%c:", scheme ? scheme : "file",
168 path[0]) == -1)
169 buf = NULL;
170 path += 2;
171 # warning Drive letter-relative path not implemented!
172 if (path[0] != DIR_SEP_CHAR)
174 errno = ENOTSUP;
175 return NULL;
178 else
179 if (!strncmp (path, "\\\\", 2))
180 { /* Windows UNC paths */
181 /* \\host\share\path -> file://host/share/path */
182 int hostlen = strcspn (path + 2, DIR_SEP);
184 if (asprintf (&buf, "file://%.*s", hostlen, path + 2) == -1)
185 buf = NULL;
186 path += 2 + hostlen;
188 if (path[0] == '\0')
189 return buf; /* Hostname without path */
191 else
192 #endif
193 if (path[0] != DIR_SEP_CHAR)
194 { /* Relative path: prepend the current working directory */
195 char *cwd, *ret;
197 if ((cwd = vlc_getcwd ()) == NULL)
198 return NULL;
199 if (asprintf (&buf, "%s"DIR_SEP"%s", cwd, path) == -1)
200 buf = NULL;
202 free (cwd);
203 ret = (buf != NULL) ? vlc_path2uri (buf, scheme) : NULL;
204 free (buf);
205 return ret;
207 else
208 if (asprintf (&buf, "%s://", scheme ? scheme : "file") == -1)
209 buf = NULL;
210 if (buf == NULL)
211 return NULL;
213 /* Absolute file path */
214 assert (path[0] == DIR_SEP_CHAR);
217 size_t len = strcspn (++path, DIR_SEP);
218 path += len;
220 char *component = encode_URI_bytes (path - len, &len);
221 if (unlikely(component == NULL))
223 free (buf);
224 return NULL;
226 component[len] = '\0';
228 char *uri;
229 int val = asprintf (&uri, "%s/%s", buf, component);
230 free (component);
231 free (buf);
232 if (unlikely(val == -1))
233 return NULL;
234 buf = uri;
236 while (*path);
238 return buf;
/**
 * Converts a URI to a local file system path, where possible.
 *
 * Only the "file" and "fd" schemes are handled. Query parameters and
 * fragment are dropped, and the remainder is percent-decoded.
 *
 * \param url nul-terminated URI
 * \return a heap-allocated path, or NULL if the URI has no "://", uses
 *         another scheme, does not designate a local resource, or on error
 */
char *vlc_uri2path (const char *url)
{
    char *ret = NULL;
    char *end;

    char *path = strstr (url, "://");
    if (path == NULL)
        return NULL; /* unsupported scheme or invalid syntax */

    /* The scheme ends at the first slash, or at "://" if there is none */
    end = memchr (url, '/', path - url);
    size_t schemelen = ((end != NULL) ? end : path) - url;
    path += 3; /* skip "://" */

    /* Remove request parameters and/or HTML anchor if present */
    end = path + strcspn (path, "?#");
    path = strndup (path, end - path);
    if (unlikely(path == NULL))
        return NULL; /* boom! */

    /* Decode path */
    vlc_uri_decode (path);

    if (schemelen == 4 && !strncasecmp (url, "file", 4))
    {
#if !defined (_WIN32) && !defined (__OS2__)
        /* Leading slash => local path */
        if (*path == '/')
            return path;
        /* Local path disguised as a remote one */
        if (!strncasecmp (path, "localhost/", 10))
            return memmove (path, path + 9, strlen (path + 9) + 1);
#else
        /* cannot start with a space */
        if (*path == ' ')
            goto out;
        for (char *p = strchr (path, '/'); p; p = strchr (p + 1, '/'))
            *p = '\\';

        /* Leading backslash => local path */
        if (*path == '\\')
            return memmove (path, path + 1, strlen (path + 1) + 1);
        /* Local path disguised as a remote one */
        if (!strncasecmp (path, "localhost\\", 10))
            return memmove (path, path + 10, strlen (path + 10) + 1);
        /* UNC path */
        if (*path && asprintf (&ret, "\\\\%s", path) == -1)
            ret = NULL;
#endif
        /* non-local path :-( */
    }
    else
    if (schemelen == 2 && !strncasecmp (url, "fd", 2))
    {
        /* Parse the descriptor number without disturbing the caller's errno */
        const int saved_errno = errno;

        errno = 0;
        long val = strtol (path, &end, 0);
        const bool overflow = (errno == ERANGE);
        errno = saved_errno;

        /* Reject an empty number, trailing garbage, and values that do not
         * fit in an int (which would otherwise be silently truncated). */
        if (end == path || *end || overflow || val < INT_MIN || val > INT_MAX)
            goto out;

        const int fd = (int)val;

#if !defined( _WIN32 ) && !defined( __OS2__ )
        switch (fd)
        {
            case 0:
                ret = strdup ("/dev/stdin");
                break;
            case 1:
                ret = strdup ("/dev/stdout");
                break;
            case 2:
                ret = strdup ("/dev/stderr");
                break;
            default:
                if (asprintf (&ret, "/dev/fd/%d", fd) == -1)
                    ret = NULL;
        }
#else
        /* XXX: Does this work on WinCE? */
        if (fd < 2)
            ret = strdup ("CON");
#endif
    }

out:
    free (path);
    return ret; /* unknown scheme */
}
327 static char *vlc_idna_to_ascii (const char *);
329 /* RFC3987 §3.1 */
330 static char *vlc_iri2uri(const char *iri)
332 size_t a = 0, u = 0;
334 for (size_t i = 0; iri[i] != '\0'; i++)
336 unsigned char c = iri[i];
338 if (c < 128)
339 a++;
340 else
341 u++;
344 if (unlikely((a + u) > (SIZE_MAX / 4)))
346 errno = ENOMEM;
347 return NULL;
350 char *uri = malloc(a + 3 * u + 1), *p;
351 if (unlikely(uri == NULL))
352 return NULL;
354 for (p = uri; *iri != '\0'; iri++)
356 unsigned char c = *iri;
358 if (c < 128)
359 *(p++) = c;
360 else
362 *(p++) = '%';
363 *(p++) = urihex[c >> 4];
364 *(p++) = urihex[c & 0xf];
368 *p = '\0';
369 return uri;
/**
 * Checks that a URI component only contains permitted characters.
 *
 * A character is accepted if it is unreserved, a sub-delimiter, listed in
 * extras, or the '%' of a percent sign followed by two hexadecimal digits.
 *
 * \param str nul-terminated component (must not be NULL)
 * \param extras nul-terminated set of additional accepted characters
 * \return true if every character is accepted, false otherwise
 */
static bool vlc_uri_component_validate(const char *str, const char *extras)
{
    assert(str != NULL);

    for (const char *p = str; *p != '\0'; p++)
    {
        const int c = *p;

        if (isurisafe(c) || isurisubdelim(c) || strchr(extras, c) != NULL)
            continue;

        /* Anything else must be a well-formed percent-escape. The checks
         * short-circuit, so p[2] is only read if p[1] is a hex digit. */
        if (c != '%' || !isurihex(p[1]) || !isurihex(p[2]))
            return false;

        p += 2; /* skip the two hexadecimal digits */
    }
    return true;
}
/**
 * Validates a host name component; ':' is accepted in addition to the
 * characters always permitted by vlc_uri_component_validate().
 */
static bool vlc_uri_host_validate(const char *str)
{
    static const char host_extras[] = ":";

    return vlc_uri_component_validate(str, host_extras);
}
/**
 * Validates a path component; '/', '@' and ':' are accepted in addition to
 * the characters always permitted by vlc_uri_component_validate().
 */
static bool vlc_uri_path_validate(const char *str)
{
    static const char path_extras[] = "/@:";

    return vlc_uri_component_validate(str, path_extras);
}
404 static int vlc_UrlParseInner(vlc_url_t *restrict url, const char *str)
406 url->psz_protocol = NULL;
407 url->psz_username = NULL;
408 url->psz_password = NULL;
409 url->psz_host = NULL;
410 url->i_port = 0;
411 url->psz_path = NULL;
412 url->psz_option = NULL;
413 url->psz_buffer = NULL;
414 url->psz_pathbuffer = NULL;
416 if (str == NULL)
418 errno = EINVAL;
419 return -1;
422 char *buf = vlc_iri2uri(str);
423 if (unlikely(buf == NULL))
424 return -1;
425 url->psz_buffer = buf;
427 char *cur = buf, *next;
428 int ret = 0;
430 /* URI scheme */
431 next = buf;
432 while ((*next >= 'A' && *next <= 'Z') || (*next >= 'a' && *next <= 'z')
433 || (*next >= '0' && *next <= '9') || memchr ("+-.", *next, 3) != NULL)
434 next++;
436 if (*next == ':')
438 *(next++) = '\0';
439 url->psz_protocol = cur;
440 cur = next;
443 /* Fragment */
444 next = strchr(cur, '#');
445 if (next != NULL)
447 #if 0 /* TODO */
448 *(next++) = '\0';
449 url->psz_fragment = next;
450 #else
451 *next = '\0';
452 #endif
455 /* Query parameters */
456 next = strchr(cur, '?');
457 if (next != NULL)
459 *(next++) = '\0';
460 url->psz_option = next;
463 /* Authority */
464 if (strncmp(cur, "//", 2) == 0)
466 cur += 2;
468 /* Path */
469 next = strchr(cur, '/');
470 if (next != NULL)
472 *next = '\0'; /* temporary nul, reset to slash later */
473 url->psz_path = next;
475 /*else
476 url->psz_path = "/";*/
478 /* User name */
479 next = strrchr(cur, '@');
480 if (next != NULL)
482 *(next++) = '\0';
483 url->psz_username = cur;
484 cur = next;
486 /* Password (obsolete) */
487 next = strchr(url->psz_username, ':');
488 if (next != NULL)
490 *(next++) = '\0';
491 url->psz_password = next;
492 vlc_uri_decode(url->psz_password);
494 vlc_uri_decode(url->psz_username);
497 /* Host name */
498 if (*cur == '[' && (next = strrchr(cur, ']')) != NULL)
499 { /* Try IPv6 numeral within brackets */
500 *(next++) = '\0';
501 url->psz_host = strdup(cur + 1);
503 if (*next == ':')
504 next++;
505 else
506 next = NULL;
508 else
510 next = strchr(cur, ':');
511 if (next != NULL)
512 *(next++) = '\0';
514 url->psz_host = vlc_idna_to_ascii(vlc_uri_decode(cur));
517 if (url->psz_host == NULL)
518 ret = -1;
519 else
520 if (!vlc_uri_host_validate(url->psz_host))
522 free(url->psz_host);
523 url->psz_host = NULL;
524 errno = EINVAL;
525 ret = -1;
528 /* Port number */
529 if (next != NULL && *next)
531 char* end;
532 unsigned long port = strtoul(next, &end, 10);
534 if (strchr("0123456789", *next) == NULL || *end || port > UINT_MAX)
536 errno = EINVAL;
537 ret = -1;
540 url->i_port = port;
543 if (url->psz_path != NULL)
544 *url->psz_path = '/'; /* restore leading slash */
546 else
548 url->psz_path = cur;
551 return ret;
554 int vlc_UrlParse(vlc_url_t *url, const char *str)
556 int ret = vlc_UrlParseInner(url, str);
558 if (url->psz_path != NULL && !vlc_uri_path_validate(url->psz_path))
560 url->psz_path = NULL;
561 errno = EINVAL;
562 ret = -1;
564 return ret;
567 static char *vlc_uri_fixup_inner(const char *str, const char *extras);
569 int vlc_UrlParseFixup(vlc_url_t *url, const char *str)
571 int ret = vlc_UrlParseInner(url, str);
573 static const char pathextras[] = "/@:";
575 if (url->psz_path != NULL
576 && !vlc_uri_component_validate(url->psz_path, pathextras))
578 url->psz_pathbuffer = vlc_uri_fixup_inner(url->psz_path, pathextras);
579 if (url->psz_pathbuffer == NULL)
581 url->psz_path = NULL;
582 errno = ENOMEM;
583 ret = -1;
585 else
587 url->psz_path = url->psz_pathbuffer;
588 assert(vlc_uri_path_validate(url->psz_path));
591 return ret;
594 void vlc_UrlClean (vlc_url_t *restrict url)
596 free (url->psz_host);
597 free (url->psz_buffer);
598 free (url->psz_pathbuffer);
/**
 * Merge paths
 *
 * See IETF RFC3986 section 5.2.3 for details.
 *
 * \param base base path, or NULL if there is none
 * \param ref relative reference path
 * \return a heap-allocated merged path, or NULL on allocation failure
 */
static char *vlc_uri_merge_paths(const char *base, const char *ref)
{
    char *merged;
    int rc;

    if (base != NULL)
    {
        /* Keep the base up to and including its last slash (if any) */
        const char *slash = strrchr(base, '/');
        const int keep = (slash != NULL) ? (int)(slash + 1 - base) : 0;

        rc = asprintf(&merged, "%.*s%s", keep, base, ref);
    }
    else
        rc = asprintf(&merged, "/%s", ref);

    return unlikely(rc == -1) ? NULL : merged;
}
/* Finds where the output must be rewound to when a ".." segment is removed:
 * the last '/' among the first len bytes of buf, or buf if there is none. */
static char *vlc_uri_rewind_segment(char *buf, size_t len)
{
    while (len > 0)
        if (buf[--len] == '/')
            return buf + len;
    return buf;
}

/**
 * Remove dot segments
 *
 * See IETF RFC3986 section 5.2.4 for details.
 *
 * The path is rewritten in place; the output never overtakes the input.
 *
 * \param str nul-terminated path to normalize
 * \return str
 */
static char *vlc_uri_remove_dot_segments(char *str)
{
    char *in = str, *out = str;

    while (*in != '\0')
    {
        assert(out <= in);

        if (!strncmp(in, "../", 3))
            in += 3;
        else if (!strncmp(in, "./", 2))
            in += 2;
        else if (!strncmp(in, "/./", 3))
            in += 2;
        else if (!strcmp(in, "/."))
            in[1] = '\0'; /* trailing "/." becomes "/" */
        else if (!strncmp(in, "/../", 4))
        {
            in += 3;
            out = vlc_uri_rewind_segment(str, out - str);
        }
        else if (!strcmp(in, "/.."))
        {
            in[1] = '\0'; /* trailing "/.." becomes "/" */
            out = vlc_uri_rewind_segment(str, out - str);
        }
        else if (!strcmp(in, "."))
            in++;
        else if (!strcmp(in, ".."))
            in += 2;
        else
        {
            /* Ordinary segment: copy the leading slash (if any), then
             * everything up to the next slash */
            if (*in == '/')
                *(out++) = *(in++);

            const size_t seg = strcspn(in, "/");

            if (in != out)
                memmove(out, in, seg);

            in += seg;
            out += seg;
        }
    }

    *out = '\0';
    return str;
}
706 char *vlc_uri_compose(const vlc_url_t *uri)
708 struct vlc_memstream stream;
709 char *enc;
711 vlc_memstream_open(&stream);
713 if (uri->psz_protocol != NULL)
714 vlc_memstream_printf(&stream, "%s:", uri->psz_protocol);
716 if (uri->psz_host != NULL)
718 vlc_memstream_write(&stream, "//", 2);
720 if (uri->psz_username != NULL)
722 enc = vlc_uri_encode(uri->psz_username);
723 if (enc == NULL)
724 goto error;
726 vlc_memstream_puts(&stream, enc);
727 free(enc);
729 if (uri->psz_password != NULL)
731 enc = vlc_uri_encode(uri->psz_password);
732 if (unlikely(enc == NULL))
733 goto error;
735 vlc_memstream_printf(&stream, ":%s", enc);
736 free(enc);
738 vlc_memstream_putc(&stream, '@');
741 const char *fmt;
743 if (strchr(uri->psz_host, ':') != NULL)
744 fmt = (uri->i_port != 0) ? "[%s]:%d" : "[%s]";
745 else
746 fmt = (uri->i_port != 0) ? "%s:%d" : "%s";
747 /* No IDNA decoding here. Seems unnecessary, dangerous even. */
748 vlc_memstream_printf(&stream, fmt, uri->psz_host, uri->i_port);
751 if (uri->psz_path != NULL)
752 vlc_memstream_puts(&stream, uri->psz_path);
753 if (uri->psz_option != NULL)
754 vlc_memstream_printf(&stream, "?%s", uri->psz_option);
755 /* NOTE: fragment not handled currently */
757 if (vlc_memstream_close(&stream))
758 return NULL;
759 return stream.ptr;
761 error:
762 if (vlc_memstream_close(&stream) == 0)
763 free(stream.ptr);
764 return NULL;
767 char *vlc_uri_resolve(const char *base, const char *ref)
769 vlc_url_t base_uri, rel_uri;
770 vlc_url_t tgt_uri;
771 char *pathbuf = NULL, *ret = NULL;
773 if (vlc_UrlParse(&rel_uri, ref))
775 vlc_UrlClean(&rel_uri);
776 return NULL;
779 if (rel_uri.psz_protocol != NULL)
780 { /* Short circuit in case of absolute URI */
781 vlc_UrlClean(&rel_uri);
782 return strdup(ref);
785 vlc_UrlParse(&base_uri, base);
787 /* RFC3986 section 5.2.2 */
790 tgt_uri = rel_uri;
791 tgt_uri.psz_protocol = base_uri.psz_protocol;
793 if (rel_uri.psz_host != NULL)
794 break;
796 tgt_uri.psz_username = base_uri.psz_username;
797 tgt_uri.psz_password = base_uri.psz_password;
798 tgt_uri.psz_host = base_uri.psz_host;
799 tgt_uri.i_port = base_uri.i_port;
801 if (rel_uri.psz_path == NULL || rel_uri.psz_path[0] == '\0')
803 tgt_uri.psz_path = base_uri.psz_path;
804 if (rel_uri.psz_option == NULL)
805 tgt_uri.psz_option = base_uri.psz_option;
806 break;
809 if (rel_uri.psz_path[0] == '/')
810 break;
812 pathbuf = vlc_uri_merge_paths(base_uri.psz_path, rel_uri.psz_path);
813 if (unlikely(pathbuf == NULL))
814 goto error;
816 tgt_uri.psz_path = pathbuf;
818 while (0);
820 if (tgt_uri.psz_path != NULL)
821 vlc_uri_remove_dot_segments(tgt_uri.psz_path);
823 ret = vlc_uri_compose(&tgt_uri);
824 error:
825 free(pathbuf);
826 vlc_UrlClean(&base_uri);
827 vlc_UrlClean(&rel_uri);
828 return ret;
831 static char *vlc_uri_fixup_inner(const char *str, const char *extras)
833 assert(str && extras);
835 bool encode_percent = false;
836 for (size_t i = 0; str[i] != '\0'; i++)
837 if (str[i] == '%' && !(isurihex(str[i+1]) && isurihex(str[i+2])))
839 encode_percent = true;
840 break;
843 struct vlc_memstream stream;
845 vlc_memstream_open(&stream);
847 for (size_t i = 0; str[i] != '\0'; i++)
849 unsigned char c = str[i];
851 if (isurisafe(c) || isurisubdelim(c) || (strchr(extras, c) != NULL)
852 || (c == '%' && !encode_percent))
853 vlc_memstream_putc(&stream, c);
854 else
855 vlc_memstream_printf(&stream, "%%%02hhX", c);
858 if (vlc_memstream_close(&stream))
859 return NULL;
860 return stream.ptr;
/**
 * Fixes up a URI string by percent-encoding characters that are not
 * permitted.
 *
 * \param str nul-terminated URI to fix up
 * \return a heap-allocated nul-terminated URI, or NULL on error
 */
char *vlc_uri_fixup(const char *str)
{
    static const char extras[] = ":/?#[]@";

    /* Rule number one is do not change a (potentially) valid URI */
    return vlc_uri_component_validate(str, extras)
         ? strdup(str)
         : vlc_uri_fixup_inner(str, extras);
}
874 #if defined (HAVE_IDN)
875 # include <idna.h>
876 #elif defined (_WIN32)
877 # include <windows.h>
878 # include <vlc_charset.h>
879 #endif
/**
 * Converts a UTF-8 nul-terminated IDN to nul-terminated ASCII domain name.
 *
 * Three implementations are selected at build time: libidn (HAVE_IDN), the
 * Windows API, or a fallback that only accepts names that are already ASCII.
 *
 * \param idn UTF-8 Internationalized Domain Name to convert
 * \return a heap-allocated string or NULL on error.
 */
static char *vlc_idna_to_ascii (const char *idn)
{
#if defined (HAVE_IDN)
    char *adn;
    const int rc = idna_to_ascii_8z(idn, &adn, IDNA_ALLOW_UNASSIGNED);

    if (rc == IDNA_SUCCESS)
        return adn;

    /* Map the libidn error code to an errno value */
    switch (rc)
    {
        case IDNA_MALLOC_ERROR:
            errno = ENOMEM;
            break;
        case IDNA_DLOPEN_ERROR:
            errno = ENOSYS;
            break;
        default:
            errno = EINVAL;
            break;
    }
    return NULL;

#elif defined (_WIN32)
    if (idn[0] == '\0')
        return strdup("");

    wchar_t *wide = ToWide (idn);
    if (wide == NULL)
        return NULL;

    char *ascii = NULL;

    /* First call only computes the required length */
    const int needed = IdnToAscii (IDN_ALLOW_UNASSIGNED, wide, -1, NULL, 0);
    if (needed == 0)
        errno = EINVAL;
    else
    {
        wchar_t *converted = vlc_alloc (needed, sizeof (*converted));

        if (converted != NULL)
        {
            if (IdnToAscii (IDN_ALLOW_UNASSIGNED, wide, -1, converted, needed))
            {
                ascii = FromWide (converted);
                free (converted);
            }
            else
            {
                free (converted);
                errno = EINVAL;
            }
        }
    }

    free (wide);
    return ascii;

#else
    /* No IDN support, filter out non-ASCII domain names */
    const char *p = idn;

    while (*p != '\0')
    {
        if (((unsigned char)*p) >= 0x80)
        {
            errno = ENOSYS;
            return NULL;
        }
        p++;
    }

    return strdup (idn);

#endif
}