msscript.ocx: Fake success in FreezeEvents.
[wine.git] / libs / xml2 / uri.c
blobc3d4871f02af2c9c553763227b2719598a406d1c
1 /**
2 * uri.c: set of generic URI related routines
4 * Reference: RFCs 3986, 2732 and 2373
6 * See Copyright for the status of this software.
8 * daniel@veillard.com
9 */
11 #define IN_LIBXML
12 #include "libxml.h"
14 #include <limits.h>
15 #include <string.h>
17 #include <libxml/xmlmemory.h>
18 #include <libxml/uri.h>
19 #include <libxml/globals.h>
20 #include <libxml/xmlerror.h>
22 #include "private/error.h"
/**
 * MAX_URI_LENGTH:
 *
 * The URI grammar in the RFCs referenced above imposes no size limit.
 * In practice URIs are usually relatively short, except for the data URI
 * scheme defined in RFC 2397. Even for data URIs the usual maximum size
 * before hitting random practical limits is around 64 KB, and 4 KB is
 * usually the maximum admitted limit for proper operation.
 * The value below is more a security limit than anything else and
 * really should never be hit by 'normal' operations.
 * Set to 1 MByte in 2012; this is only enforced on output.
 *
 * The expansion is parenthesized so the macro stays a single operand
 * when used inside a larger expression (e.g. `x / MAX_URI_LENGTH`).
 */
#define MAX_URI_LENGTH (1024 * 1024)

/* Port value of a freshly created URI: no port has been parsed. */
#define PORT_EMPTY 0
/* Marker stored in the port field when "//" was followed by an empty server. */
#define PORT_EMPTY_SERVER (-1)
41 static void
42 xmlURIErrMemory(const char *extra)
44 if (extra)
45 __xmlRaiseError(NULL, NULL, NULL,
46 NULL, NULL, XML_FROM_URI,
47 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
48 extra, NULL, NULL, 0, 0,
49 "Memory allocation failed : %s\n", extra);
50 else
51 __xmlRaiseError(NULL, NULL, NULL,
52 NULL, NULL, XML_FROM_URI,
53 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
54 NULL, NULL, NULL, 0, 0,
55 "Memory allocation failed\n");
58 static void xmlCleanURI(xmlURIPtr uri);
/*
 * Old rule from 2396 used in legacy handling code
 * alpha    = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
    ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "`"
 *
 * Unlike the macros above this one takes a POINTER to the character.
 * The implementation additionally accepts "[" and "]".
 */
#define IS_UNWISE(p)                                                    \
      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
       ((*(p) == ']')) || ((*(p) == '`')))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
        ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
        ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
        ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * Skip to next pointer char, handle escaped sequences: a '%' advances the
 * pointer by three (the caller must already have validated the two hex
 * digits), anything else by one.
 *
 * The argument is parenthesized at every use so that an lvalue expression
 * such as `*pp` advances the pointed-to cursor and not `pp` itself.
 */
#define NEXT(p) ((*(p) == '%') ? (p) += 3 : (p)++)

/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/* Duplicate the first @n bytes of @s as a plain char string. */
#define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n))
/************************************************************************
 *									*
 *                         RFC 3986 parser				*
 *									*
 ************************************************************************/

/*
 * All ISA_* macros take a POINTER to the character to classify.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||		\
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)							\
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||		\
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)						\
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||		\
       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||		\
       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
       ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)						\
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)						\
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||		\
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The argument is parenthesized before the offset is added so that a
 * compound argument (e.g. a conditional expression) is offset as a whole.
 * The && chain stops at the first mismatch, so nothing is read beyond a
 * terminating NUL.
 */
#define ISA_PCT_ENCODED(p)						\
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)							\
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\
      ((*(p) == ':')) || ((*(p) == '@')))
203 * xmlParse3986Scheme:
204 * @uri: pointer to an URI structure
205 * @str: pointer to the string to analyze
207 * Parse an URI scheme
209 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
211 * Returns 0 or the error code
213 static int
214 xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
215 const char *cur;
217 if (str == NULL)
218 return(-1);
220 cur = *str;
221 if (!ISA_ALPHA(cur))
222 return(2);
223 cur++;
224 while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
225 (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
226 if (uri != NULL) {
227 if (uri->scheme != NULL) xmlFree(uri->scheme);
228 uri->scheme = STRNDUP(*str, cur - *str);
230 *str = cur;
231 return(0);
235 * xmlParse3986Fragment:
236 * @uri: pointer to an URI structure
237 * @str: pointer to the string to analyze
239 * Parse the query part of an URI
241 * fragment = *( pchar / "/" / "?" )
242 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
243 * in the fragment identifier but this is used very broadly for
244 * xpointer scheme selection, so we are allowing it here to not break
245 * for example all the DocBook processing chains.
247 * Returns 0 or the error code
249 static int
250 xmlParse3986Fragment(xmlURIPtr uri, const char **str)
252 const char *cur;
254 if (str == NULL)
255 return (-1);
257 cur = *str;
259 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
260 (*cur == '[') || (*cur == ']') ||
261 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
262 NEXT(cur);
263 if (uri != NULL) {
264 if (uri->fragment != NULL)
265 xmlFree(uri->fragment);
266 if (uri->cleanup & 2)
267 uri->fragment = STRNDUP(*str, cur - *str);
268 else
269 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
271 *str = cur;
272 return (0);
276 * xmlParse3986Query:
277 * @uri: pointer to an URI structure
278 * @str: pointer to the string to analyze
280 * Parse the query part of an URI
282 * query = *uric
284 * Returns 0 or the error code
286 static int
287 xmlParse3986Query(xmlURIPtr uri, const char **str)
289 const char *cur;
291 if (str == NULL)
292 return (-1);
294 cur = *str;
296 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
297 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
298 NEXT(cur);
299 if (uri != NULL) {
300 if (uri->query != NULL)
301 xmlFree(uri->query);
302 if (uri->cleanup & 2)
303 uri->query = STRNDUP(*str, cur - *str);
304 else
305 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
307 /* Save the raw bytes of the query as well.
308 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
310 if (uri->query_raw != NULL)
311 xmlFree (uri->query_raw);
312 uri->query_raw = STRNDUP (*str, cur - *str);
314 *str = cur;
315 return (0);
319 * xmlParse3986Port:
320 * @uri: pointer to an URI structure
321 * @str: the string to analyze
323 * Parse a port part and fills in the appropriate fields
324 * of the @uri structure
326 * port = *DIGIT
328 * Returns 0 or the error code
330 static int
331 xmlParse3986Port(xmlURIPtr uri, const char **str)
333 const char *cur = *str;
334 int port = 0;
336 if (ISA_DIGIT(cur)) {
337 while (ISA_DIGIT(cur)) {
338 int digit = *cur - '0';
340 if (port > INT_MAX / 10)
341 return(1);
342 port *= 10;
343 if (port > INT_MAX - digit)
344 return(1);
345 port += digit;
347 cur++;
349 if (uri != NULL)
350 uri->port = port;
351 *str = cur;
352 return(0);
354 return(1);
358 * xmlParse3986Userinfo:
359 * @uri: pointer to an URI structure
360 * @str: the string to analyze
362 * Parse an user information part and fills in the appropriate fields
363 * of the @uri structure
365 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
367 * Returns 0 or the error code
369 static int
370 xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
372 const char *cur;
374 cur = *str;
375 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
376 ISA_SUB_DELIM(cur) || (*cur == ':'))
377 NEXT(cur);
378 if (*cur == '@') {
379 if (uri != NULL) {
380 if (uri->user != NULL) xmlFree(uri->user);
381 if (uri->cleanup & 2)
382 uri->user = STRNDUP(*str, cur - *str);
383 else
384 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
386 *str = cur;
387 return(0);
389 return(1);
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. At most three leading digits are considered; a run
 * of exactly one or two digits must be the whole octet, while a valid
 * three digit octet is consumed even if more digits follow.
 *
 * Returns 0 if found and skipped, 1 otherwise (*@str is then untouched)
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int ndigits = 0;
    int value = 0;

    /* Stops at the first non-digit, so nothing past a NUL is read. */
    while ((ndigits < 3) &&
           (cur[ndigits] >= '0') && (cur[ndigits] <= '9')) {
        value = value * 10 + (cur[ndigits] - '0');
        ndigits++;
    }

    if (ndigits == 0)
        return(1);
    /* multi-digit octets may not start with "0" */
    if ((ndigits > 1) && (cur[0] == '0'))
        return(1);
    /* 256..999 are not octets ("256".."259" used to slip through) */
    if (value > 255)
        return(1);

    *str = cur + ndigits;
    return(0);
}
430 * xmlParse3986Host:
431 * @uri: pointer to an URI structure
432 * @str: the string to analyze
434 * Parse an host part and fills in the appropriate fields
435 * of the @uri structure
437 * host = IP-literal / IPv4address / reg-name
438 * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
439 * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
440 * reg-name = *( unreserved / pct-encoded / sub-delims )
442 * Returns 0 or the error code
444 static int
445 xmlParse3986Host(xmlURIPtr uri, const char **str)
447 const char *cur = *str;
448 const char *host;
450 host = cur;
452 * IPv6 and future addressing scheme are enclosed between brackets
454 if (*cur == '[') {
455 cur++;
456 while ((*cur != ']') && (*cur != 0))
457 cur++;
458 if (*cur != ']')
459 return(1);
460 cur++;
461 goto found;
464 * try to parse an IPv4
466 if (ISA_DIGIT(cur)) {
467 if (xmlParse3986DecOctet(&cur) != 0)
468 goto not_ipv4;
469 if (*cur != '.')
470 goto not_ipv4;
471 cur++;
472 if (xmlParse3986DecOctet(&cur) != 0)
473 goto not_ipv4;
474 if (*cur != '.')
475 goto not_ipv4;
476 if (xmlParse3986DecOctet(&cur) != 0)
477 goto not_ipv4;
478 if (*cur != '.')
479 goto not_ipv4;
480 if (xmlParse3986DecOctet(&cur) != 0)
481 goto not_ipv4;
482 goto found;
483 not_ipv4:
484 cur = *str;
487 * then this should be a hostname which can be empty
489 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
490 NEXT(cur);
491 found:
492 if (uri != NULL) {
493 if (uri->authority != NULL) xmlFree(uri->authority);
494 uri->authority = NULL;
495 if (uri->server != NULL) xmlFree(uri->server);
496 if (cur != host) {
497 if (uri->cleanup & 2)
498 uri->server = STRNDUP(host, cur - host);
499 else
500 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
501 } else
502 uri->server = NULL;
504 *str = cur;
505 return(0);
509 * xmlParse3986Authority:
510 * @uri: pointer to an URI structure
511 * @str: the string to analyze
513 * Parse an authority part and fills in the appropriate fields
514 * of the @uri structure
516 * authority = [ userinfo "@" ] host [ ":" port ]
518 * Returns 0 or the error code
520 static int
521 xmlParse3986Authority(xmlURIPtr uri, const char **str)
523 const char *cur;
524 int ret;
526 cur = *str;
528 * try to parse an userinfo and check for the trailing @
530 ret = xmlParse3986Userinfo(uri, &cur);
531 if ((ret != 0) || (*cur != '@'))
532 cur = *str;
533 else
534 cur++;
535 ret = xmlParse3986Host(uri, &cur);
536 if (ret != 0) return(ret);
537 if (*cur == ':') {
538 cur++;
539 ret = xmlParse3986Port(uri, &cur);
540 if (ret != 0) return(ret);
542 *str = cur;
543 return(0);
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze
 * @forbid: an optional forbidden character (0 for none)
 * @empty: allow an empty segment
 *
 * Skip over one path segment, stopping early at @forbid.
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * Returns 0 on success, 1 if the first character is not a pchar and
 *         @empty is zero
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *p = *str;

    if (!ISA_PCHAR(p))
        return(empty ? 0 : 1);

    while (ISA_PCHAR(p) && (*p != forbid))
        NEXT(p);

    *str = p;
    return (0);
}
580 * xmlParse3986PathAbEmpty:
581 * @uri: pointer to an URI structure
582 * @str: the string to analyze
584 * Parse an path absolute or empty and fills in the appropriate fields
585 * of the @uri structure
587 * path-abempty = *( "/" segment )
589 * Returns 0 or the error code
591 static int
592 xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
594 const char *cur;
595 int ret;
597 cur = *str;
599 while (*cur == '/') {
600 cur++;
601 ret = xmlParse3986Segment(&cur, 0, 1);
602 if (ret != 0) return(ret);
604 if (uri != NULL) {
605 if (uri->path != NULL) xmlFree(uri->path);
606 if (*str != cur) {
607 if (uri->cleanup & 2)
608 uri->path = STRNDUP(*str, cur - *str);
609 else
610 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
611 } else {
612 uri->path = NULL;
615 *str = cur;
616 return (0);
620 * xmlParse3986PathAbsolute:
621 * @uri: pointer to an URI structure
622 * @str: the string to analyze
624 * Parse an path absolute and fills in the appropriate fields
625 * of the @uri structure
627 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
629 * Returns 0 or the error code
631 static int
632 xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
634 const char *cur;
635 int ret;
637 cur = *str;
639 if (*cur != '/')
640 return(1);
641 cur++;
642 ret = xmlParse3986Segment(&cur, 0, 0);
643 if (ret == 0) {
644 while (*cur == '/') {
645 cur++;
646 ret = xmlParse3986Segment(&cur, 0, 1);
647 if (ret != 0) return(ret);
650 if (uri != NULL) {
651 if (uri->path != NULL) xmlFree(uri->path);
652 if (cur != *str) {
653 if (uri->cleanup & 2)
654 uri->path = STRNDUP(*str, cur - *str);
655 else
656 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
657 } else {
658 uri->path = NULL;
661 *str = cur;
662 return (0);
666 * xmlParse3986PathRootless:
667 * @uri: pointer to an URI structure
668 * @str: the string to analyze
670 * Parse an path without root and fills in the appropriate fields
671 * of the @uri structure
673 * path-rootless = segment-nz *( "/" segment )
675 * Returns 0 or the error code
677 static int
678 xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
680 const char *cur;
681 int ret;
683 cur = *str;
685 ret = xmlParse3986Segment(&cur, 0, 0);
686 if (ret != 0) return(ret);
687 while (*cur == '/') {
688 cur++;
689 ret = xmlParse3986Segment(&cur, 0, 1);
690 if (ret != 0) return(ret);
692 if (uri != NULL) {
693 if (uri->path != NULL) xmlFree(uri->path);
694 if (cur != *str) {
695 if (uri->cleanup & 2)
696 uri->path = STRNDUP(*str, cur - *str);
697 else
698 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
699 } else {
700 uri->path = NULL;
703 *str = cur;
704 return (0);
708 * xmlParse3986PathNoScheme:
709 * @uri: pointer to an URI structure
710 * @str: the string to analyze
712 * Parse an path which is not a scheme and fills in the appropriate fields
713 * of the @uri structure
715 * path-noscheme = segment-nz-nc *( "/" segment )
717 * Returns 0 or the error code
719 static int
720 xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
722 const char *cur;
723 int ret;
725 cur = *str;
727 ret = xmlParse3986Segment(&cur, ':', 0);
728 if (ret != 0) return(ret);
729 while (*cur == '/') {
730 cur++;
731 ret = xmlParse3986Segment(&cur, 0, 1);
732 if (ret != 0) return(ret);
734 if (uri != NULL) {
735 if (uri->path != NULL) xmlFree(uri->path);
736 if (cur != *str) {
737 if (uri->cleanup & 2)
738 uri->path = STRNDUP(*str, cur - *str);
739 else
740 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
741 } else {
742 uri->path = NULL;
745 *str = cur;
746 return (0);
750 * xmlParse3986HierPart:
751 * @uri: pointer to an URI structure
752 * @str: the string to analyze
754 * Parse an hierarchical part and fills in the appropriate fields
755 * of the @uri structure
757 * hier-part = "//" authority path-abempty
758 * / path-absolute
759 * / path-rootless
760 * / path-empty
762 * Returns 0 or the error code
764 static int
765 xmlParse3986HierPart(xmlURIPtr uri, const char **str)
767 const char *cur;
768 int ret;
770 cur = *str;
772 if ((*cur == '/') && (*(cur + 1) == '/')) {
773 cur += 2;
774 ret = xmlParse3986Authority(uri, &cur);
775 if (ret != 0) return(ret);
777 * An empty server is marked with a special URI value.
779 if ((uri->server == NULL) && (uri->port == PORT_EMPTY))
780 uri->port = PORT_EMPTY_SERVER;
781 ret = xmlParse3986PathAbEmpty(uri, &cur);
782 if (ret != 0) return(ret);
783 *str = cur;
784 return(0);
785 } else if (*cur == '/') {
786 ret = xmlParse3986PathAbsolute(uri, &cur);
787 if (ret != 0) return(ret);
788 } else if (ISA_PCHAR(cur)) {
789 ret = xmlParse3986PathRootless(uri, &cur);
790 if (ret != 0) return(ret);
791 } else {
792 /* path-empty is effectively empty */
793 if (uri != NULL) {
794 if (uri->path != NULL) xmlFree(uri->path);
795 uri->path = NULL;
798 *str = cur;
799 return (0);
803 * xmlParse3986RelativeRef:
804 * @uri: pointer to an URI structure
805 * @str: the string to analyze
807 * Parse an URI string and fills in the appropriate fields
808 * of the @uri structure
810 * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
811 * relative-part = "//" authority path-abempty
812 * / path-absolute
813 * / path-noscheme
814 * / path-empty
816 * Returns 0 or the error code
818 static int
819 xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
820 int ret;
822 if ((*str == '/') && (*(str + 1) == '/')) {
823 str += 2;
824 ret = xmlParse3986Authority(uri, &str);
825 if (ret != 0) return(ret);
826 ret = xmlParse3986PathAbEmpty(uri, &str);
827 if (ret != 0) return(ret);
828 } else if (*str == '/') {
829 ret = xmlParse3986PathAbsolute(uri, &str);
830 if (ret != 0) return(ret);
831 } else if (ISA_PCHAR(str)) {
832 ret = xmlParse3986PathNoScheme(uri, &str);
833 if (ret != 0) return(ret);
834 } else {
835 /* path-empty is effectively empty */
836 if (uri != NULL) {
837 if (uri->path != NULL) xmlFree(uri->path);
838 uri->path = NULL;
842 if (*str == '?') {
843 str++;
844 ret = xmlParse3986Query(uri, &str);
845 if (ret != 0) return(ret);
847 if (*str == '#') {
848 str++;
849 ret = xmlParse3986Fragment(uri, &str);
850 if (ret != 0) return(ret);
852 if (*str != 0) {
853 xmlCleanURI(uri);
854 return(1);
856 return(0);
861 * xmlParse3986URI:
862 * @uri: pointer to an URI structure
863 * @str: the string to analyze
865 * Parse an URI string and fills in the appropriate fields
866 * of the @uri structure
868 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
870 * Returns 0 or the error code
872 static int
873 xmlParse3986URI(xmlURIPtr uri, const char *str) {
874 int ret;
876 ret = xmlParse3986Scheme(uri, &str);
877 if (ret != 0) return(ret);
878 if (*str != ':') {
879 return(1);
881 str++;
882 ret = xmlParse3986HierPart(uri, &str);
883 if (ret != 0) return(ret);
884 if (*str == '?') {
885 str++;
886 ret = xmlParse3986Query(uri, &str);
887 if (ret != 0) return(ret);
889 if (*str == '#') {
890 str++;
891 ret = xmlParse3986Fragment(uri, &str);
892 if (ret != 0) return(ret);
894 if (*str != 0) {
895 xmlCleanURI(uri);
896 return(1);
898 return(0);
902 * xmlParse3986URIReference:
903 * @uri: pointer to an URI structure
904 * @str: the string to analyze
906 * Parse an URI reference string and fills in the appropriate fields
907 * of the @uri structure
909 * URI-reference = URI / relative-ref
911 * Returns 0 or the error code
913 static int
914 xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
915 int ret;
917 if (str == NULL)
918 return(-1);
919 xmlCleanURI(uri);
922 * Try first to parse absolute refs, then fallback to relative if
923 * it fails.
925 ret = xmlParse3986URI(uri, str);
926 if (ret != 0) {
927 xmlCleanURI(uri);
928 ret = xmlParse3986RelativeRef(uri, str);
929 if (ret != 0) {
930 xmlCleanURI(uri);
931 return(ret);
934 return(0);
938 * xmlParseURI:
939 * @str: the URI string to analyze
941 * Parse an URI based on RFC 3986
943 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
945 * Returns a newly built xmlURIPtr or NULL in case of error
947 xmlURIPtr
948 xmlParseURI(const char *str) {
949 xmlURIPtr uri;
950 int ret;
952 if (str == NULL)
953 return(NULL);
954 uri = xmlCreateURI();
955 if (uri != NULL) {
956 ret = xmlParse3986URIReference(uri, str);
957 if (ret) {
958 xmlFreeURI(uri);
959 return(NULL);
962 return(uri);
966 * xmlParseURIReference:
967 * @uri: pointer to an URI structure
968 * @str: the string to analyze
970 * Parse an URI reference string based on RFC 3986 and fills in the
971 * appropriate fields of the @uri structure
973 * URI-reference = URI / relative-ref
975 * Returns 0 or the error code
978 xmlParseURIReference(xmlURIPtr uri, const char *str) {
979 return(xmlParse3986URIReference(uri, str));
983 * xmlParseURIRaw:
984 * @str: the URI string to analyze
985 * @raw: if 1 unescaping of URI pieces are disabled
987 * Parse an URI but allows to keep intact the original fragments.
989 * URI-reference = URI / relative-ref
991 * Returns a newly built xmlURIPtr or NULL in case of error
993 xmlURIPtr
994 xmlParseURIRaw(const char *str, int raw) {
995 xmlURIPtr uri;
996 int ret;
998 if (str == NULL)
999 return(NULL);
1000 uri = xmlCreateURI();
1001 if (uri != NULL) {
1002 if (raw) {
1003 uri->cleanup |= 2;
1005 ret = xmlParseURIReference(uri, str);
1006 if (ret) {
1007 xmlFreeURI(uri);
1008 return(NULL);
1011 return(uri);
1014 /************************************************************************
1016 * Generic URI structure functions *
1018 ************************************************************************/
1021 * xmlCreateURI:
1023 * Simply creates an empty xmlURI
1025 * Returns the new structure or NULL in case of error
1027 xmlURIPtr
1028 xmlCreateURI(void) {
1029 xmlURIPtr ret;
1031 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1032 if (ret == NULL) {
1033 xmlURIErrMemory("creating URI structure\n");
1034 return(NULL);
1036 memset(ret, 0, sizeof(xmlURI));
1037 ret->port = PORT_EMPTY;
1038 return(ret);
1042 * xmlSaveUriRealloc:
1044 * Function to handle properly a reallocation when saving an URI
1045 * Also imposes some limit on the length of an URI string output
1047 static xmlChar *
1048 xmlSaveUriRealloc(xmlChar *ret, int *max) {
1049 xmlChar *temp;
1050 int tmp;
1052 if (*max > MAX_URI_LENGTH) {
1053 xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1054 return(NULL);
1056 tmp = *max * 2;
1057 temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1058 if (temp == NULL) {
1059 xmlURIErrMemory("saving URI\n");
1060 return(NULL);
1062 *max = tmp;
1063 return(temp);
1067 * xmlSaveUri:
1068 * @uri: pointer to an xmlURI
1070 * Save the URI as an escaped string
1072 * Returns a new string (to be deallocated by caller)
1074 xmlChar *
1075 xmlSaveUri(xmlURIPtr uri) {
1076 xmlChar *ret = NULL;
1077 xmlChar *temp;
1078 const char *p;
1079 int len;
1080 int max;
1082 if (uri == NULL) return(NULL);
1085 max = 80;
1086 ret = (xmlChar *) xmlMallocAtomic(max + 1);
1087 if (ret == NULL) {
1088 xmlURIErrMemory("saving URI\n");
1089 return(NULL);
1091 len = 0;
1093 if (uri->scheme != NULL) {
1094 p = uri->scheme;
1095 while (*p != 0) {
1096 if (len >= max) {
1097 temp = xmlSaveUriRealloc(ret, &max);
1098 if (temp == NULL) goto mem_error;
1099 ret = temp;
1101 ret[len++] = *p++;
1103 if (len >= max) {
1104 temp = xmlSaveUriRealloc(ret, &max);
1105 if (temp == NULL) goto mem_error;
1106 ret = temp;
1108 ret[len++] = ':';
1110 if (uri->opaque != NULL) {
1111 p = uri->opaque;
1112 while (*p != 0) {
1113 if (len + 3 >= max) {
1114 temp = xmlSaveUriRealloc(ret, &max);
1115 if (temp == NULL) goto mem_error;
1116 ret = temp;
1118 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1119 ret[len++] = *p++;
1120 else {
1121 int val = *(unsigned char *)p++;
1122 int hi = val / 0x10, lo = val % 0x10;
1123 ret[len++] = '%';
1124 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1125 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1128 } else {
1129 if ((uri->server != NULL) || (uri->port != PORT_EMPTY)) {
1130 if (len + 3 >= max) {
1131 temp = xmlSaveUriRealloc(ret, &max);
1132 if (temp == NULL) goto mem_error;
1133 ret = temp;
1135 ret[len++] = '/';
1136 ret[len++] = '/';
1137 if (uri->user != NULL) {
1138 p = uri->user;
1139 while (*p != 0) {
1140 if (len + 3 >= max) {
1141 temp = xmlSaveUriRealloc(ret, &max);
1142 if (temp == NULL) goto mem_error;
1143 ret = temp;
1145 if ((IS_UNRESERVED(*(p))) ||
1146 ((*(p) == ';')) || ((*(p) == ':')) ||
1147 ((*(p) == '&')) || ((*(p) == '=')) ||
1148 ((*(p) == '+')) || ((*(p) == '$')) ||
1149 ((*(p) == ',')))
1150 ret[len++] = *p++;
1151 else {
1152 int val = *(unsigned char *)p++;
1153 int hi = val / 0x10, lo = val % 0x10;
1154 ret[len++] = '%';
1155 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1156 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1159 if (len + 3 >= max) {
1160 temp = xmlSaveUriRealloc(ret, &max);
1161 if (temp == NULL) goto mem_error;
1162 ret = temp;
1164 ret[len++] = '@';
1166 if (uri->server != NULL) {
1167 p = uri->server;
1168 while (*p != 0) {
1169 if (len >= max) {
1170 temp = xmlSaveUriRealloc(ret, &max);
1171 if (temp == NULL) goto mem_error;
1172 ret = temp;
1174 /* TODO: escaping? */
1175 ret[len++] = (xmlChar) *p++;
1178 if (uri->port > 0) {
1179 if (len + 10 >= max) {
1180 temp = xmlSaveUriRealloc(ret, &max);
1181 if (temp == NULL) goto mem_error;
1182 ret = temp;
1184 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1186 } else if (uri->authority != NULL) {
1187 if (len + 3 >= max) {
1188 temp = xmlSaveUriRealloc(ret, &max);
1189 if (temp == NULL) goto mem_error;
1190 ret = temp;
1192 ret[len++] = '/';
1193 ret[len++] = '/';
1194 p = uri->authority;
1195 while (*p != 0) {
1196 if (len + 3 >= max) {
1197 temp = xmlSaveUriRealloc(ret, &max);
1198 if (temp == NULL) goto mem_error;
1199 ret = temp;
1201 if ((IS_UNRESERVED(*(p))) ||
1202 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1203 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1204 ((*(p) == '=')) || ((*(p) == '+')))
1205 ret[len++] = *p++;
1206 else {
1207 int val = *(unsigned char *)p++;
1208 int hi = val / 0x10, lo = val % 0x10;
1209 ret[len++] = '%';
1210 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1211 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1214 } else if (uri->scheme != NULL) {
1215 if (len + 3 >= max) {
1216 temp = xmlSaveUriRealloc(ret, &max);
1217 if (temp == NULL) goto mem_error;
1218 ret = temp;
1221 if (uri->path != NULL) {
1222 p = uri->path;
1224 * the colon in file:///d: should not be escaped or
1225 * Windows accesses fail later.
1227 if ((uri->scheme != NULL) &&
1228 (p[0] == '/') &&
1229 (((p[1] >= 'a') && (p[1] <= 'z')) ||
1230 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1231 (p[2] == ':') &&
1232 (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1233 if (len + 3 >= max) {
1234 temp = xmlSaveUriRealloc(ret, &max);
1235 if (temp == NULL) goto mem_error;
1236 ret = temp;
1238 ret[len++] = *p++;
1239 ret[len++] = *p++;
1240 ret[len++] = *p++;
1242 while (*p != 0) {
1243 if (len + 3 >= max) {
1244 temp = xmlSaveUriRealloc(ret, &max);
1245 if (temp == NULL) goto mem_error;
1246 ret = temp;
1248 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1249 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1250 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1251 ((*(p) == ',')))
1252 ret[len++] = *p++;
1253 else {
1254 int val = *(unsigned char *)p++;
1255 int hi = val / 0x10, lo = val % 0x10;
1256 ret[len++] = '%';
1257 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1258 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1262 if (uri->query_raw != NULL) {
1263 if (len + 1 >= max) {
1264 temp = xmlSaveUriRealloc(ret, &max);
1265 if (temp == NULL) goto mem_error;
1266 ret = temp;
1268 ret[len++] = '?';
1269 p = uri->query_raw;
1270 while (*p != 0) {
1271 if (len + 1 >= max) {
1272 temp = xmlSaveUriRealloc(ret, &max);
1273 if (temp == NULL) goto mem_error;
1274 ret = temp;
1276 ret[len++] = *p++;
1278 } else if (uri->query != NULL) {
1279 if (len + 3 >= max) {
1280 temp = xmlSaveUriRealloc(ret, &max);
1281 if (temp == NULL) goto mem_error;
1282 ret = temp;
1284 ret[len++] = '?';
1285 p = uri->query;
1286 while (*p != 0) {
1287 if (len + 3 >= max) {
1288 temp = xmlSaveUriRealloc(ret, &max);
1289 if (temp == NULL) goto mem_error;
1290 ret = temp;
1292 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1293 ret[len++] = *p++;
1294 else {
1295 int val = *(unsigned char *)p++;
1296 int hi = val / 0x10, lo = val % 0x10;
1297 ret[len++] = '%';
1298 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1299 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1304 if (uri->fragment != NULL) {
1305 if (len + 3 >= max) {
1306 temp = xmlSaveUriRealloc(ret, &max);
1307 if (temp == NULL) goto mem_error;
1308 ret = temp;
1310 ret[len++] = '#';
1311 p = uri->fragment;
1312 while (*p != 0) {
1313 if (len + 3 >= max) {
1314 temp = xmlSaveUriRealloc(ret, &max);
1315 if (temp == NULL) goto mem_error;
1316 ret = temp;
1318 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1319 ret[len++] = *p++;
1320 else {
1321 int val = *(unsigned char *)p++;
1322 int hi = val / 0x10, lo = val % 0x10;
1323 ret[len++] = '%';
1324 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1325 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1329 if (len >= max) {
1330 temp = xmlSaveUriRealloc(ret, &max);
1331 if (temp == NULL) goto mem_error;
1332 ret = temp;
1334 ret[len] = 0;
1335 return(ret);
1337 mem_error:
1338 xmlFree(ret);
1339 return(NULL);
1343 * xmlPrintURI:
1344 * @stream: a FILE* for the output
1345 * @uri: pointer to an xmlURI
1347 * Prints the URI in the stream @stream.
1349 void
1350 xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1351 xmlChar *out;
1353 out = xmlSaveUri(uri);
1354 if (out != NULL) {
1355 fprintf(stream, "%s", (char *) out);
1356 xmlFree(out);
1361 * xmlCleanURI:
1362 * @uri: pointer to an xmlURI
1364 * Make sure the xmlURI struct is free of content
1366 static void
1367 xmlCleanURI(xmlURIPtr uri) {
1368 if (uri == NULL) return;
1370 if (uri->scheme != NULL) xmlFree(uri->scheme);
1371 uri->scheme = NULL;
1372 if (uri->server != NULL) xmlFree(uri->server);
1373 uri->server = NULL;
1374 if (uri->user != NULL) xmlFree(uri->user);
1375 uri->user = NULL;
1376 if (uri->path != NULL) xmlFree(uri->path);
1377 uri->path = NULL;
1378 if (uri->fragment != NULL) xmlFree(uri->fragment);
1379 uri->fragment = NULL;
1380 if (uri->opaque != NULL) xmlFree(uri->opaque);
1381 uri->opaque = NULL;
1382 if (uri->authority != NULL) xmlFree(uri->authority);
1383 uri->authority = NULL;
1384 if (uri->query != NULL) xmlFree(uri->query);
1385 uri->query = NULL;
1386 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1387 uri->query_raw = NULL;
1391 * xmlFreeURI:
1392 * @uri: pointer to an xmlURI
1394 * Free up the xmlURI struct
1396 void
1397 xmlFreeURI(xmlURIPtr uri) {
1398 if (uri == NULL) return;
1400 if (uri->scheme != NULL) xmlFree(uri->scheme);
1401 if (uri->server != NULL) xmlFree(uri->server);
1402 if (uri->user != NULL) xmlFree(uri->user);
1403 if (uri->path != NULL) xmlFree(uri->path);
1404 if (uri->fragment != NULL) xmlFree(uri->fragment);
1405 if (uri->opaque != NULL) xmlFree(uri->opaque);
1406 if (uri->authority != NULL) xmlFree(uri->authority);
1407 if (uri->query != NULL) xmlFree(uri->query);
1408 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1409 xmlFree(uri);
/************************************************************************
 *                                                                      *
 *                      Helper functions                                *
 *                                                                      *
 ************************************************************************/
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The result is never longer than the input.
 *
 * Returns 0 or an error code (-1 when @path is NULL)
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        /* copy the single remaining '/' separator */
        (out++)[0] = (cur++)[0];
    }
done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* Valgrind complained, strcpy(cur, segp + 3); */
        /* string will overlap, do not use strcpy */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /* If there are no previous segments, then keep going from here.  */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
            ;
        if (segp == path)
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }
    /* "out" still marks the end of the (c)/(d) result, which is at or past
     * the end of the compacted string, so this write stays in bounds.
     */
    out[0] = '\0';

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path.
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
/*
 * is_hex:
 * @c:  the character to test
 *
 * Returns 1 if @c is an ASCII hexadecimal digit (0-9, a-f, A-F),
 * 0 otherwise.
 */
static int is_hex(char c) {
    if (((c >= '0') && (c <= '9')) ||
        ((c >= 'a') && (c <= 'f')) ||
        ((c >= 'A') && (c <= 'F')))
        return(1);
    return(0);
}
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, but does not check that the string is an URI. The
 * output is a direct unsigned char translation of %XX values (no encoding)
 * Note that the length of the result can only be smaller or same size as
 * the input string.
 *
 * A '%' that is not followed by two hexadecimal digits within the
 * remaining length is copied through unchanged. When @target is given the
 * result is written there, so it must have room for @len + 1 bytes;
 * otherwise a new buffer is allocated.
 *
 * Returns a copy of the string, but unescaped, will return NULL only in case
 * of error
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *ret, *out;
    const char *in;

    if (str == NULL)
        return(NULL);
    if (len <= 0) {
        size_t full = strlen(str);

        /* the length (plus terminator) must be representable as an int */
        if (full >= (size_t) INT_MAX)
            return(NULL);
        len = (int) full;
    }

    if (target == NULL) {
        ret = (char *) xmlMallocAtomic(len + 1);
        if (ret == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    } else
        ret = target;
    in = str;
    out = ret;
    while(len > 0) {
        if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
            int c = 0;

            /* high nibble */
            in++;
            if ((*in >= '0') && (*in <= '9'))
                c = (*in - '0');
            else if ((*in >= 'a') && (*in <= 'f'))
                c = (*in - 'a') + 10;
            else if ((*in >= 'A') && (*in <= 'F'))
                c = (*in - 'A') + 10;
            /* low nibble */
            in++;
            if ((*in >= '0') && (*in <= '9'))
                c = c * 16 + (*in - '0');
            else if ((*in >= 'a') && (*in <= 'f'))
                c = c * 16 + (*in - 'a') + 10;
            else if ((*in >= 'A') && (*in <= 'F'))
                c = c * 16 + (*in - 'A') + 10;
            in++;
            len -= 3;
            /* Explicit sign change */
            *out++ = (char) c;
        } else {
            *out++ = *in++;
            len--;
        }
    }
    *out = 0;
    return(ret);
}
1680 * xmlURIEscapeStr:
1681 * @str: string to escape
1682 * @list: exception list string of chars not to escape
1684 * This routine escapes a string to hex, ignoring reserved characters
1685 * (a-z, A-Z, 0-9, "@-_.!~*'()") and the characters in the exception list.
1687 * Returns a new escaped string or NULL in case of error.
1689 xmlChar *
1690 xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1691 xmlChar *ret, ch;
1692 xmlChar *temp;
1693 const xmlChar *in;
1694 int len, out;
1696 if (str == NULL)
1697 return(NULL);
1698 if (str[0] == 0)
1699 return(xmlStrdup(str));
1700 len = xmlStrlen(str);
1701 if (!(len > 0)) return(NULL);
1703 len += 20;
1704 ret = (xmlChar *) xmlMallocAtomic(len);
1705 if (ret == NULL) {
1706 xmlURIErrMemory("escaping URI value\n");
1707 return(NULL);
1709 in = (const xmlChar *) str;
1710 out = 0;
1711 while(*in != 0) {
1712 if (len - out <= 3) {
1713 temp = xmlSaveUriRealloc(ret, &len);
1714 if (temp == NULL) {
1715 xmlURIErrMemory("escaping URI value\n");
1716 xmlFree(ret);
1717 return(NULL);
1719 ret = temp;
1722 ch = *in;
1724 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1725 unsigned char val;
1726 ret[out++] = '%';
1727 val = ch >> 4;
1728 if (val <= 9)
1729 ret[out++] = '0' + val;
1730 else
1731 ret[out++] = 'A' + val - 0xA;
1732 val = ch & 0xF;
1733 if (val <= 9)
1734 ret[out++] = '0' + val;
1735 else
1736 ret[out++] = 'A' + val - 0xA;
1737 in++;
1738 } else {
1739 ret[out++] = *in++;
1743 ret[out] = 0;
1744 return(ret);
1748 * xmlURIEscape:
1749 * @str: the string of the URI to escape
1751 * Escaping routine, does not do validity checks !
1752 * It will try to escape the chars needing this, but this is heuristic
1753 * based it's impossible to be sure.
1755 * Returns an copy of the string, but escaped
1757 * 25 May 2001
1758 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1759 * according to RFC2396.
1760 * - Carl Douglas
1762 xmlChar *
1763 xmlURIEscape(const xmlChar * str)
1765 xmlChar *ret, *segment = NULL;
1766 xmlURIPtr uri;
1767 int ret2;
1769 if (str == NULL)
1770 return (NULL);
1772 uri = xmlCreateURI();
1773 if (uri != NULL) {
1775 * Allow escaping errors in the unescaped form
1777 uri->cleanup = 1;
1778 ret2 = xmlParseURIReference(uri, (const char *)str);
1779 if (ret2) {
1780 xmlFreeURI(uri);
1781 return (NULL);
1785 if (!uri)
1786 return NULL;
1788 ret = NULL;
1790 #define NULLCHK(p) if(!p) { \
1791 xmlURIErrMemory("escaping URI value\n"); \
1792 xmlFreeURI(uri); \
1793 xmlFree(ret); \
1794 return NULL; } \
1796 if (uri->scheme) {
1797 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1798 NULLCHK(segment)
1799 ret = xmlStrcat(ret, segment);
1800 ret = xmlStrcat(ret, BAD_CAST ":");
1801 xmlFree(segment);
1804 if (uri->authority) {
1805 segment =
1806 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1807 NULLCHK(segment)
1808 ret = xmlStrcat(ret, BAD_CAST "//");
1809 ret = xmlStrcat(ret, segment);
1810 xmlFree(segment);
1813 if (uri->user) {
1814 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1815 NULLCHK(segment)
1816 ret = xmlStrcat(ret,BAD_CAST "//");
1817 ret = xmlStrcat(ret, segment);
1818 ret = xmlStrcat(ret, BAD_CAST "@");
1819 xmlFree(segment);
1822 if (uri->server) {
1823 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1824 NULLCHK(segment)
1825 if (uri->user == NULL)
1826 ret = xmlStrcat(ret, BAD_CAST "//");
1827 ret = xmlStrcat(ret, segment);
1828 xmlFree(segment);
1831 if (uri->port > 0) {
1832 xmlChar port[11];
1834 snprintf((char *) port, 11, "%d", uri->port);
1835 ret = xmlStrcat(ret, BAD_CAST ":");
1836 ret = xmlStrcat(ret, port);
1839 if (uri->path) {
1840 segment =
1841 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1842 NULLCHK(segment)
1843 ret = xmlStrcat(ret, segment);
1844 xmlFree(segment);
1847 if (uri->query_raw) {
1848 ret = xmlStrcat(ret, BAD_CAST "?");
1849 ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1851 else if (uri->query) {
1852 segment =
1853 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1854 NULLCHK(segment)
1855 ret = xmlStrcat(ret, BAD_CAST "?");
1856 ret = xmlStrcat(ret, segment);
1857 xmlFree(segment);
1860 if (uri->opaque) {
1861 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1862 NULLCHK(segment)
1863 ret = xmlStrcat(ret, segment);
1864 xmlFree(segment);
1867 if (uri->fragment) {
1868 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1869 NULLCHK(segment)
1870 ret = xmlStrcat(ret, BAD_CAST "#");
1871 ret = xmlStrcat(ret, segment);
1872 xmlFree(segment);
1875 xmlFreeURI(uri);
1876 #undef NULLCHK
1878 return (ret);
/************************************************************************
 *                                                                      *
 *                      Public functions                                *
 *                                                                      *
 ************************************************************************/
1888 * xmlBuildURI:
1889 * @URI: the URI instance found in the document
1890 * @base: the base value
1892 * Computes he final URI of the reference done by checking that
1893 * the given URI is valid, and building the final URI using the
1894 * base URI. This is processed according to section 5.2 of the
1895 * RFC 2396
1897 * 5.2. Resolving Relative References to Absolute Form
1899 * Returns a new URI string (to be freed by the caller) or NULL in case
1900 * of error.
1902 xmlChar *
1903 xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1904 xmlChar *val = NULL;
1905 int ret, len, indx, cur, out;
1906 xmlURIPtr ref = NULL;
1907 xmlURIPtr bas = NULL;
1908 xmlURIPtr res = NULL;
1911 * 1) The URI reference is parsed into the potential four components and
1912 * fragment identifier, as described in Section 4.3.
1914 * NOTE that a completely empty URI is treated by modern browsers
1915 * as a reference to "." rather than as a synonym for the current
1916 * URI. Should we do that here?
1918 if (URI == NULL)
1919 ret = -1;
1920 else {
1921 if (*URI) {
1922 ref = xmlCreateURI();
1923 if (ref == NULL)
1924 goto done;
1925 ret = xmlParseURIReference(ref, (const char *) URI);
1927 else
1928 ret = 0;
1930 if (ret != 0)
1931 goto done;
1932 if ((ref != NULL) && (ref->scheme != NULL)) {
1934 * The URI is absolute don't modify.
1936 val = xmlStrdup(URI);
1937 goto done;
1939 if (base == NULL)
1940 ret = -1;
1941 else {
1942 bas = xmlCreateURI();
1943 if (bas == NULL)
1944 goto done;
1945 ret = xmlParseURIReference(bas, (const char *) base);
1947 if (ret != 0) {
1948 if (ref)
1949 val = xmlSaveUri(ref);
1950 goto done;
1952 if (ref == NULL) {
1954 * the base fragment must be ignored
1956 if (bas->fragment != NULL) {
1957 xmlFree(bas->fragment);
1958 bas->fragment = NULL;
1960 val = xmlSaveUri(bas);
1961 goto done;
1965 * 2) If the path component is empty and the scheme, authority, and
1966 * query components are undefined, then it is a reference to the
1967 * current document and we are done. Otherwise, the reference URI's
1968 * query and fragment components are defined as found (or not found)
1969 * within the URI reference and not inherited from the base URI.
1971 * NOTE that in modern browsers, the parsing differs from the above
1972 * in the following aspect: the query component is allowed to be
1973 * defined while still treating this as a reference to the current
1974 * document.
1976 res = xmlCreateURI();
1977 if (res == NULL)
1978 goto done;
1979 if ((ref->scheme == NULL) && (ref->path == NULL) &&
1980 ((ref->authority == NULL) && (ref->server == NULL) &&
1981 (ref->port == PORT_EMPTY))) {
1982 if (bas->scheme != NULL)
1983 res->scheme = xmlMemStrdup(bas->scheme);
1984 if (bas->authority != NULL)
1985 res->authority = xmlMemStrdup(bas->authority);
1986 else {
1987 if (bas->server != NULL)
1988 res->server = xmlMemStrdup(bas->server);
1989 if (bas->user != NULL)
1990 res->user = xmlMemStrdup(bas->user);
1991 res->port = bas->port;
1993 if (bas->path != NULL)
1994 res->path = xmlMemStrdup(bas->path);
1995 if (ref->query_raw != NULL)
1996 res->query_raw = xmlMemStrdup (ref->query_raw);
1997 else if (ref->query != NULL)
1998 res->query = xmlMemStrdup(ref->query);
1999 else if (bas->query_raw != NULL)
2000 res->query_raw = xmlMemStrdup(bas->query_raw);
2001 else if (bas->query != NULL)
2002 res->query = xmlMemStrdup(bas->query);
2003 if (ref->fragment != NULL)
2004 res->fragment = xmlMemStrdup(ref->fragment);
2005 goto step_7;
2009 * 3) If the scheme component is defined, indicating that the reference
2010 * starts with a scheme name, then the reference is interpreted as an
2011 * absolute URI and we are done. Otherwise, the reference URI's
2012 * scheme is inherited from the base URI's scheme component.
2014 if (ref->scheme != NULL) {
2015 val = xmlSaveUri(ref);
2016 goto done;
2018 if (bas->scheme != NULL)
2019 res->scheme = xmlMemStrdup(bas->scheme);
2021 if (ref->query_raw != NULL)
2022 res->query_raw = xmlMemStrdup(ref->query_raw);
2023 else if (ref->query != NULL)
2024 res->query = xmlMemStrdup(ref->query);
2025 if (ref->fragment != NULL)
2026 res->fragment = xmlMemStrdup(ref->fragment);
2029 * 4) If the authority component is defined, then the reference is a
2030 * network-path and we skip to step 7. Otherwise, the reference
2031 * URI's authority is inherited from the base URI's authority
2032 * component, which will also be undefined if the URI scheme does not
2033 * use an authority component.
2035 if ((ref->authority != NULL) || (ref->server != NULL) ||
2036 (ref->port != PORT_EMPTY)) {
2037 if (ref->authority != NULL)
2038 res->authority = xmlMemStrdup(ref->authority);
2039 else {
2040 if (ref->server != NULL)
2041 res->server = xmlMemStrdup(ref->server);
2042 if (ref->user != NULL)
2043 res->user = xmlMemStrdup(ref->user);
2044 res->port = ref->port;
2046 if (ref->path != NULL)
2047 res->path = xmlMemStrdup(ref->path);
2048 goto step_7;
2050 if (bas->authority != NULL)
2051 res->authority = xmlMemStrdup(bas->authority);
2052 else if ((bas->server != NULL) || (bas->port != PORT_EMPTY)) {
2053 if (bas->server != NULL)
2054 res->server = xmlMemStrdup(bas->server);
2055 if (bas->user != NULL)
2056 res->user = xmlMemStrdup(bas->user);
2057 res->port = bas->port;
2061 * 5) If the path component begins with a slash character ("/"), then
2062 * the reference is an absolute-path and we skip to step 7.
2064 if ((ref->path != NULL) && (ref->path[0] == '/')) {
2065 res->path = xmlMemStrdup(ref->path);
2066 goto step_7;
2071 * 6) If this step is reached, then we are resolving a relative-path
2072 * reference. The relative path needs to be merged with the base
2073 * URI's path. Although there are many ways to do this, we will
2074 * describe a simple method using a separate string buffer.
2076 * Allocate a buffer large enough for the result string.
2078 len = 2; /* extra / and 0 */
2079 if (ref->path != NULL)
2080 len += strlen(ref->path);
2081 if (bas->path != NULL)
2082 len += strlen(bas->path);
2083 res->path = (char *) xmlMallocAtomic(len);
2084 if (res->path == NULL) {
2085 xmlURIErrMemory("resolving URI against base\n");
2086 goto done;
2088 res->path[0] = 0;
2091 * a) All but the last segment of the base URI's path component is
2092 * copied to the buffer. In other words, any characters after the
2093 * last (right-most) slash character, if any, are excluded.
2095 cur = 0;
2096 out = 0;
2097 if (bas->path != NULL) {
2098 while (bas->path[cur] != 0) {
2099 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2100 cur++;
2101 if (bas->path[cur] == 0)
2102 break;
2104 cur++;
2105 while (out < cur) {
2106 res->path[out] = bas->path[out];
2107 out++;
2111 res->path[out] = 0;
2114 * b) The reference's path component is appended to the buffer
2115 * string.
2117 if (ref->path != NULL && ref->path[0] != 0) {
2118 indx = 0;
2120 * Ensure the path includes a '/'
2122 if ((out == 0) && ((bas->server != NULL) || bas->port != PORT_EMPTY))
2123 res->path[out++] = '/';
2124 while (ref->path[indx] != 0) {
2125 res->path[out++] = ref->path[indx++];
2128 res->path[out] = 0;
2131 * Steps c) to h) are really path normalization steps
2133 xmlNormalizeURIPath(res->path);
2135 step_7:
2138 * 7) The resulting URI components, including any inherited from the
2139 * base URI, are recombined to give the absolute form of the URI
2140 * reference.
2142 val = xmlSaveUri(res);
2144 done:
2145 if (ref != NULL)
2146 xmlFreeURI(ref);
2147 if (bas != NULL)
2148 xmlFreeURI(bas);
2149 if (res != NULL)
2150 xmlFreeURI(res);
2151 return(val);
2155 * xmlBuildRelativeURI:
2156 * @URI: the URI reference under consideration
2157 * @base: the base value
2159 * Expresses the URI of the reference in terms relative to the
2160 * base. Some examples of this operation include:
2161 * base = "http://site1.com/docs/book1.html"
2162 * URI input URI returned
2163 * docs/pic1.gif pic1.gif
2164 * docs/img/pic1.gif img/pic1.gif
2165 * img/pic1.gif ../img/pic1.gif
2166 * http://site1.com/docs/pic1.gif pic1.gif
2167 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2169 * base = "docs/book1.html"
2170 * URI input URI returned
2171 * docs/pic1.gif pic1.gif
2172 * docs/img/pic1.gif img/pic1.gif
2173 * img/pic1.gif ../img/pic1.gif
2174 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2177 * Note: if the URI reference is really weird or complicated, it may be
2178 * worthwhile to first convert it into a "nice" one by calling
2179 * xmlBuildURI (using 'base') before calling this routine,
2180 * since this routine (for reasonable efficiency) assumes URI has
2181 * already been through some validation.
2183 * Returns a new URI string (to be freed by the caller) or NULL in case
2184 * error.
2186 xmlChar *
2187 xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2189 xmlChar *val = NULL;
2190 int ret;
2191 int ix;
2192 int nbslash = 0;
2193 int len;
2194 xmlURIPtr ref = NULL;
2195 xmlURIPtr bas = NULL;
2196 xmlChar *bptr, *uptr, *vptr;
2197 int remove_path = 0;
2199 if ((URI == NULL) || (*URI == 0))
2200 return NULL;
2203 * First parse URI into a standard form
2205 ref = xmlCreateURI ();
2206 if (ref == NULL)
2207 return NULL;
2208 /* If URI not already in "relative" form */
2209 if (URI[0] != '.') {
2210 ret = xmlParseURIReference (ref, (const char *) URI);
2211 if (ret != 0)
2212 goto done; /* Error in URI, return NULL */
2213 } else
2214 ref->path = (char *)xmlStrdup(URI);
2217 * Next parse base into the same standard form
2219 if ((base == NULL) || (*base == 0)) {
2220 val = xmlStrdup (URI);
2221 goto done;
2223 bas = xmlCreateURI ();
2224 if (bas == NULL)
2225 goto done;
2226 if (base[0] != '.') {
2227 ret = xmlParseURIReference (bas, (const char *) base);
2228 if (ret != 0)
2229 goto done; /* Error in base, return NULL */
2230 } else
2231 bas->path = (char *)xmlStrdup(base);
2234 * If the scheme / server on the URI differs from the base,
2235 * just return the URI
2237 if ((ref->scheme != NULL) &&
2238 ((bas->scheme == NULL) ||
2239 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2240 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)) ||
2241 (bas->port != ref->port))) {
2242 val = xmlStrdup (URI);
2243 goto done;
2245 if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2246 val = xmlStrdup(BAD_CAST "");
2247 goto done;
2249 if (bas->path == NULL) {
2250 val = xmlStrdup((xmlChar *)ref->path);
2251 goto done;
2253 if (ref->path == NULL) {
2254 ref->path = (char *) "/";
2255 remove_path = 1;
2259 * At this point (at last!) we can compare the two paths
2261 * First we take care of the special case where either of the
2262 * two path components may be missing (bug 316224)
2264 bptr = (xmlChar *)bas->path;
2266 xmlChar *rptr = (xmlChar *) ref->path;
2267 int pos = 0;
2270 * Next we compare the two strings and find where they first differ
2272 if ((*rptr == '.') && (rptr[1] == '/'))
2273 rptr += 2;
2274 if ((*bptr == '.') && (bptr[1] == '/'))
2275 bptr += 2;
2276 else if ((*bptr == '/') && (*rptr != '/'))
2277 bptr++;
2278 while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0))
2279 pos++;
2281 if (bptr[pos] == rptr[pos]) {
2282 val = xmlStrdup(BAD_CAST "");
2283 goto done; /* (I can't imagine why anyone would do this) */
2287 * In URI, "back up" to the last '/' encountered. This will be the
2288 * beginning of the "unique" suffix of URI
2290 ix = pos;
2291 for (; ix > 0; ix--) {
2292 if (rptr[ix - 1] == '/')
2293 break;
2295 uptr = (xmlChar *)&rptr[ix];
2298 * In base, count the number of '/' from the differing point
2300 for (; bptr[ix] != 0; ix++) {
2301 if (bptr[ix] == '/')
2302 nbslash++;
2306 * e.g: URI="foo/" base="foo/bar" -> "./"
2308 if (nbslash == 0 && !uptr[0]) {
2309 val = xmlStrdup(BAD_CAST "./");
2310 goto done;
2313 len = xmlStrlen (uptr) + 1;
2316 if (nbslash == 0) {
2317 if (uptr != NULL)
2318 /* exception characters from xmlSaveUri */
2319 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2320 goto done;
2324 * Allocate just enough space for the returned string -
2325 * length of the remainder of the URI, plus enough space
2326 * for the "../" groups, plus one for the terminator
2328 val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2329 if (val == NULL) {
2330 xmlURIErrMemory("building relative URI\n");
2331 goto done;
2333 vptr = val;
2335 * Put in as many "../" as needed
2337 for (; nbslash>0; nbslash--) {
2338 *vptr++ = '.';
2339 *vptr++ = '.';
2340 *vptr++ = '/';
2343 * Finish up with the end of the URI
2345 if (uptr != NULL) {
2346 if ((vptr > val) && (len > 0) &&
2347 (uptr[0] == '/') && (vptr[-1] == '/')) {
2348 memcpy (vptr, uptr + 1, len - 1);
2349 vptr[len - 2] = 0;
2350 } else {
2351 memcpy (vptr, uptr, len);
2352 vptr[len - 1] = 0;
2354 } else {
2355 vptr[len - 1] = 0;
2358 /* escape the freshly-built path */
2359 vptr = val;
2360 /* exception characters from xmlSaveUri */
2361 val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2362 xmlFree(vptr);
2364 done:
2366 * Free the working variables
2368 if (remove_path != 0)
2369 ref->path = NULL;
2370 if (ref != NULL)
2371 xmlFreeURI (ref);
2372 if (bas != NULL)
2373 xmlFreeURI (bas);
2375 return val;
2379 * xmlCanonicPath:
2380 * @path: the resource locator in a filesystem notation
2382 * Constructs a canonic path from the specified path.
2384 * Returns a new canonic path, or a duplicate of the path parameter if the
2385 * construction fails. The caller is responsible for freeing the memory occupied
2386 * by the returned string. If there is insufficient memory available, or the
2387 * argument is NULL, the function returns NULL.
2389 #define IS_WINDOWS_PATH(p) \
2390 ((p != NULL) && \
2391 (((p[0] >= 'a') && (p[0] <= 'z')) || \
2392 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2393 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2394 xmlChar *
2395 xmlCanonicPath(const xmlChar *path)
2398 * For Windows implementations, additional work needs to be done to
2399 * replace backslashes in pathnames with "forward slashes"
2401 #if defined(_WIN32)
2402 int len = 0;
2403 char *p = NULL;
2404 #endif
2405 xmlURIPtr uri;
2406 xmlChar *ret;
2407 const xmlChar *absuri;
2409 if (path == NULL)
2410 return(NULL);
2412 #if defined(_WIN32)
2414 * We must not change the backslashes to slashes if the the path
2415 * starts with \\?\
2416 * Those paths can be up to 32k characters long.
2417 * Was added specifically for OpenOffice, those paths can't be converted
2418 * to URIs anyway.
2420 if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2421 (path[3] == '\\') )
2422 return xmlStrdup((const xmlChar *) path);
2423 #endif
2425 /* sanitize filename starting with // so it can be used as URI */
2426 if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2427 path++;
2429 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2430 xmlFreeURI(uri);
2431 return xmlStrdup(path);
2434 /* Check if this is an "absolute uri" */
2435 absuri = xmlStrstr(path, BAD_CAST "://");
2436 if (absuri != NULL) {
2437 int l, j;
2438 unsigned char c;
2439 xmlChar *escURI;
2442 * this looks like an URI where some parts have not been
2443 * escaped leading to a parsing problem. Check that the first
2444 * part matches a protocol.
2446 l = absuri - path;
2447 /* Bypass if first part (part before the '://') is > 20 chars */
2448 if ((l <= 0) || (l > 20))
2449 goto path_processing;
2450 /* Bypass if any non-alpha characters are present in first part */
2451 for (j = 0;j < l;j++) {
2452 c = path[j];
2453 if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2454 goto path_processing;
2457 /* Escape all except the characters specified in the supplied path */
2458 escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2459 if (escURI != NULL) {
2460 /* Try parsing the escaped path */
2461 uri = xmlParseURI((const char *) escURI);
2462 /* If successful, return the escaped string */
2463 if (uri != NULL) {
2464 xmlFreeURI(uri);
2465 return escURI;
2467 xmlFree(escURI);
2471 path_processing:
2472 /* For Windows implementations, replace backslashes with 'forward slashes' */
2473 #if defined(_WIN32)
2475 * Create a URI structure
2477 uri = xmlCreateURI();
2478 if (uri == NULL) { /* Guard against 'out of memory' */
2479 return(NULL);
2482 len = xmlStrlen(path);
2483 if ((len > 2) && IS_WINDOWS_PATH(path)) {
2484 /* make the scheme 'file' */
2485 uri->scheme = (char *) xmlStrdup(BAD_CAST "file");
2486 /* allocate space for leading '/' + path + string terminator */
2487 uri->path = xmlMallocAtomic(len + 2);
2488 if (uri->path == NULL) {
2489 xmlFreeURI(uri); /* Guard against 'out of memory' */
2490 return(NULL);
2492 /* Put in leading '/' plus path */
2493 uri->path[0] = '/';
2494 p = uri->path + 1;
2495 strncpy(p, (char *) path, len + 1);
2496 } else {
2497 uri->path = (char *) xmlStrdup(path);
2498 if (uri->path == NULL) {
2499 xmlFreeURI(uri);
2500 return(NULL);
2502 p = uri->path;
2504 /* Now change all occurrences of '\' to '/' */
2505 while (*p != '\0') {
2506 if (*p == '\\')
2507 *p = '/';
2508 p++;
2511 if (uri->scheme == NULL) {
2512 ret = xmlStrdup((const xmlChar *) uri->path);
2513 } else {
2514 ret = xmlSaveUri(uri);
2517 xmlFreeURI(uri);
2518 #else
2519 ret = xmlStrdup((const xmlChar *) path);
2520 #endif
2521 return(ret);
2525 * xmlPathToURI:
2526 * @path: the resource locator in a filesystem notation
2528 * Constructs an URI expressing the existing path
2530 * Returns a new URI, or a duplicate of the path parameter if the
2531 * construction fails. The caller is responsible for freeing the memory
2532 * occupied by the returned string. If there is insufficient memory available,
2533 * or the argument is NULL, the function returns NULL.
2535 xmlChar *
2536 xmlPathToURI(const xmlChar *path)
2538 xmlURIPtr uri;
2539 xmlURI temp;
2540 xmlChar *ret, *cal;
2542 if (path == NULL)
2543 return(NULL);
2545 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2546 xmlFreeURI(uri);
2547 return xmlStrdup(path);
2549 cal = xmlCanonicPath(path);
2550 if (cal == NULL)
2551 return(NULL);
2552 #if defined(_WIN32)
2553 /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2554 If 'cal' is a valid URI already then we are done here, as continuing would make
2555 it invalid. */
2556 if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2557 xmlFreeURI(uri);
2558 return cal;
2560 /* 'cal' can contain a relative path with backslashes. If that is processed
2561 by xmlSaveURI, they will be escaped and the external entity loader machinery
2562 will fail. So convert them to slashes. Misuse 'ret' for walking. */
2563 ret = cal;
2564 while (*ret != '\0') {
2565 if (*ret == '\\')
2566 *ret = '/';
2567 ret++;
2569 #endif
2570 memset(&temp, 0, sizeof(temp));
2571 temp.path = (char *) cal;
2572 ret = xmlSaveUri(&temp);
2573 xmlFree(cal);
2574 return(ret);