2 * uri.c: set of generic URI related routines
4 * Reference: RFCs 3986, 2732 and 2373
6 * See Copyright for the status of this software.
17 #include <libxml/xmlmemory.h>
18 #include <libxml/uri.h>
19 #include <libxml/globals.h>
20 #include <libxml/xmlerror.h>
22 #include "private/error.h"
/*
 * The definition of the URI regexp in the above RFC has no size limit.
 * In practice URIs are usually relatively short, except for the
 * data URI scheme as defined in RFC 2397. Even for data URIs the usual
 * maximum size before hitting random practical limits is around 64 KB,
 * and 4 KB is usually the maximum admitted limit for proper operation.
 * The value below is more a security limit than anything else and
 * really should never be hit by 'normal' operations.
 * Set to 1 MByte in 2012; this is only enforced on output.
 *
 * The expansion is parenthesized so that it remains a single value when
 * used inside a larger expression (e.g. "x / MAX_URI_LENGTH").
 */
#define MAX_URI_LENGTH (1024 * 1024)
/*
 * Port value stored in an xmlURI to mark an empty server part.
 * Parenthesized so the negative literal cannot merge with neighbouring
 * tokens at the point of use.
 */
#define PORT_EMPTY_SERVER (-1)
42 xmlURIErrMemory(const char *extra
)
45 __xmlRaiseError(NULL
, NULL
, NULL
,
46 NULL
, NULL
, XML_FROM_URI
,
47 XML_ERR_NO_MEMORY
, XML_ERR_FATAL
, NULL
, 0,
48 extra
, NULL
, NULL
, 0, 0,
49 "Memory allocation failed : %s\n", extra
);
51 __xmlRaiseError(NULL
, NULL
, NULL
,
52 NULL
, NULL
, XML_FROM_URI
,
53 XML_ERR_NO_MEMORY
, XML_ERR_FATAL
, NULL
, 0,
54 NULL
, NULL
, NULL
, 0, 0,
55 "Memory allocation failed\n");
58 static void xmlCleanURI(xmlURIPtr uri
);
/*
 * Old rules from RFC 2396 used in the legacy handling code.
 * Unless noted otherwise each macro takes a character value; the argument
 * may be evaluated more than once, so it must not have side effects.
 */

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') || \
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') || \
    ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * This macro takes a pointer and tests the character it points to.
 */
#define IS_UNWISE(p) \
    (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) || \
     ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) || \
     ((*(p) == ']')) || ((*(p) == '`')))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 *
 * "[" and "]" are the characters RFC 2732 adds to the RFC 2396 set.
 */
#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
    ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
    ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
    ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
/*
 * Skip to the next character, handling escaped sequences: the pointer
 * lvalue p is advanced by three bytes when it points at '%', by one byte
 * otherwise. The argument is parenthesized so that lvalue expressions
 * such as *pp are advanced correctly.
 * NOTE(review): the two bytes following '%' are skipped without being
 * checked; callers must make sure they exist.
 */
#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))
135 * Productions from the spec.
137 * authority = server | reg_name
138 * reg_name = 1*( unreserved | escaped | "$" | "," |
139 * ";" | ":" | "@" | "&" | "=" | "+" )
141 * path = [ abs_path | opaque_part ]
/*
 * Call xmlStrndup() on a char string, casting the argument to
 * const xmlChar * and the result back to char *. The whole expansion is
 * parenthesized so the cast cannot bind to anything following the macro.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
146 /************************************************************************
150 ************************************************************************/
/*
 * Character classes from RFC 3986. Every macro takes a pointer and tests
 * the character(s) it points to; the argument may be evaluated more than
 * once, so it must not have side effects.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||		\
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)							\
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||		\
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)						\
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||		\
       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||		\
       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
       ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)						\
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||		\
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||		\
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)						\
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||		\
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The hex digits are only inspected after the preceding byte matched, so
 * nothing past a terminating NUL is read. The argument is parenthesized
 * before the offset is added so that any pointer expression can be used.
 */
#define ISA_PCT_ENCODED(p)						\
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)							\
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\
      ((*(p) == ':')) || ((*(p) == '@')))
203 * xmlParse3986Scheme:
204 * @uri: pointer to an URI structure
205 * @str: pointer to the string to analyze
207 * Parse an URI scheme
209 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
211 * Returns 0 or the error code
214 xmlParse3986Scheme(xmlURIPtr uri
, const char **str
) {
224 while (ISA_ALPHA(cur
) || ISA_DIGIT(cur
) ||
225 (*cur
== '+') || (*cur
== '-') || (*cur
== '.')) cur
++;
227 if (uri
->scheme
!= NULL
) xmlFree(uri
->scheme
);
228 uri
->scheme
= STRNDUP(*str
, cur
- *str
);
235 * xmlParse3986Fragment:
236 * @uri: pointer to an URI structure
237 * @str: pointer to the string to analyze
239 * Parse the fragment part of an URI
241 * fragment = *( pchar / "/" / "?" )
242 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
243 * in the fragment identifier but this is used very broadly for
244 * xpointer scheme selection, so we are allowing it here to not break
245 * for example all the DocBook processing chains.
247 * Returns 0 or the error code
250 xmlParse3986Fragment(xmlURIPtr uri
, const char **str
)
259 while ((ISA_PCHAR(cur
)) || (*cur
== '/') || (*cur
== '?') ||
260 (*cur
== '[') || (*cur
== ']') ||
261 ((uri
!= NULL
) && (uri
->cleanup
& 1) && (IS_UNWISE(cur
))))
264 if (uri
->fragment
!= NULL
)
265 xmlFree(uri
->fragment
);
266 if (uri
->cleanup
& 2)
267 uri
->fragment
= STRNDUP(*str
, cur
- *str
);
269 uri
->fragment
= xmlURIUnescapeString(*str
, cur
- *str
, NULL
);
277 * @uri: pointer to an URI structure
278 * @str: pointer to the string to analyze
280 * Parse the query part of an URI
284 * Returns 0 or the error code
287 xmlParse3986Query(xmlURIPtr uri
, const char **str
)
296 while ((ISA_PCHAR(cur
)) || (*cur
== '/') || (*cur
== '?') ||
297 ((uri
!= NULL
) && (uri
->cleanup
& 1) && (IS_UNWISE(cur
))))
300 if (uri
->query
!= NULL
)
302 if (uri
->cleanup
& 2)
303 uri
->query
= STRNDUP(*str
, cur
- *str
);
305 uri
->query
= xmlURIUnescapeString(*str
, cur
- *str
, NULL
);
307 /* Save the raw bytes of the query as well.
308 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
310 if (uri
->query_raw
!= NULL
)
311 xmlFree (uri
->query_raw
);
312 uri
->query_raw
= STRNDUP (*str
, cur
- *str
);
320 * @uri: pointer to an URI structure
321 * @str: the string to analyze
323 * Parse a port part and fills in the appropriate fields
324 * of the @uri structure
328 * Returns 0 or the error code
331 xmlParse3986Port(xmlURIPtr uri
, const char **str
)
333 const char *cur
= *str
;
336 if (ISA_DIGIT(cur
)) {
337 while (ISA_DIGIT(cur
)) {
338 int digit
= *cur
- '0';
340 if (port
> INT_MAX
/ 10)
343 if (port
> INT_MAX
- digit
)
358 * xmlParse3986Userinfo:
359 * @uri: pointer to an URI structure
360 * @str: the string to analyze
362 * Parse an user information part and fills in the appropriate fields
363 * of the @uri structure
365 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
367 * Returns 0 or the error code
370 xmlParse3986Userinfo(xmlURIPtr uri
, const char **str
)
375 while (ISA_UNRESERVED(cur
) || ISA_PCT_ENCODED(cur
) ||
376 ISA_SUB_DELIM(cur
) || (*cur
== ':'))
380 if (uri
->user
!= NULL
) xmlFree(uri
->user
);
381 if (uri
->cleanup
& 2)
382 uri
->user
= STRNDUP(*str
, cur
- *str
);
384 uri
->user
= xmlURIUnescapeString(*str
, cur
- *str
, NULL
);
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. On success *@str is moved past the digits that were
 * matched; on failure *@str is left unchanged. At most three digits are
 * consumed, whatever follows them.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;

    if ((cur[0] < '0') || (cur[0] > '9'))
        return(1);

    /*
     * cur[1] is only read once cur[0] is known to be a digit, and cur[2]
     * only once cur[1] is a digit, so the terminating NUL is never passed.
     */
    if ((cur[1] < '0') || (cur[1] > '9')) {
        /* DIGIT: 0-9 */
        cur += 1;
    } else if ((cur[2] < '0') || (cur[2] > '9')) {
        /* %x31-39 DIGIT: 10-99, a leading zero is not allowed */
        if (cur[0] == '0')
            return(1);
        cur += 2;
    } else if (cur[0] == '1') {
        /* "1" 2DIGIT: 100-199 */
        cur += 3;
    } else if ((cur[0] == '2') &&
               ((cur[1] < '5') ||
                ((cur[1] == '5') && (cur[2] <= '5')))) {
        /*
         * 200-249 and 250-255. The last digit of the 25x form must be
         * checked against '5'; testing cur[1] again would let 256-259
         * through.
         */
        cur += 3;
    } else {
        return(1);
    }

    *str = cur;
    return(0);
}
431 * @uri: pointer to an URI structure
432 * @str: the string to analyze
434 * Parse an host part and fills in the appropriate fields
435 * of the @uri structure
437 * host = IP-literal / IPv4address / reg-name
438 * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
439 * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
440 * reg-name = *( unreserved / pct-encoded / sub-delims )
442 * Returns 0 or the error code
445 xmlParse3986Host(xmlURIPtr uri
, const char **str
)
447 const char *cur
= *str
;
452 * IPv6 and future addressing scheme are enclosed between brackets
456 while ((*cur
!= ']') && (*cur
!= 0))
464 * try to parse an IPv4
466 if (ISA_DIGIT(cur
)) {
467 if (xmlParse3986DecOctet(&cur
) != 0)
472 if (xmlParse3986DecOctet(&cur
) != 0)
476 if (xmlParse3986DecOctet(&cur
) != 0)
480 if (xmlParse3986DecOctet(&cur
) != 0)
487 * then this should be a hostname which can be empty
489 while (ISA_UNRESERVED(cur
) || ISA_PCT_ENCODED(cur
) || ISA_SUB_DELIM(cur
))
493 if (uri
->authority
!= NULL
) xmlFree(uri
->authority
);
494 uri
->authority
= NULL
;
495 if (uri
->server
!= NULL
) xmlFree(uri
->server
);
497 if (uri
->cleanup
& 2)
498 uri
->server
= STRNDUP(host
, cur
- host
);
500 uri
->server
= xmlURIUnescapeString(host
, cur
- host
, NULL
);
509 * xmlParse3986Authority:
510 * @uri: pointer to an URI structure
511 * @str: the string to analyze
513 * Parse an authority part and fills in the appropriate fields
514 * of the @uri structure
516 * authority = [ userinfo "@" ] host [ ":" port ]
518 * Returns 0 or the error code
521 xmlParse3986Authority(xmlURIPtr uri
, const char **str
)
528 * try to parse an userinfo and check for the trailing @
530 ret
= xmlParse3986Userinfo(uri
, &cur
);
531 if ((ret
!= 0) || (*cur
!= '@'))
535 ret
= xmlParse3986Host(uri
, &cur
);
536 if (ret
!= 0) return(ret
);
539 ret
= xmlParse3986Port(uri
, &cur
);
540 if (ret
!= 0) return(ret
);
547 * xmlParse3986Segment:
548 * @str: the string to analyze
549 * @forbid: an optional forbidden character
550 * @empty: allow an empty segment
552 * Parse a single path segment. Unlike the other parsing routines, this one
553 * takes no @uri structure and therefore fills in no URI field.
556 * segment-nz = 1*pchar
557 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
558 * ; non-zero-length segment without any colon ":"
560 * Returns 0 or the error code
563 xmlParse3986Segment(const char **str
, char forbid
, int empty
)
568 if (!ISA_PCHAR(cur
)) {
573 while (ISA_PCHAR(cur
) && (*cur
!= forbid
))
580 * xmlParse3986PathAbEmpty:
581 * @uri: pointer to an URI structure
582 * @str: the string to analyze
584 * Parse a path that is absolute or empty and fill in the appropriate fields
585 * of the @uri structure
587 * path-abempty = *( "/" segment )
589 * Returns 0 or the error code
592 xmlParse3986PathAbEmpty(xmlURIPtr uri
, const char **str
)
599 while (*cur
== '/') {
601 ret
= xmlParse3986Segment(&cur
, 0, 1);
602 if (ret
!= 0) return(ret
);
605 if (uri
->path
!= NULL
) xmlFree(uri
->path
);
607 if (uri
->cleanup
& 2)
608 uri
->path
= STRNDUP(*str
, cur
- *str
);
610 uri
->path
= xmlURIUnescapeString(*str
, cur
- *str
, NULL
);
620 * xmlParse3986PathAbsolute:
621 * @uri: pointer to an URI structure
622 * @str: the string to analyze
624 * Parse an absolute path and fill in the appropriate fields
625 * of the @uri structure
627 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
629 * Returns 0 or the error code
632 xmlParse3986PathAbsolute(xmlURIPtr uri
, const char **str
)
642 ret
= xmlParse3986Segment(&cur
, 0, 0);
644 while (*cur
== '/') {
646 ret
= xmlParse3986Segment(&cur
, 0, 1);
647 if (ret
!= 0) return(ret
);
651 if (uri
->path
!= NULL
) xmlFree(uri
->path
);
653 if (uri
->cleanup
& 2)
654 uri
->path
= STRNDUP(*str
, cur
- *str
);
656 uri
->path
= xmlURIUnescapeString(*str
, cur
- *str
, NULL
);
666 * xmlParse3986PathRootless:
667 * @uri: pointer to an URI structure
668 * @str: the string to analyze
670 * Parse a rootless path and fill in the appropriate fields
671 * of the @uri structure
673 * path-rootless = segment-nz *( "/" segment )
675 * Returns 0 or the error code
678 xmlParse3986PathRootless(xmlURIPtr uri
, const char **str
)
685 ret
= xmlParse3986Segment(&cur
, 0, 0);
686 if (ret
!= 0) return(ret
);
687 while (*cur
== '/') {
689 ret
= xmlParse3986Segment(&cur
, 0, 1);
690 if (ret
!= 0) return(ret
);
693 if (uri
->path
!= NULL
) xmlFree(uri
->path
);
695 if (uri
->cleanup
& 2)
696 uri
->path
= STRNDUP(*str
, cur
- *str
);
698 uri
->path
= xmlURIUnescapeString(*str
, cur
- *str
, NULL
);
708 * xmlParse3986PathNoScheme:
709 * @uri: pointer to an URI structure
710 * @str: the string to analyze
712 * Parse a path whose first segment contains no colon and fill in the appropriate fields
713 * of the @uri structure
715 * path-noscheme = segment-nz-nc *( "/" segment )
717 * Returns 0 or the error code
720 xmlParse3986PathNoScheme(xmlURIPtr uri
, const char **str
)
727 ret
= xmlParse3986Segment(&cur
, ':', 0);
728 if (ret
!= 0) return(ret
);
729 while (*cur
== '/') {
731 ret
= xmlParse3986Segment(&cur
, 0, 1);
732 if (ret
!= 0) return(ret
);
735 if (uri
->path
!= NULL
) xmlFree(uri
->path
);
737 if (uri
->cleanup
& 2)
738 uri
->path
= STRNDUP(*str
, cur
- *str
);
740 uri
->path
= xmlURIUnescapeString(*str
, cur
- *str
, NULL
);
750 * xmlParse3986HierPart:
751 * @uri: pointer to an URI structure
752 * @str: the string to analyze
754 * Parse an hierarchical part and fills in the appropriate fields
755 * of the @uri structure
757 * hier-part = "//" authority path-abempty
762 * Returns 0 or the error code
765 xmlParse3986HierPart(xmlURIPtr uri
, const char **str
)
772 if ((*cur
== '/') && (*(cur
+ 1) == '/')) {
774 ret
= xmlParse3986Authority(uri
, &cur
);
775 if (ret
!= 0) return(ret
);
777 * An empty server is marked with a special URI value.
779 if ((uri
->server
== NULL
) && (uri
->port
== PORT_EMPTY
))
780 uri
->port
= PORT_EMPTY_SERVER
;
781 ret
= xmlParse3986PathAbEmpty(uri
, &cur
);
782 if (ret
!= 0) return(ret
);
785 } else if (*cur
== '/') {
786 ret
= xmlParse3986PathAbsolute(uri
, &cur
);
787 if (ret
!= 0) return(ret
);
788 } else if (ISA_PCHAR(cur
)) {
789 ret
= xmlParse3986PathRootless(uri
, &cur
);
790 if (ret
!= 0) return(ret
);
792 /* path-empty is effectively empty */
794 if (uri
->path
!= NULL
) xmlFree(uri
->path
);
803 * xmlParse3986RelativeRef:
804 * @uri: pointer to an URI structure
805 * @str: the string to analyze
807 * Parse an URI string and fills in the appropriate fields
808 * of the @uri structure
810 * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
811 * relative-part = "//" authority path-abempty
816 * Returns 0 or the error code
819 xmlParse3986RelativeRef(xmlURIPtr uri
, const char *str
) {
822 if ((*str
== '/') && (*(str
+ 1) == '/')) {
824 ret
= xmlParse3986Authority(uri
, &str
);
825 if (ret
!= 0) return(ret
);
826 ret
= xmlParse3986PathAbEmpty(uri
, &str
);
827 if (ret
!= 0) return(ret
);
828 } else if (*str
== '/') {
829 ret
= xmlParse3986PathAbsolute(uri
, &str
);
830 if (ret
!= 0) return(ret
);
831 } else if (ISA_PCHAR(str
)) {
832 ret
= xmlParse3986PathNoScheme(uri
, &str
);
833 if (ret
!= 0) return(ret
);
835 /* path-empty is effectively empty */
837 if (uri
->path
!= NULL
) xmlFree(uri
->path
);
844 ret
= xmlParse3986Query(uri
, &str
);
845 if (ret
!= 0) return(ret
);
849 ret
= xmlParse3986Fragment(uri
, &str
);
850 if (ret
!= 0) return(ret
);
862 * @uri: pointer to an URI structure
863 * @str: the string to analyze
865 * Parse an URI string and fills in the appropriate fields
866 * of the @uri structure
868 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
870 * Returns 0 or the error code
873 xmlParse3986URI(xmlURIPtr uri
, const char *str
) {
876 ret
= xmlParse3986Scheme(uri
, &str
);
877 if (ret
!= 0) return(ret
);
882 ret
= xmlParse3986HierPart(uri
, &str
);
883 if (ret
!= 0) return(ret
);
886 ret
= xmlParse3986Query(uri
, &str
);
887 if (ret
!= 0) return(ret
);
891 ret
= xmlParse3986Fragment(uri
, &str
);
892 if (ret
!= 0) return(ret
);
902 * xmlParse3986URIReference:
903 * @uri: pointer to an URI structure
904 * @str: the string to analyze
906 * Parse an URI reference string and fills in the appropriate fields
907 * of the @uri structure
909 * URI-reference = URI / relative-ref
911 * Returns 0 or the error code
914 xmlParse3986URIReference(xmlURIPtr uri
, const char *str
) {
922 * Try first to parse absolute refs, then fallback to relative if
925 ret
= xmlParse3986URI(uri
, str
);
928 ret
= xmlParse3986RelativeRef(uri
, str
);
939 * @str: the URI string to analyze
941 * Parse an URI based on RFC 3986
943 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
945 * Returns a newly built xmlURIPtr or NULL in case of error
948 xmlParseURI(const char *str
) {
954 uri
= xmlCreateURI();
956 ret
= xmlParse3986URIReference(uri
, str
);
966 * xmlParseURIReference:
967 * @uri: pointer to an URI structure
968 * @str: the string to analyze
970 * Parse an URI reference string based on RFC 3986 and fills in the
971 * appropriate fields of the @uri structure
973 * URI-reference = URI / relative-ref
975 * Returns 0 or the error code
978 xmlParseURIReference(xmlURIPtr uri
, const char *str
) {
979 return(xmlParse3986URIReference(uri
, str
));
984 * @str: the URI string to analyze
985 * @raw: if 1, unescaping of URI pieces is disabled
987 * Parse an URI but allows to keep intact the original fragments.
989 * URI-reference = URI / relative-ref
991 * Returns a newly built xmlURIPtr or NULL in case of error
994 xmlParseURIRaw(const char *str
, int raw
) {
1000 uri
= xmlCreateURI();
1005 ret
= xmlParseURIReference(uri
, str
);
1014 /************************************************************************
1016 * Generic URI structure functions *
1018 ************************************************************************/
1023 * Simply creates an empty xmlURI
1025 * Returns the new structure or NULL in case of error
1028 xmlCreateURI(void) {
1031 ret
= (xmlURIPtr
) xmlMalloc(sizeof(xmlURI
));
1033 xmlURIErrMemory("creating URI structure\n");
1036 memset(ret
, 0, sizeof(xmlURI
));
1037 ret
->port
= PORT_EMPTY
;
1042 * xmlSaveUriRealloc:
1044 * Function to handle properly a reallocation when saving an URI
1045 * Also imposes some limit on the length of an URI string output
1048 xmlSaveUriRealloc(xmlChar
*ret
, int *max
) {
1052 if (*max
> MAX_URI_LENGTH
) {
1053 xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1057 temp
= (xmlChar
*) xmlRealloc(ret
, (tmp
+ 1));
1059 xmlURIErrMemory("saving URI\n");
1068 * @uri: pointer to an xmlURI
1070 * Save the URI as an escaped string
1072 * Returns a new string (to be deallocated by caller)
1075 xmlSaveUri(xmlURIPtr uri
) {
1076 xmlChar
*ret
= NULL
;
1082 if (uri
== NULL
) return(NULL
);
1086 ret
= (xmlChar
*) xmlMallocAtomic(max
+ 1);
1088 xmlURIErrMemory("saving URI\n");
1093 if (uri
->scheme
!= NULL
) {
1097 temp
= xmlSaveUriRealloc(ret
, &max
);
1098 if (temp
== NULL
) goto mem_error
;
1104 temp
= xmlSaveUriRealloc(ret
, &max
);
1105 if (temp
== NULL
) goto mem_error
;
1110 if (uri
->opaque
!= NULL
) {
1113 if (len
+ 3 >= max
) {
1114 temp
= xmlSaveUriRealloc(ret
, &max
);
1115 if (temp
== NULL
) goto mem_error
;
1118 if (IS_RESERVED(*(p
)) || IS_UNRESERVED(*(p
)))
1121 int val
= *(unsigned char *)p
++;
1122 int hi
= val
/ 0x10, lo
= val
% 0x10;
1124 ret
[len
++] = hi
+ (hi
> 9? 'A'-10 : '0');
1125 ret
[len
++] = lo
+ (lo
> 9? 'A'-10 : '0');
1129 if ((uri
->server
!= NULL
) || (uri
->port
!= PORT_EMPTY
)) {
1130 if (len
+ 3 >= max
) {
1131 temp
= xmlSaveUriRealloc(ret
, &max
);
1132 if (temp
== NULL
) goto mem_error
;
1137 if (uri
->user
!= NULL
) {
1140 if (len
+ 3 >= max
) {
1141 temp
= xmlSaveUriRealloc(ret
, &max
);
1142 if (temp
== NULL
) goto mem_error
;
1145 if ((IS_UNRESERVED(*(p
))) ||
1146 ((*(p
) == ';')) || ((*(p
) == ':')) ||
1147 ((*(p
) == '&')) || ((*(p
) == '=')) ||
1148 ((*(p
) == '+')) || ((*(p
) == '$')) ||
1152 int val
= *(unsigned char *)p
++;
1153 int hi
= val
/ 0x10, lo
= val
% 0x10;
1155 ret
[len
++] = hi
+ (hi
> 9? 'A'-10 : '0');
1156 ret
[len
++] = lo
+ (lo
> 9? 'A'-10 : '0');
1159 if (len
+ 3 >= max
) {
1160 temp
= xmlSaveUriRealloc(ret
, &max
);
1161 if (temp
== NULL
) goto mem_error
;
1166 if (uri
->server
!= NULL
) {
1170 temp
= xmlSaveUriRealloc(ret
, &max
);
1171 if (temp
== NULL
) goto mem_error
;
1174 /* TODO: escaping? */
1175 ret
[len
++] = (xmlChar
) *p
++;
1178 if (uri
->port
> 0) {
1179 if (len
+ 10 >= max
) {
1180 temp
= xmlSaveUriRealloc(ret
, &max
);
1181 if (temp
== NULL
) goto mem_error
;
1184 len
+= snprintf((char *) &ret
[len
], max
- len
, ":%d", uri
->port
);
1186 } else if (uri
->authority
!= NULL
) {
1187 if (len
+ 3 >= max
) {
1188 temp
= xmlSaveUriRealloc(ret
, &max
);
1189 if (temp
== NULL
) goto mem_error
;
1196 if (len
+ 3 >= max
) {
1197 temp
= xmlSaveUriRealloc(ret
, &max
);
1198 if (temp
== NULL
) goto mem_error
;
1201 if ((IS_UNRESERVED(*(p
))) ||
1202 ((*(p
) == '$')) || ((*(p
) == ',')) || ((*(p
) == ';')) ||
1203 ((*(p
) == ':')) || ((*(p
) == '@')) || ((*(p
) == '&')) ||
1204 ((*(p
) == '=')) || ((*(p
) == '+')))
1207 int val
= *(unsigned char *)p
++;
1208 int hi
= val
/ 0x10, lo
= val
% 0x10;
1210 ret
[len
++] = hi
+ (hi
> 9? 'A'-10 : '0');
1211 ret
[len
++] = lo
+ (lo
> 9? 'A'-10 : '0');
1214 } else if (uri
->scheme
!= NULL
) {
1215 if (len
+ 3 >= max
) {
1216 temp
= xmlSaveUriRealloc(ret
, &max
);
1217 if (temp
== NULL
) goto mem_error
;
1221 if (uri
->path
!= NULL
) {
1224 * the colon in file:///d: should not be escaped or
1225 * Windows accesses fail later.
1227 if ((uri
->scheme
!= NULL
) &&
1229 (((p
[1] >= 'a') && (p
[1] <= 'z')) ||
1230 ((p
[1] >= 'A') && (p
[1] <= 'Z'))) &&
1232 (xmlStrEqual(BAD_CAST uri
->scheme
, BAD_CAST
"file"))) {
1233 if (len
+ 3 >= max
) {
1234 temp
= xmlSaveUriRealloc(ret
, &max
);
1235 if (temp
== NULL
) goto mem_error
;
1243 if (len
+ 3 >= max
) {
1244 temp
= xmlSaveUriRealloc(ret
, &max
);
1245 if (temp
== NULL
) goto mem_error
;
1248 if ((IS_UNRESERVED(*(p
))) || ((*(p
) == '/')) ||
1249 ((*(p
) == ';')) || ((*(p
) == '@')) || ((*(p
) == '&')) ||
1250 ((*(p
) == '=')) || ((*(p
) == '+')) || ((*(p
) == '$')) ||
1254 int val
= *(unsigned char *)p
++;
1255 int hi
= val
/ 0x10, lo
= val
% 0x10;
1257 ret
[len
++] = hi
+ (hi
> 9? 'A'-10 : '0');
1258 ret
[len
++] = lo
+ (lo
> 9? 'A'-10 : '0');
1262 if (uri
->query_raw
!= NULL
) {
1263 if (len
+ 1 >= max
) {
1264 temp
= xmlSaveUriRealloc(ret
, &max
);
1265 if (temp
== NULL
) goto mem_error
;
1271 if (len
+ 1 >= max
) {
1272 temp
= xmlSaveUriRealloc(ret
, &max
);
1273 if (temp
== NULL
) goto mem_error
;
1278 } else if (uri
->query
!= NULL
) {
1279 if (len
+ 3 >= max
) {
1280 temp
= xmlSaveUriRealloc(ret
, &max
);
1281 if (temp
== NULL
) goto mem_error
;
1287 if (len
+ 3 >= max
) {
1288 temp
= xmlSaveUriRealloc(ret
, &max
);
1289 if (temp
== NULL
) goto mem_error
;
1292 if ((IS_UNRESERVED(*(p
))) || (IS_RESERVED(*(p
))))
1295 int val
= *(unsigned char *)p
++;
1296 int hi
= val
/ 0x10, lo
= val
% 0x10;
1298 ret
[len
++] = hi
+ (hi
> 9? 'A'-10 : '0');
1299 ret
[len
++] = lo
+ (lo
> 9? 'A'-10 : '0');
1304 if (uri
->fragment
!= NULL
) {
1305 if (len
+ 3 >= max
) {
1306 temp
= xmlSaveUriRealloc(ret
, &max
);
1307 if (temp
== NULL
) goto mem_error
;
1313 if (len
+ 3 >= max
) {
1314 temp
= xmlSaveUriRealloc(ret
, &max
);
1315 if (temp
== NULL
) goto mem_error
;
1318 if ((IS_UNRESERVED(*(p
))) || (IS_RESERVED(*(p
))))
1321 int val
= *(unsigned char *)p
++;
1322 int hi
= val
/ 0x10, lo
= val
% 0x10;
1324 ret
[len
++] = hi
+ (hi
> 9? 'A'-10 : '0');
1325 ret
[len
++] = lo
+ (lo
> 9? 'A'-10 : '0');
1330 temp
= xmlSaveUriRealloc(ret
, &max
);
1331 if (temp
== NULL
) goto mem_error
;
1344 * @stream: a FILE* for the output
1345 * @uri: pointer to an xmlURI
1347 * Prints the URI in the stream @stream.
1350 xmlPrintURI(FILE *stream
, xmlURIPtr uri
) {
1353 out
= xmlSaveUri(uri
);
1355 fprintf(stream
, "%s", (char *) out
);
1362 * @uri: pointer to an xmlURI
1364 * Make sure the xmlURI struct is free of content
1367 xmlCleanURI(xmlURIPtr uri
) {
1368 if (uri
== NULL
) return;
1370 if (uri
->scheme
!= NULL
) xmlFree(uri
->scheme
);
1372 if (uri
->server
!= NULL
) xmlFree(uri
->server
);
1374 if (uri
->user
!= NULL
) xmlFree(uri
->user
);
1376 if (uri
->path
!= NULL
) xmlFree(uri
->path
);
1378 if (uri
->fragment
!= NULL
) xmlFree(uri
->fragment
);
1379 uri
->fragment
= NULL
;
1380 if (uri
->opaque
!= NULL
) xmlFree(uri
->opaque
);
1382 if (uri
->authority
!= NULL
) xmlFree(uri
->authority
);
1383 uri
->authority
= NULL
;
1384 if (uri
->query
!= NULL
) xmlFree(uri
->query
);
1386 if (uri
->query_raw
!= NULL
) xmlFree(uri
->query_raw
);
1387 uri
->query_raw
= NULL
;
1392 * @uri: pointer to an xmlURI
1394 * Free up the xmlURI struct
1397 xmlFreeURI(xmlURIPtr uri
) {
1398 if (uri
== NULL
) return;
1400 if (uri
->scheme
!= NULL
) xmlFree(uri
->scheme
);
1401 if (uri
->server
!= NULL
) xmlFree(uri
->server
);
1402 if (uri
->user
!= NULL
) xmlFree(uri
->user
);
1403 if (uri
->path
!= NULL
) xmlFree(uri
->path
);
1404 if (uri
->fragment
!= NULL
) xmlFree(uri
->fragment
);
1405 if (uri
->opaque
!= NULL
) xmlFree(uri
->opaque
);
1406 if (uri
->authority
!= NULL
) xmlFree(uri
->authority
);
1407 if (uri
->query
!= NULL
) xmlFree(uri
->query
);
1408 if (uri
->query_raw
!= NULL
) xmlFree(uri
->query_raw
);
1412 /************************************************************************
1414 * Helper functions *
1416 ************************************************************************/
1419 * xmlNormalizeURIPath:
1420 * @path: pointer to the path string
1422 * Applies the 5 normalization steps to a path string--that is, RFC 2396
1423 * Section 5.2, steps 6.c through 6.g.
1425 * Normalization occurs directly on the string, no new allocation is done
1427 * Returns 0 or an error code
1430 xmlNormalizeURIPath(char *path
) {
1436 /* Skip all initial "/" chars. We want to get to the beginning of the
1437 * first non-empty segment.
1440 while (cur
[0] == '/')
1445 /* Keep everything we've seen so far. */
1449 * Analyze each segment in sequence for cases (c) and (d).
1451 while (cur
[0] != '\0') {
1453 * c) All occurrences of "./", where "." is a complete path segment,
1454 * are removed from the buffer string.
1456 if ((cur
[0] == '.') && (cur
[1] == '/')) {
1458 /* '//' normalization should be done at this point too */
1459 while (cur
[0] == '/')
1465 * d) If the buffer string ends with "." as a complete path segment,
1466 * that "." is removed.
1468 if ((cur
[0] == '.') && (cur
[1] == '\0'))
1471 /* Otherwise keep the segment. */
1472 while (cur
[0] != '/') {
1475 (out
++)[0] = (cur
++)[0];
1478 while ((cur
[0] == '/') && (cur
[1] == '/'))
1481 (out
++)[0] = (cur
++)[0];
1486 /* Reset to the beginning of the first segment for the next sequence. */
1488 while (cur
[0] == '/')
1494 * Analyze each segment in sequence for cases (e) and (f).
1496 * e) All occurrences of "<segment>/../", where <segment> is a
1497 * complete path segment not equal to "..", are removed from the
1498 * buffer string. Removal of these path segments is performed
1499 * iteratively, removing the leftmost matching pattern on each
1500 * iteration, until no matching pattern remains.
1502 * f) If the buffer string ends with "<segment>/..", where <segment>
1503 * is a complete path segment not equal to "..", that
1504 * "<segment>/.." is removed.
1506 * To satisfy the "iterative" clause in (e), we need to collapse the
1507 * string every time we find something that needs to be removed. Thus,
1508 * we don't need to keep two pointers into the string: we only need a
1509 * "current position" pointer.
1514 /* At the beginning of each iteration of this loop, "cur" points to
1515 * the first character of the segment we want to examine.
1518 /* Find the end of the current segment. */
1520 while ((segp
[0] != '/') && (segp
[0] != '\0'))
1523 /* If this is the last segment, we're done (we need at least two
1524 * segments to meet the criteria for the (e) and (f) cases).
1526 if (segp
[0] == '\0')
1529 /* If the first segment is "..", or if the next segment _isn't_ "..",
1530 * keep this segment and try the next one.
1533 if (((cur
[0] == '.') && (cur
[1] == '.') && (segp
== cur
+3))
1534 || ((segp
[0] != '.') || (segp
[1] != '.')
1535 || ((segp
[2] != '/') && (segp
[2] != '\0')))) {
1540 /* If we get here, remove this segment and the next one and back up
1541 * to the previous segment (if there is one), to implement the
1542 * "iteratively" clause. It's pretty much impossible to back up
1543 * while maintaining two pointers into the buffer, so just compact
1544 * the whole buffer now.
1547 /* If this is the end of the buffer, we're done. */
1548 if (segp
[2] == '\0') {
1552 /* Valgrind complained, strcpy(cur, segp + 3); */
1553 /* string will overlap, do not use strcpy */
1556 while ((*tmp
++ = *segp
++) != 0)
1559 /* If there are no previous segments, then keep going from here. */
1561 while ((segp
> path
) && ((--segp
)[0] == '/'))
1566 /* "segp" is pointing to the end of a previous segment; find its
1567 * start. We need to back up to the previous segment and start
1568 * over with that to handle things like "foo/bar/../..". If we
1569 * don't do this, then on the first pass we'll remove the "bar/..",
1570 * but be pointing at the second ".." so we won't realize we can also
1571 * remove the "foo/..".
1574 while ((cur
> path
) && (cur
[-1] != '/'))
1580 * g) If the resulting buffer string still begins with one or more
1581 * complete path segments of "..", then the reference is
1582 * considered to be in error. Implementations may handle this
1583 * error by retaining these components in the resolved path (i.e.,
1584 * treating them as part of the final URI), by removing them from
1585 * the resolved path (i.e., discarding relative levels above the
1586 * root), or by avoiding traversal of the reference.
1588 * We discard them from the final path.
1590 if (path
[0] == '/') {
1592 while ((cur
[0] == '/') && (cur
[1] == '.') && (cur
[2] == '.')
1593 && ((cur
[3] == '/') || (cur
[3] == '\0')))
1598 while (cur
[0] != '\0')
1599 (out
++)[0] = (cur
++)[0];
1607 static int is_hex(char c
) {
1608 if (((c
>= '0') && (c
<= '9')) ||
1609 ((c
>= 'a') && (c
<= 'f')) ||
1610 ((c
>= 'A') && (c
<= 'F')))
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:  the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, but does not check that the string is a URI. The
 * output is a direct unsigned char translation of %XX values (no encoding).
 * Note that the length of the result can only be smaller than or the same
 * size as the input string, so when @target is supplied it must have room
 * for at least @len + 1 bytes.
 *
 * A '%' that is not followed by two hexadecimal digits inside the
 * remaining input is copied through unchanged.
 *
 * Returns a copy of the string, but unescaped (this is @target itself when
 * @target is not NULL, otherwise a newly allocated buffer); returns NULL
 * only in case of error.
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *ret, *out;
    const char *in;

    if (str == NULL)
        return(NULL);
    if (len <= 0) len = strlen(str);
    /* strlen() result did not fit in an int */
    if (len < 0) return(NULL);

    if (target == NULL) {
        ret = (char *) xmlMallocAtomic(len + 1);
        if (ret == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    } else
        ret = target;

    in = str;
    out = ret;
    while (len > 0) {
        /* a complete escape needs three remaining bytes: '%' and 2 digits */
        if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
            int c = 0;

            /* high nibble */
            in++;
            if ((*in >= '0') && (*in <= '9'))
                c = (*in - '0');
            else if ((*in >= 'a') && (*in <= 'f'))
                c = (*in - 'a') + 10;
            else if ((*in >= 'A') && (*in <= 'F'))
                c = (*in - 'A') + 10;

            /* low nibble */
            in++;
            if ((*in >= '0') && (*in <= '9'))
                c = c * 16 + (*in - '0');
            else if ((*in >= 'a') && (*in <= 'f'))
                c = c * 16 + (*in - 'a') + 10;
            else if ((*in >= 'A') && (*in <= 'F'))
                c = c * 16 + (*in - 'A') + 10;

            in++;
            len -= 3;
            /* Explicit sign change */
            *out++ = (char) c;
        } else {
            *out++ = *in++;
            len--;
        }
    }
    *out = 0;
    return(ret);
}
1681 * @str: string to escape
1682 * @list: exception list string of chars not to escape
1684 * This routine escapes a string to hex, ignoring reserved characters
1685 * (a-z, A-Z, 0-9, "@-_.!~*'()") and the characters in the exception list.
1687 * Returns a new escaped string or NULL in case of error.
1690 xmlURIEscapeStr(const xmlChar
*str
, const xmlChar
*list
) {
1699 return(xmlStrdup(str
));
1700 len
= xmlStrlen(str
);
1701 if (!(len
> 0)) return(NULL
);
1704 ret
= (xmlChar
*) xmlMallocAtomic(len
);
1706 xmlURIErrMemory("escaping URI value\n");
1709 in
= (const xmlChar
*) str
;
1712 if (len
- out
<= 3) {
1713 temp
= xmlSaveUriRealloc(ret
, &len
);
1715 xmlURIErrMemory("escaping URI value\n");
1724 if ((ch
!= '@') && (!IS_UNRESERVED(ch
)) && (!xmlStrchr(list
, ch
))) {
1729 ret
[out
++] = '0' + val
;
1731 ret
[out
++] = 'A' + val
- 0xA;
1734 ret
[out
++] = '0' + val
;
1736 ret
[out
++] = 'A' + val
- 0xA;
1749 * @str: the string of the URI to escape
1751 * Escaping routine, does not do validity checks !
1752 * It will try to escape the chars needing this, but this is heuristic
1753 * based it's impossible to be sure.
1755 * Returns an copy of the string, but escaped
1758 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1759 * according to RFC2396.
1763 xmlURIEscape(const xmlChar
* str
)
1765 xmlChar
*ret
, *segment
= NULL
;
1772 uri
= xmlCreateURI();
1775 * Allow escaping errors in the unescaped form
1778 ret2
= xmlParseURIReference(uri
, (const char *)str
);
1790 #define NULLCHK(p) if(!p) { \
1791 xmlURIErrMemory("escaping URI value\n"); \
1797 segment
= xmlURIEscapeStr(BAD_CAST uri
->scheme
, BAD_CAST
"+-.");
1799 ret
= xmlStrcat(ret
, segment
);
1800 ret
= xmlStrcat(ret
, BAD_CAST
":");
1804 if (uri
->authority
) {
1806 xmlURIEscapeStr(BAD_CAST uri
->authority
, BAD_CAST
"/?;:@");
1808 ret
= xmlStrcat(ret
, BAD_CAST
"//");
1809 ret
= xmlStrcat(ret
, segment
);
1814 segment
= xmlURIEscapeStr(BAD_CAST uri
->user
, BAD_CAST
";:&=+$,");
1816 ret
= xmlStrcat(ret
,BAD_CAST
"//");
1817 ret
= xmlStrcat(ret
, segment
);
1818 ret
= xmlStrcat(ret
, BAD_CAST
"@");
1823 segment
= xmlURIEscapeStr(BAD_CAST uri
->server
, BAD_CAST
"/?;:@");
1825 if (uri
->user
== NULL
)
1826 ret
= xmlStrcat(ret
, BAD_CAST
"//");
1827 ret
= xmlStrcat(ret
, segment
);
1831 if (uri
->port
> 0) {
1834 snprintf((char *) port
, 11, "%d", uri
->port
);
1835 ret
= xmlStrcat(ret
, BAD_CAST
":");
1836 ret
= xmlStrcat(ret
, port
);
1841 xmlURIEscapeStr(BAD_CAST uri
->path
, BAD_CAST
":@&=+$,/?;");
1843 ret
= xmlStrcat(ret
, segment
);
1847 if (uri
->query_raw
) {
1848 ret
= xmlStrcat(ret
, BAD_CAST
"?");
1849 ret
= xmlStrcat(ret
, BAD_CAST uri
->query_raw
);
1851 else if (uri
->query
) {
1853 xmlURIEscapeStr(BAD_CAST uri
->query
, BAD_CAST
";/?:@&=+,$");
1855 ret
= xmlStrcat(ret
, BAD_CAST
"?");
1856 ret
= xmlStrcat(ret
, segment
);
1861 segment
= xmlURIEscapeStr(BAD_CAST uri
->opaque
, BAD_CAST
"");
1863 ret
= xmlStrcat(ret
, segment
);
1867 if (uri
->fragment
) {
1868 segment
= xmlURIEscapeStr(BAD_CAST uri
->fragment
, BAD_CAST
"#");
1870 ret
= xmlStrcat(ret
, BAD_CAST
"#");
1871 ret
= xmlStrcat(ret
, segment
);
/************************************************************************
 *									*
 *			Public functions				*
 *									*
 ************************************************************************/
1889 * @URI: the URI instance found in the document
1890 * @base: the base value
1892 * Computes he final URI of the reference done by checking that
1893 * the given URI is valid, and building the final URI using the
1894 * base URI. This is processed according to section 5.2 of the
1897 * 5.2. Resolving Relative References to Absolute Form
1899 * Returns a new URI string (to be freed by the caller) or NULL in case
1903 xmlBuildURI(const xmlChar
*URI
, const xmlChar
*base
) {
1904 xmlChar
*val
= NULL
;
1905 int ret
, len
, indx
, cur
, out
;
1906 xmlURIPtr ref
= NULL
;
1907 xmlURIPtr bas
= NULL
;
1908 xmlURIPtr res
= NULL
;
1911 * 1) The URI reference is parsed into the potential four components and
1912 * fragment identifier, as described in Section 4.3.
1914 * NOTE that a completely empty URI is treated by modern browsers
1915 * as a reference to "." rather than as a synonym for the current
1916 * URI. Should we do that here?
1922 ref
= xmlCreateURI();
1925 ret
= xmlParseURIReference(ref
, (const char *) URI
);
1932 if ((ref
!= NULL
) && (ref
->scheme
!= NULL
)) {
1934 * The URI is absolute don't modify.
1936 val
= xmlStrdup(URI
);
1942 bas
= xmlCreateURI();
1945 ret
= xmlParseURIReference(bas
, (const char *) base
);
1949 val
= xmlSaveUri(ref
);
1954 * the base fragment must be ignored
1956 if (bas
->fragment
!= NULL
) {
1957 xmlFree(bas
->fragment
);
1958 bas
->fragment
= NULL
;
1960 val
= xmlSaveUri(bas
);
1965 * 2) If the path component is empty and the scheme, authority, and
1966 * query components are undefined, then it is a reference to the
1967 * current document and we are done. Otherwise, the reference URI's
1968 * query and fragment components are defined as found (or not found)
1969 * within the URI reference and not inherited from the base URI.
1971 * NOTE that in modern browsers, the parsing differs from the above
1972 * in the following aspect: the query component is allowed to be
1973 * defined while still treating this as a reference to the current
1976 res
= xmlCreateURI();
1979 if ((ref
->scheme
== NULL
) && (ref
->path
== NULL
) &&
1980 ((ref
->authority
== NULL
) && (ref
->server
== NULL
) &&
1981 (ref
->port
== PORT_EMPTY
))) {
1982 if (bas
->scheme
!= NULL
)
1983 res
->scheme
= xmlMemStrdup(bas
->scheme
);
1984 if (bas
->authority
!= NULL
)
1985 res
->authority
= xmlMemStrdup(bas
->authority
);
1987 if (bas
->server
!= NULL
)
1988 res
->server
= xmlMemStrdup(bas
->server
);
1989 if (bas
->user
!= NULL
)
1990 res
->user
= xmlMemStrdup(bas
->user
);
1991 res
->port
= bas
->port
;
1993 if (bas
->path
!= NULL
)
1994 res
->path
= xmlMemStrdup(bas
->path
);
1995 if (ref
->query_raw
!= NULL
)
1996 res
->query_raw
= xmlMemStrdup (ref
->query_raw
);
1997 else if (ref
->query
!= NULL
)
1998 res
->query
= xmlMemStrdup(ref
->query
);
1999 else if (bas
->query_raw
!= NULL
)
2000 res
->query_raw
= xmlMemStrdup(bas
->query_raw
);
2001 else if (bas
->query
!= NULL
)
2002 res
->query
= xmlMemStrdup(bas
->query
);
2003 if (ref
->fragment
!= NULL
)
2004 res
->fragment
= xmlMemStrdup(ref
->fragment
);
2009 * 3) If the scheme component is defined, indicating that the reference
2010 * starts with a scheme name, then the reference is interpreted as an
2011 * absolute URI and we are done. Otherwise, the reference URI's
2012 * scheme is inherited from the base URI's scheme component.
2014 if (ref
->scheme
!= NULL
) {
2015 val
= xmlSaveUri(ref
);
2018 if (bas
->scheme
!= NULL
)
2019 res
->scheme
= xmlMemStrdup(bas
->scheme
);
2021 if (ref
->query_raw
!= NULL
)
2022 res
->query_raw
= xmlMemStrdup(ref
->query_raw
);
2023 else if (ref
->query
!= NULL
)
2024 res
->query
= xmlMemStrdup(ref
->query
);
2025 if (ref
->fragment
!= NULL
)
2026 res
->fragment
= xmlMemStrdup(ref
->fragment
);
2029 * 4) If the authority component is defined, then the reference is a
2030 * network-path and we skip to step 7. Otherwise, the reference
2031 * URI's authority is inherited from the base URI's authority
2032 * component, which will also be undefined if the URI scheme does not
2033 * use an authority component.
2035 if ((ref
->authority
!= NULL
) || (ref
->server
!= NULL
) ||
2036 (ref
->port
!= PORT_EMPTY
)) {
2037 if (ref
->authority
!= NULL
)
2038 res
->authority
= xmlMemStrdup(ref
->authority
);
2040 if (ref
->server
!= NULL
)
2041 res
->server
= xmlMemStrdup(ref
->server
);
2042 if (ref
->user
!= NULL
)
2043 res
->user
= xmlMemStrdup(ref
->user
);
2044 res
->port
= ref
->port
;
2046 if (ref
->path
!= NULL
)
2047 res
->path
= xmlMemStrdup(ref
->path
);
2050 if (bas
->authority
!= NULL
)
2051 res
->authority
= xmlMemStrdup(bas
->authority
);
2052 else if ((bas
->server
!= NULL
) || (bas
->port
!= PORT_EMPTY
)) {
2053 if (bas
->server
!= NULL
)
2054 res
->server
= xmlMemStrdup(bas
->server
);
2055 if (bas
->user
!= NULL
)
2056 res
->user
= xmlMemStrdup(bas
->user
);
2057 res
->port
= bas
->port
;
2061 * 5) If the path component begins with a slash character ("/"), then
2062 * the reference is an absolute-path and we skip to step 7.
2064 if ((ref
->path
!= NULL
) && (ref
->path
[0] == '/')) {
2065 res
->path
= xmlMemStrdup(ref
->path
);
2071 * 6) If this step is reached, then we are resolving a relative-path
2072 * reference. The relative path needs to be merged with the base
2073 * URI's path. Although there are many ways to do this, we will
2074 * describe a simple method using a separate string buffer.
2076 * Allocate a buffer large enough for the result string.
2078 len
= 2; /* extra / and 0 */
2079 if (ref
->path
!= NULL
)
2080 len
+= strlen(ref
->path
);
2081 if (bas
->path
!= NULL
)
2082 len
+= strlen(bas
->path
);
2083 res
->path
= (char *) xmlMallocAtomic(len
);
2084 if (res
->path
== NULL
) {
2085 xmlURIErrMemory("resolving URI against base\n");
2091 * a) All but the last segment of the base URI's path component is
2092 * copied to the buffer. In other words, any characters after the
2093 * last (right-most) slash character, if any, are excluded.
2097 if (bas
->path
!= NULL
) {
2098 while (bas
->path
[cur
] != 0) {
2099 while ((bas
->path
[cur
] != 0) && (bas
->path
[cur
] != '/'))
2101 if (bas
->path
[cur
] == 0)
2106 res
->path
[out
] = bas
->path
[out
];
2114 * b) The reference's path component is appended to the buffer
2117 if (ref
->path
!= NULL
&& ref
->path
[0] != 0) {
2120 * Ensure the path includes a '/'
2122 if ((out
== 0) && ((bas
->server
!= NULL
) || bas
->port
!= PORT_EMPTY
))
2123 res
->path
[out
++] = '/';
2124 while (ref
->path
[indx
] != 0) {
2125 res
->path
[out
++] = ref
->path
[indx
++];
2131 * Steps c) to h) are really path normalization steps
2133 xmlNormalizeURIPath(res
->path
);
2138 * 7) The resulting URI components, including any inherited from the
2139 * base URI, are recombined to give the absolute form of the URI
2142 val
= xmlSaveUri(res
);
2155 * xmlBuildRelativeURI:
2156 * @URI: the URI reference under consideration
2157 * @base: the base value
2159 * Expresses the URI of the reference in terms relative to the
2160 * base. Some examples of this operation include:
2161 * base = "http://site1.com/docs/book1.html"
2162 * URI input URI returned
2163 * docs/pic1.gif pic1.gif
2164 * docs/img/pic1.gif img/pic1.gif
2165 * img/pic1.gif ../img/pic1.gif
2166 * http://site1.com/docs/pic1.gif pic1.gif
2167 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2169 * base = "docs/book1.html"
2170 * URI input URI returned
2171 * docs/pic1.gif pic1.gif
2172 * docs/img/pic1.gif img/pic1.gif
2173 * img/pic1.gif ../img/pic1.gif
2174 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2177 * Note: if the URI reference is really weird or complicated, it may be
2178 * worthwhile to first convert it into a "nice" one by calling
2179 * xmlBuildURI (using 'base') before calling this routine,
2180 * since this routine (for reasonable efficiency) assumes URI has
2181 * already been through some validation.
2183 * Returns a new URI string (to be freed by the caller) or NULL in case
2187 xmlBuildRelativeURI (const xmlChar
* URI
, const xmlChar
* base
)
2189 xmlChar
*val
= NULL
;
2194 xmlURIPtr ref
= NULL
;
2195 xmlURIPtr bas
= NULL
;
2196 xmlChar
*bptr
, *uptr
, *vptr
;
2197 int remove_path
= 0;
2199 if ((URI
== NULL
) || (*URI
== 0))
2203 * First parse URI into a standard form
2205 ref
= xmlCreateURI ();
2208 /* If URI not already in "relative" form */
2209 if (URI
[0] != '.') {
2210 ret
= xmlParseURIReference (ref
, (const char *) URI
);
2212 goto done
; /* Error in URI, return NULL */
2214 ref
->path
= (char *)xmlStrdup(URI
);
2217 * Next parse base into the same standard form
2219 if ((base
== NULL
) || (*base
== 0)) {
2220 val
= xmlStrdup (URI
);
2223 bas
= xmlCreateURI ();
2226 if (base
[0] != '.') {
2227 ret
= xmlParseURIReference (bas
, (const char *) base
);
2229 goto done
; /* Error in base, return NULL */
2231 bas
->path
= (char *)xmlStrdup(base
);
2234 * If the scheme / server on the URI differs from the base,
2235 * just return the URI
2237 if ((ref
->scheme
!= NULL
) &&
2238 ((bas
->scheme
== NULL
) ||
2239 (xmlStrcmp ((xmlChar
*)bas
->scheme
, (xmlChar
*)ref
->scheme
)) ||
2240 (xmlStrcmp ((xmlChar
*)bas
->server
, (xmlChar
*)ref
->server
)) ||
2241 (bas
->port
!= ref
->port
))) {
2242 val
= xmlStrdup (URI
);
2245 if (xmlStrEqual((xmlChar
*)bas
->path
, (xmlChar
*)ref
->path
)) {
2246 val
= xmlStrdup(BAD_CAST
"");
2249 if (bas
->path
== NULL
) {
2250 val
= xmlStrdup((xmlChar
*)ref
->path
);
2253 if (ref
->path
== NULL
) {
2254 ref
->path
= (char *) "/";
2259 * At this point (at last!) we can compare the two paths
2261 * First we take care of the special case where either of the
2262 * two path components may be missing (bug 316224)
2264 bptr
= (xmlChar
*)bas
->path
;
2266 xmlChar
*rptr
= (xmlChar
*) ref
->path
;
2270 * Next we compare the two strings and find where they first differ
2272 if ((*rptr
== '.') && (rptr
[1] == '/'))
2274 if ((*bptr
== '.') && (bptr
[1] == '/'))
2276 else if ((*bptr
== '/') && (*rptr
!= '/'))
2278 while ((bptr
[pos
] == rptr
[pos
]) && (bptr
[pos
] != 0))
2281 if (bptr
[pos
] == rptr
[pos
]) {
2282 val
= xmlStrdup(BAD_CAST
"");
2283 goto done
; /* (I can't imagine why anyone would do this) */
2287 * In URI, "back up" to the last '/' encountered. This will be the
2288 * beginning of the "unique" suffix of URI
2291 for (; ix
> 0; ix
--) {
2292 if (rptr
[ix
- 1] == '/')
2295 uptr
= (xmlChar
*)&rptr
[ix
];
2298 * In base, count the number of '/' from the differing point
2300 for (; bptr
[ix
] != 0; ix
++) {
2301 if (bptr
[ix
] == '/')
2306 * e.g: URI="foo/" base="foo/bar" -> "./"
2308 if (nbslash
== 0 && !uptr
[0]) {
2309 val
= xmlStrdup(BAD_CAST
"./");
2313 len
= xmlStrlen (uptr
) + 1;
2318 /* exception characters from xmlSaveUri */
2319 val
= xmlURIEscapeStr(uptr
, BAD_CAST
"/;&=+$,");
2324 * Allocate just enough space for the returned string -
2325 * length of the remainder of the URI, plus enough space
2326 * for the "../" groups, plus one for the terminator
2328 val
= (xmlChar
*) xmlMalloc (len
+ 3 * nbslash
);
2330 xmlURIErrMemory("building relative URI\n");
2335 * Put in as many "../" as needed
2337 for (; nbslash
>0; nbslash
--) {
2343 * Finish up with the end of the URI
2346 if ((vptr
> val
) && (len
> 0) &&
2347 (uptr
[0] == '/') && (vptr
[-1] == '/')) {
2348 memcpy (vptr
, uptr
+ 1, len
- 1);
2351 memcpy (vptr
, uptr
, len
);
2358 /* escape the freshly-built path */
2360 /* exception characters from xmlSaveUri */
2361 val
= xmlURIEscapeStr(vptr
, BAD_CAST
"/;&=+$,");
2366 * Free the working variables
2368 if (remove_path
!= 0)
2380 * @path: the resource locator in a filesystem notation
2382 * Constructs a canonic path from the specified path.
2384 * Returns a new canonic path, or a duplicate of the path parameter if the
2385 * construction fails. The caller is responsible for freeing the memory occupied
2386 * by the returned string. If there is insufficient memory available, or the
2387 * argument is NULL, the function returns NULL.
2389 #define IS_WINDOWS_PATH(p) \
2391 (((p[0] >= 'a') && (p[0] <= 'z')) || \
2392 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2393 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2395 xmlCanonicPath(const xmlChar
*path
)
2398 * For Windows implementations, additional work needs to be done to
2399 * replace backslashes in pathnames with "forward slashes"
2407 const xmlChar
*absuri
;
2414 * We must not change the backslashes to slashes if the the path
2416 * Those paths can be up to 32k characters long.
2417 * Was added specifically for OpenOffice, those paths can't be converted
2420 if ((path
[0] == '\\') && (path
[1] == '\\') && (path
[2] == '?') &&
2422 return xmlStrdup((const xmlChar
*) path
);
2425 /* sanitize filename starting with // so it can be used as URI */
2426 if ((path
[0] == '/') && (path
[1] == '/') && (path
[2] != '/'))
2429 if ((uri
= xmlParseURI((const char *) path
)) != NULL
) {
2431 return xmlStrdup(path
);
2434 /* Check if this is an "absolute uri" */
2435 absuri
= xmlStrstr(path
, BAD_CAST
"://");
2436 if (absuri
!= NULL
) {
2442 * this looks like an URI where some parts have not been
2443 * escaped leading to a parsing problem. Check that the first
2444 * part matches a protocol.
2447 /* Bypass if first part (part before the '://') is > 20 chars */
2448 if ((l
<= 0) || (l
> 20))
2449 goto path_processing
;
2450 /* Bypass if any non-alpha characters are present in first part */
2451 for (j
= 0;j
< l
;j
++) {
2453 if (!(((c
>= 'a') && (c
<= 'z')) || ((c
>= 'A') && (c
<= 'Z'))))
2454 goto path_processing
;
2457 /* Escape all except the characters specified in the supplied path */
2458 escURI
= xmlURIEscapeStr(path
, BAD_CAST
":/?_.#&;=");
2459 if (escURI
!= NULL
) {
2460 /* Try parsing the escaped path */
2461 uri
= xmlParseURI((const char *) escURI
);
2462 /* If successful, return the escaped string */
2472 /* For Windows implementations, replace backslashes with 'forward slashes' */
2475 * Create a URI structure
2477 uri
= xmlCreateURI();
2478 if (uri
== NULL
) { /* Guard against 'out of memory' */
2482 len
= xmlStrlen(path
);
2483 if ((len
> 2) && IS_WINDOWS_PATH(path
)) {
2484 /* make the scheme 'file' */
2485 uri
->scheme
= (char *) xmlStrdup(BAD_CAST
"file");
2486 /* allocate space for leading '/' + path + string terminator */
2487 uri
->path
= xmlMallocAtomic(len
+ 2);
2488 if (uri
->path
== NULL
) {
2489 xmlFreeURI(uri
); /* Guard against 'out of memory' */
2492 /* Put in leading '/' plus path */
2495 strncpy(p
, (char *) path
, len
+ 1);
2497 uri
->path
= (char *) xmlStrdup(path
);
2498 if (uri
->path
== NULL
) {
2504 /* Now change all occurrences of '\' to '/' */
2505 while (*p
!= '\0') {
2511 if (uri
->scheme
== NULL
) {
2512 ret
= xmlStrdup((const xmlChar
*) uri
->path
);
2514 ret
= xmlSaveUri(uri
);
2519 ret
= xmlStrdup((const xmlChar
*) path
);
2526 * @path: the resource locator in a filesystem notation
2528 * Constructs an URI expressing the existing path
2530 * Returns a new URI, or a duplicate of the path parameter if the
2531 * construction fails. The caller is responsible for freeing the memory
2532 * occupied by the returned string. If there is insufficient memory available,
2533 * or the argument is NULL, the function returns NULL.
2536 xmlPathToURI(const xmlChar
*path
)
2545 if ((uri
= xmlParseURI((const char *) path
)) != NULL
) {
2547 return xmlStrdup(path
);
2549 cal
= xmlCanonicPath(path
);
2553 /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2554 If 'cal' is a valid URI already then we are done here, as continuing would make
2556 if ((uri
= xmlParseURI((const char *) cal
)) != NULL
) {
2560 /* 'cal' can contain a relative path with backslashes. If that is processed
2561 by xmlSaveURI, they will be escaped and the external entity loader machinery
2562 will fail. So convert them to slashes. Misuse 'ret' for walking. */
2564 while (*ret
!= '\0') {
2570 memset(&temp
, 0, sizeof(temp
));
2571 temp
.path
= (char *) cal
;
2572 ret
= xmlSaveUri(&temp
);