libs/xml2/uri.c

   1 /**
   2  * uri.c: set of generic URI related routines
   3  *
   4  * Reference: RFCs 3986, 2732 and 2373
   5  *
   6  * See Copyright for the status of this software.
   7  *
   8  * daniel@veillard.com
   9  */
  10
  11 #define IN_LIBXML
  12 #include "libxml.h"
  13
  14 #include <limits.h>
  15 #include <string.h>
  16
  17 #include <libxml/xmlmemory.h>
  18 #include <libxml/uri.h>
  19 #include <libxml/globals.h>
  20 #include <libxml/xmlerror.h>
  21
  22 #include "private/error.h"
  23
  24 /**
  25  * MAX_URI_LENGTH:
  26  *
  27  * The definition of the URI regexp in the above RFC has no size limit
  28  * In practice they are usually relatively short except for the
  29  * data URI scheme as defined in RFC 2397. Even for data URI the usual
  30  * maximum size before hitting random practical limits is around 64 KB
  31  * and 4KB is usually a maximum admitted limit for proper operations.
  32  * The value below is more a security limit than anything else and
  33  * really should never be hit by 'normal' operations
  34  * Set to 1 MByte in 2012, this is only enforced on output
  35  */
  36 #define MAX_URI_LENGTH 1024 * 1024
  37
  38 #define PORT_EMPTY           0
  39 #define PORT_EMPTY_SERVER   -1
  40
  41 static void
  42 xmlURIErrMemory(const char *extra)
  43 {
  44     if (extra)
  45         __xmlRaiseError(NULL, NULL, NULL,
  46                         NULL, NULL, XML_FROM_URI,
  47                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
  48                         extra, NULL, NULL, 0, 0,
  49                         "Memory allocation failed : %s\n", extra);
  50     else
  51         __xmlRaiseError(NULL, NULL, NULL,
  52                         NULL, NULL, XML_FROM_URI,
  53                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
  54                         NULL, NULL, NULL, 0, 0,
  55                         "Memory allocation failed\n");
  56 }
  57
  58 static void xmlCleanURI(xmlURIPtr uri);
  59
  60 /*
  61  * Old rule from 2396 used in legacy handling code
  62  * alpha    = lowalpha | upalpha
  63  */
  64 #define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))
  65
  66
  67 /*
  68  * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
  69  *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
  70  *            "u" | "v" | "w" | "x" | "y" | "z"
  71  */
  72
  73 #define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))
  74
  75 /*
  76  * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
  77  *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
  78  *           "U" | "V" | "W" | "X" | "Y" | "Z"
  79  */
  80 #define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))
  81
  82 #ifdef IS_DIGIT
  83 #undef IS_DIGIT
  84 #endif
  85 /*
  86  * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
  87  */
  88 #define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))
  89
  90 /*
  91  * alphanum = alpha | digit
  92  */
  93
  94 #define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))
  95
  96 /*
  97  * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
  98  */
  99
 100 #define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
 101     ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
 102     ((x) == '(') || ((x) == ')'))
 103
 104 /*
 105  * unwise = "{" | "}" | "|" | "\" | "^" | "`"
 106  */
 107
 108 #define IS_UNWISE(p)                                                    \
 109       (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
 110        ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
 111        ((*(p) == ']')) || ((*(p) == '`')))
 112 /*
 113  * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 114  *            "[" | "]"
 115  */
 116
 117 #define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
 118         ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
 119         ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
 120         ((x) == ']'))
 121
 122 /*
 123  * unreserved = alphanum | mark
 124  */
 125
 126 #define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
 127
 128 /*
 129  * Skip to next pointer char, handle escaped sequences
 130  */
 131
 132 #define NEXT(p) ((*p == '%')? p += 3 : p++)
 133
 134 /*
 135  * Productions from the spec.
 136  *
 137  *    authority     = server | reg_name
 138  *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 139  *                        ";" | ":" | "@" | "&" | "=" | "+" )
 140  *
 141  * path          = [ abs_path | opaque_part ]
 142  */
 143
 144 #define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n))
 145
 146 /************************************************************************
 147  *                                                                      *
 148  *                         RFC 3986 parser                              *
 149  *                                                                      *
 150  ************************************************************************/
 151
 152 #define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
 153 #define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||               \
 154                       ((*(p) >= 'A') && (*(p) <= 'Z')))
 155 #define ISA_HEXDIG(p)                                                   \
 156        (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||             \
 157         ((*(p) >= 'A') && (*(p) <= 'F')))
 158
 159 /*
 160  *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 161  *                     / "*" / "+" / "," / ";" / "="
 162  */
 163 #define ISA_SUB_DELIM(p)                                                \
 164       (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||         \
 165        ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||         \
 166        ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||         \
 167        ((*(p) == '=')) || ((*(p) == '\'')))
 168
 169 /*
 170  *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 171  */
 172 #define ISA_GEN_DELIM(p)                                                \
 173       (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
 174        ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
 175        ((*(p) == '@')))
 176
 177 /*
 178  *    reserved      = gen-delims / sub-delims
 179  */
 180 #define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))
 181
 182 /*
 183  *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 184  */
 185 #define ISA_UNRESERVED(p)                                               \
 186       ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||           \
 187        ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))
 188
 189 /*
 190  *    pct-encoded   = "%" HEXDIG HEXDIG
 191  */
 192 #define ISA_PCT_ENCODED(p)                                              \
 193      ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2)))
 194
 195 /*
 196  *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 197  */
 198 #define ISA_PCHAR(p)                                                    \
 199      (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||    \
 200       ((*(p) == ':')) || ((*(p) == '@')))
 201
 202 /**
 203  * xmlParse3986Scheme:
 204  * @uri:  pointer to an URI structure
 205  * @str:  pointer to the string to analyze
 206  *
 207  * Parse an URI scheme
 208  *
 209  * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
 210  *
 211  * Returns 0 or the error code
 212  */
 213 static int
 214 xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
 215     const char *cur;
 216
 217     if (str == NULL)
 218         return(-1);
 219
 220     cur = *str;
 221     if (!ISA_ALPHA(cur))
 222         return(2);
 223     cur++;
 224     while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
 225            (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
 226     if (uri != NULL) {
 227         if (uri->scheme != NULL) xmlFree(uri->scheme);
 228         uri->scheme = STRNDUP(*str, cur - *str);
 229     }
 230     *str = cur;
 231     return(0);
 232 }
 233
 234 /**
 235  * xmlParse3986Fragment:
 236  * @uri:  pointer to an URI structure
 237  * @str:  pointer to the string to analyze
 238  *
 239  * Parse the query part of an URI
 240  *
 241  * fragment      = *( pchar / "/" / "?" )
 242  * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
 243  *       in the fragment identifier but this is used very broadly for
 244  *       xpointer scheme selection, so we are allowing it here to not break
 245  *       for example all the DocBook processing chains.
 246  *
 247  * Returns 0 or the error code
 248  */
 249 static int
 250 xmlParse3986Fragment(xmlURIPtr uri, const char **str)
 251 {
 252     const char *cur;
 253
 254     if (str == NULL)
 255         return (-1);
 256
 257     cur = *str;
 258
 259     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
 260            (*cur == '[') || (*cur == ']') ||
 261            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
 262         NEXT(cur);
 263     if (uri != NULL) {
 264         if (uri->fragment != NULL)
 265             xmlFree(uri->fragment);
 266         if (uri->cleanup & 2)
 267             uri->fragment = STRNDUP(*str, cur - *str);
 268         else
 269             uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
 270     }
 271     *str = cur;
 272     return (0);
 273 }
 274
 275 /**
 276  * xmlParse3986Query:
 277  * @uri:  pointer to an URI structure
 278  * @str:  pointer to the string to analyze
 279  *
 280  * Parse the query part of an URI
 281  *
 282  * query = *uric
 283  *
 284  * Returns 0 or the error code
 285  */
 286 static int
 287 xmlParse3986Query(xmlURIPtr uri, const char **str)
 288 {
 289     const char *cur;
 290
 291     if (str == NULL)
 292         return (-1);
 293
 294     cur = *str;
 295
 296     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
 297            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
 298         NEXT(cur);
 299     if (uri != NULL) {
 300         if (uri->query != NULL)
 301             xmlFree(uri->query);
 302         if (uri->cleanup & 2)
 303             uri->query = STRNDUP(*str, cur - *str);
 304         else
 305             uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
 306
 307         /* Save the raw bytes of the query as well.
 308          * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
 309          */
 310         if (uri->query_raw != NULL)
 311             xmlFree (uri->query_raw);
 312         uri->query_raw = STRNDUP (*str, cur - *str);
 313     }
 314     *str = cur;
 315     return (0);
 316 }
 317
 318 /**
 319  * xmlParse3986Port:
 320  * @uri:  pointer to an URI structure
 321  * @str:  the string to analyze
 322  *
 323  * Parse a port part and fills in the appropriate fields
 324  * of the @uri structure
 325  *
 326  * port          = *DIGIT
 327  *
 328  * Returns 0 or the error code
 329  */
 330 static int
 331 xmlParse3986Port(xmlURIPtr uri, const char **str)
 332 {
 333     const char *cur = *str;
 334     int port = 0;
 335
 336     if (ISA_DIGIT(cur)) {
 337         while (ISA_DIGIT(cur)) {
 338             int digit = *cur - '0';
 339
 340             if (port > INT_MAX / 10)
 341                 return(1);
 342             port *= 10;
 343             if (port > INT_MAX - digit)
 344                 return(1);
 345             port += digit;
 346
 347             cur++;
 348         }
 349         if (uri != NULL)
 350             uri->port = port;
 351         *str = cur;
 352         return(0);
 353     }
 354     return(1);
 355 }
 356
 357 /**
 358  * xmlParse3986Userinfo:
 359  * @uri:  pointer to an URI structure
 360  * @str:  the string to analyze
 361  *
 362  * Parse an user information part and fills in the appropriate fields
 363  * of the @uri structure
 364  *
 365  * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
 366  *
 367  * Returns 0 or the error code
 368  */
 369 static int
 370 xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
 371 {
 372     const char *cur;
 373
 374     cur = *str;
 375     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
 376            ISA_SUB_DELIM(cur) || (*cur == ':'))
 377         NEXT(cur);
 378     if (*cur == '@') {
 379         if (uri != NULL) {
 380             if (uri->user != NULL) xmlFree(uri->user);
 381             if (uri->cleanup & 2)
 382                 uri->user = STRNDUP(*str, cur - *str);
 383             else
 384                 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
 385         }
 386         *str = cur;
 387         return(0);
 388     }
 389     return(1);
 390 }
 391
 392 /**
 393  * xmlParse3986DecOctet:
 394  * @str:  the string to analyze
 395  *
 396  *    dec-octet     = DIGIT                 ; 0-9
 397  *                  / %x31-39 DIGIT         ; 10-99
 398  *                  / "1" 2DIGIT            ; 100-199
 399  *                  / "2" %x30-34 DIGIT     ; 200-249
 400  *                  / "25" %x30-35          ; 250-255
 401  *
 402  * Skip a dec-octet.
 403  *
 404  * Returns 0 if found and skipped, 1 otherwise
 405  */
 406 static int
 407 xmlParse3986DecOctet(const char **str) {
 408     const char *cur = *str;
 409
 410     if (!(ISA_DIGIT(cur)))
 411         return(1);
 412     if (!ISA_DIGIT(cur+1))
 413         cur++;
 414     else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur+2)))
 415         cur += 2;
 416     else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2)))
 417         cur += 3;
 418     else if ((*cur == '2') && (*(cur + 1) >= '0') &&
 419              (*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2)))
 420         cur += 3;
 421     else if ((*cur == '2') && (*(cur + 1) == '5') &&
 422              (*(cur + 2) >= '0') && (*(cur + 1) <= '5'))
 423         cur += 3;
 424     else
 425         return(1);
 426     *str = cur;
 427     return(0);
 428 }
 429 /**
 430  * xmlParse3986Host:
 431  * @uri:  pointer to an URI structure
 432  * @str:  the string to analyze
 433  *
 434  * Parse an host part and fills in the appropriate fields
 435  * of the @uri structure
 436  *
 437  * host          = IP-literal / IPv4address / reg-name
 438  * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
 439  * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
 440  * reg-name      = *( unreserved / pct-encoded / sub-delims )
 441  *
 442  * Returns 0 or the error code
 443  */
 444 static int
 445 xmlParse3986Host(xmlURIPtr uri, const char **str)
 446 {
 447     const char *cur = *str;
 448     const char *host;
 449
 450     host = cur;
 451     /*
 452      * IPv6 and future addressing scheme are enclosed between brackets
 453      */
 454     if (*cur == '[') {
 455         cur++;
 456         while ((*cur != ']') && (*cur != 0))
 457             cur++;
 458         if (*cur != ']')
 459             return(1);
 460         cur++;
 461         goto found;
 462     }
 463     /*
 464      * try to parse an IPv4
 465      */
 466     if (ISA_DIGIT(cur)) {
 467         if (xmlParse3986DecOctet(&cur) != 0)
 468             goto not_ipv4;
 469         if (*cur != '.')
 470             goto not_ipv4;
 471         cur++;
 472         if (xmlParse3986DecOctet(&cur) != 0)
 473             goto not_ipv4;
 474         if (*cur != '.')
 475             goto not_ipv4;
 476         if (xmlParse3986DecOctet(&cur) != 0)
 477             goto not_ipv4;
 478         if (*cur != '.')
 479             goto not_ipv4;
 480         if (xmlParse3986DecOctet(&cur) != 0)
 481             goto not_ipv4;
 482         goto found;
 483 not_ipv4:
 484         cur = *str;
 485     }
 486     /*
 487      * then this should be a hostname which can be empty
 488      */
 489     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
 490         NEXT(cur);
 491 found:
 492     if (uri != NULL) {
 493         if (uri->authority != NULL) xmlFree(uri->authority);
 494         uri->authority = NULL;
 495         if (uri->server != NULL) xmlFree(uri->server);
 496         if (cur != host) {
 497             if (uri->cleanup & 2)
 498                 uri->server = STRNDUP(host, cur - host);
 499             else
 500                 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
 501         } else
 502             uri->server = NULL;
 503     }
 504     *str = cur;
 505     return(0);
 506 }
 507
 508 /**
 509  * xmlParse3986Authority:
 510  * @uri:  pointer to an URI structure
 511  * @str:  the string to analyze
 512  *
 513  * Parse an authority part and fills in the appropriate fields
 514  * of the @uri structure
 515  *
 516  * authority     = [ userinfo "@" ] host [ ":" port ]
 517  *
 518  * Returns 0 or the error code
 519  */
 520 static int
 521 xmlParse3986Authority(xmlURIPtr uri, const char **str)
 522 {
 523     const char *cur;
 524     int ret;
 525
 526     cur = *str;
 527     /*
 528      * try to parse an userinfo and check for the trailing @
 529      */
 530     ret = xmlParse3986Userinfo(uri, &cur);
 531     if ((ret != 0) || (*cur != '@'))
 532         cur = *str;
 533     else
 534         cur++;
 535     ret = xmlParse3986Host(uri, &cur);
 536     if (ret != 0) return(ret);
 537     if (*cur == ':') {
 538         cur++;
 539         ret = xmlParse3986Port(uri, &cur);
 540         if (ret != 0) return(ret);
 541     }
 542     *str = cur;
 543     return(0);
 544 }
 545
 546 /**
 547  * xmlParse3986Segment:
 548  * @str:  the string to analyze
 549  * @forbid: an optional forbidden character
 550  * @empty: allow an empty segment
 551  *
 552  * Parse a segment and fills in the appropriate fields
 553  * of the @uri structure
 554  *
 555  * segment       = *pchar
 556  * segment-nz    = 1*pchar
 557  * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 558  *               ; non-zero-length segment without any colon ":"
 559  *
 560  * Returns 0 or the error code
 561  */
 562 static int
 563 xmlParse3986Segment(const char **str, char forbid, int empty)
 564 {
 565     const char *cur;
 566
 567     cur = *str;
 568     if (!ISA_PCHAR(cur)) {
 569         if (empty)
 570             return(0);
 571         return(1);
 572     }
 573     while (ISA_PCHAR(cur) && (*cur != forbid))
 574         NEXT(cur);
 575     *str = cur;
 576     return (0);
 577 }
 578
 579 /**
 580  * xmlParse3986PathAbEmpty:
 581  * @uri:  pointer to an URI structure
 582  * @str:  the string to analyze
 583  *
 584  * Parse an path absolute or empty and fills in the appropriate fields
 585  * of the @uri structure
 586  *
 587  * path-abempty  = *( "/" segment )
 588  *
 589  * Returns 0 or the error code
 590  */
 591 static int
 592 xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
 593 {
 594     const char *cur;
 595     int ret;
 596
 597     cur = *str;
 598
 599     while (*cur == '/') {
 600         cur++;
 601         ret = xmlParse3986Segment(&cur, 0, 1);
 602         if (ret != 0) return(ret);
 603     }
 604     if (uri != NULL) {
 605         if (uri->path != NULL) xmlFree(uri->path);
 606         if (*str != cur) {
 607             if (uri->cleanup & 2)
 608                 uri->path = STRNDUP(*str, cur - *str);
 609             else
 610                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
 611         } else {
 612             uri->path = NULL;
 613         }
 614     }
 615     *str = cur;
 616     return (0);
 617 }
 618
 619 /**
 620  * xmlParse3986PathAbsolute:
 621  * @uri:  pointer to an URI structure
 622  * @str:  the string to analyze
 623  *
 624  * Parse an path absolute and fills in the appropriate fields
 625  * of the @uri structure
 626  *
 627  * path-absolute = "/" [ segment-nz *( "/" segment ) ]
 628  *
 629  * Returns 0 or the error code
 630  */
 631 static int
 632 xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
 633 {
 634     const char *cur;
 635     int ret;
 636
 637     cur = *str;
 638
 639     if (*cur != '/')
 640         return(1);
 641     cur++;
 642     ret = xmlParse3986Segment(&cur, 0, 0);
 643     if (ret == 0) {
 644         while (*cur == '/') {
 645             cur++;
 646             ret = xmlParse3986Segment(&cur, 0, 1);
 647             if (ret != 0) return(ret);
 648         }
 649     }
 650     if (uri != NULL) {
 651         if (uri->path != NULL) xmlFree(uri->path);
 652         if (cur != *str) {
 653             if (uri->cleanup & 2)
 654                 uri->path = STRNDUP(*str, cur - *str);
 655             else
 656                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
 657         } else {
 658             uri->path = NULL;
 659         }
 660     }
 661     *str = cur;
 662     return (0);
 663 }
 664
 665 /**
 666  * xmlParse3986PathRootless:
 667  * @uri:  pointer to an URI structure
 668  * @str:  the string to analyze
 669  *
 670  * Parse an path without root and fills in the appropriate fields
 671  * of the @uri structure
 672  *
 673  * path-rootless = segment-nz *( "/" segment )
 674  *
 675  * Returns 0 or the error code
 676  */
 677 static int
 678 xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
 679 {
 680     const char *cur;
 681     int ret;
 682
 683     cur = *str;
 684
 685     ret = xmlParse3986Segment(&cur, 0, 0);
 686     if (ret != 0) return(ret);
 687     while (*cur == '/') {
 688         cur++;
 689         ret = xmlParse3986Segment(&cur, 0, 1);
 690         if (ret != 0) return(ret);
 691     }
 692     if (uri != NULL) {
 693         if (uri->path != NULL) xmlFree(uri->path);
 694         if (cur != *str) {
 695             if (uri->cleanup & 2)
 696                 uri->path = STRNDUP(*str, cur - *str);
 697             else
 698                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
 699         } else {
 700             uri->path = NULL;
 701         }
 702     }
 703     *str = cur;
 704     return (0);
 705 }
 706
 707 /**
 708  * xmlParse3986PathNoScheme:
 709  * @uri:  pointer to an URI structure
 710  * @str:  the string to analyze
 711  *
 712  * Parse an path which is not a scheme and fills in the appropriate fields
 713  * of the @uri structure
 714  *
 715  * path-noscheme = segment-nz-nc *( "/" segment )
 716  *
 717  * Returns 0 or the error code
 718  */
 719 static int
 720 xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
 721 {
 722     const char *cur;
 723     int ret;
 724
 725     cur = *str;
 726
 727     ret = xmlParse3986Segment(&cur, ':', 0);
 728     if (ret != 0) return(ret);
 729     while (*cur == '/') {
 730         cur++;
 731         ret = xmlParse3986Segment(&cur, 0, 1);
 732         if (ret != 0) return(ret);
 733     }
 734     if (uri != NULL) {
 735         if (uri->path != NULL) xmlFree(uri->path);
 736         if (cur != *str) {
 737             if (uri->cleanup & 2)
 738                 uri->path = STRNDUP(*str, cur - *str);
 739             else
 740                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
 741         } else {
 742             uri->path = NULL;
 743         }
 744     }
 745     *str = cur;
 746     return (0);
 747 }
 748
 749 /**
 750  * xmlParse3986HierPart:
 751  * @uri:  pointer to an URI structure
 752  * @str:  the string to analyze
 753  *
 754  * Parse an hierarchical part and fills in the appropriate fields
 755  * of the @uri structure
 756  *
 757  * hier-part     = "//" authority path-abempty
 758  *                / path-absolute
 759  *                / path-rootless
 760  *                / path-empty
 761  *
 762  * Returns 0 or the error code
 763  */
 764 static int
 765 xmlParse3986HierPart(xmlURIPtr uri, const char **str)
 766 {
 767     const char *cur;
 768     int ret;
 769
 770     cur = *str;
 771
 772     if ((*cur == '/') && (*(cur + 1) == '/')) {
 773         cur += 2;
 774         ret = xmlParse3986Authority(uri, &cur);
 775         if (ret != 0) return(ret);
 776         /*
 777          * An empty server is marked with a special URI value.
 778          */
 779         if ((uri->server == NULL) && (uri->port == PORT_EMPTY))
 780             uri->port = PORT_EMPTY_SERVER;
 781         ret = xmlParse3986PathAbEmpty(uri, &cur);
 782         if (ret != 0) return(ret);
 783         *str = cur;
 784         return(0);
 785     } else if (*cur == '/') {
 786         ret = xmlParse3986PathAbsolute(uri, &cur);
 787         if (ret != 0) return(ret);
 788     } else if (ISA_PCHAR(cur)) {
 789         ret = xmlParse3986PathRootless(uri, &cur);
 790         if (ret != 0) return(ret);
 791     } else {
 792         /* path-empty is effectively empty */
 793         if (uri != NULL) {
 794             if (uri->path != NULL) xmlFree(uri->path);
 795             uri->path = NULL;
 796         }
 797     }
 798     *str = cur;
 799     return (0);
 800 }
 801
 802 /**
 803  * xmlParse3986RelativeRef:
 804  * @uri:  pointer to an URI structure
 805  * @str:  the string to analyze
 806  *
 807  * Parse an URI string and fills in the appropriate fields
 808  * of the @uri structure
 809  *
 810  * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
 811  * relative-part = "//" authority path-abempty
 812  *               / path-absolute
 813  *               / path-noscheme
 814  *               / path-empty
 815  *
 816  * Returns 0 or the error code
 817  */
 818 static int
 819 xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
 820     int ret;
 821
 822     if ((*str == '/') && (*(str + 1) == '/')) {
 823         str += 2;
 824         ret = xmlParse3986Authority(uri, &str);
 825         if (ret != 0) return(ret);
 826         ret = xmlParse3986PathAbEmpty(uri, &str);
 827         if (ret != 0) return(ret);
 828     } else if (*str == '/') {
 829         ret = xmlParse3986PathAbsolute(uri, &str);
 830         if (ret != 0) return(ret);
 831     } else if (ISA_PCHAR(str)) {
 832         ret = xmlParse3986PathNoScheme(uri, &str);
 833         if (ret != 0) return(ret);
 834     } else {
 835         /* path-empty is effectively empty */
 836         if (uri != NULL) {
 837             if (uri->path != NULL) xmlFree(uri->path);
 838             uri->path = NULL;
 839         }
 840     }
 841
 842     if (*str == '?') {
 843         str++;
 844         ret = xmlParse3986Query(uri, &str);
 845         if (ret != 0) return(ret);
 846     }
 847     if (*str == '#') {
 848         str++;
 849         ret = xmlParse3986Fragment(uri, &str);
 850         if (ret != 0) return(ret);
 851     }
 852     if (*str != 0) {
 853         xmlCleanURI(uri);
 854         return(1);
 855     }
 856     return(0);
 857 }
 858
 859
 860 /**
 861  * xmlParse3986URI:
 862  * @uri:  pointer to an URI structure
 863  * @str:  the string to analyze
 864  *
 865  * Parse an URI string and fills in the appropriate fields
 866  * of the @uri structure
 867  *
 868  * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
 869  *
 870  * Returns 0 or the error code
 871  */
 872 static int
 873 xmlParse3986URI(xmlURIPtr uri, const char *str) {
 874     int ret;
 875
 876     ret = xmlParse3986Scheme(uri, &str);
 877     if (ret != 0) return(ret);
 878     if (*str != ':') {
 879         return(1);
 880     }
 881     str++;
 882     ret = xmlParse3986HierPart(uri, &str);
 883     if (ret != 0) return(ret);
 884     if (*str == '?') {
 885         str++;
 886         ret = xmlParse3986Query(uri, &str);
 887         if (ret != 0) return(ret);
 888     }
 889     if (*str == '#') {
 890         str++;
 891         ret = xmlParse3986Fragment(uri, &str);
 892         if (ret != 0) return(ret);
 893     }
 894     if (*str != 0) {
 895         xmlCleanURI(uri);
 896         return(1);
 897     }
 898     return(0);
 899 }
 900
 901 /**
 902  * xmlParse3986URIReference:
 903  * @uri:  pointer to an URI structure
 904  * @str:  the string to analyze
 905  *
 906  * Parse an URI reference string and fills in the appropriate fields
 907  * of the @uri structure
 908  *
 909  * URI-reference = URI / relative-ref
 910  *
 911  * Returns 0 or the error code
 912  */
 913 static int
 914 xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
 915     int ret;
 916
 917     if (str == NULL)
 918         return(-1);
 919     xmlCleanURI(uri);
 920
 921     /*
 922      * Try first to parse absolute refs, then fallback to relative if
 923      * it fails.
 924      */
 925     ret = xmlParse3986URI(uri, str);
 926     if (ret != 0) {
 927         xmlCleanURI(uri);
 928         ret = xmlParse3986RelativeRef(uri, str);
 929         if (ret != 0) {
 930             xmlCleanURI(uri);
 931             return(ret);
 932         }
 933     }
 934     return(0);
 935 }
 936
 937 /**
 938  * xmlParseURI:
 939  * @str:  the URI string to analyze
 940  *
 941  * Parse an URI based on RFC 3986
 942  *
 943  * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
 944  *
 945  * Returns a newly built xmlURIPtr or NULL in case of error
 946  */
 947 xmlURIPtr
 948 xmlParseURI(const char *str) {
 949     xmlURIPtr uri;
 950     int ret;
 951
 952     if (str == NULL)
 953         return(NULL);
 954     uri = xmlCreateURI();
 955     if (uri != NULL) {
 956         ret = xmlParse3986URIReference(uri, str);
 957         if (ret) {
 958             xmlFreeURI(uri);
 959             return(NULL);
 960         }
 961     }
 962     return(uri);
 963 }
 964
 965 /**
 966  * xmlParseURIReference:
 967  * @uri:  pointer to an URI structure
 968  * @str:  the string to analyze
 969  *
 970  * Parse an URI reference string based on RFC 3986 and fills in the
 971  * appropriate fields of the @uri structure
 972  *
 973  * URI-reference = URI / relative-ref
 974  *
 975  * Returns 0 or the error code
 976  */
 977 int
 978 xmlParseURIReference(xmlURIPtr uri, const char *str) {
 979     return(xmlParse3986URIReference(uri, str));
 980 }
 981
 982 /**
 983  * xmlParseURIRaw:
 984  * @str:  the URI string to analyze
 985  * @raw:  if 1 unescaping of URI pieces are disabled
 986  *
 987  * Parse an URI but allows to keep intact the original fragments.
 988  *
 989  * URI-reference = URI / relative-ref
 990  *
 991  * Returns a newly built xmlURIPtr or NULL in case of error
 992  */
 993 xmlURIPtr
 994 xmlParseURIRaw(const char *str, int raw) {
 995     xmlURIPtr uri;
 996     int ret;
 997
 998     if (str == NULL)
 999         return(NULL);
1000     uri = xmlCreateURI();
1001     if (uri != NULL) {
1002         if (raw) {
1003             uri->cleanup |= 2;
1004         }
1005         ret = xmlParseURIReference(uri, str);
1006         if (ret) {
1007             xmlFreeURI(uri);
1008             return(NULL);
1009         }
1010     }
1011     return(uri);
1012 }
1013
1014 /************************************************************************
1015  *                                                                      *
1016  *                      Generic URI structure functions                 *
1017  *                                                                      *
1018  ************************************************************************/
1019
1020 /**
1021  * xmlCreateURI:
1022  *
1023  * Simply creates an empty xmlURI
1024  *
1025  * Returns the new structure or NULL in case of error
1026  */
1027 xmlURIPtr
1028 xmlCreateURI(void) {
1029     xmlURIPtr ret;
1030
1031     ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1032     if (ret == NULL) {
1033         xmlURIErrMemory("creating URI structure\n");
1034         return(NULL);
1035     }
1036     memset(ret, 0, sizeof(xmlURI));
1037     ret->port = PORT_EMPTY;
1038     return(ret);
1039 }
1040
1041 /**
1042  * xmlSaveUriRealloc:
1043  *
1044  * Function to handle properly a reallocation when saving an URI
1045  * Also imposes some limit on the length of an URI string output
1046  */
1047 static xmlChar *
1048 xmlSaveUriRealloc(xmlChar *ret, int *max) {
1049     xmlChar *temp;
1050     int tmp;
1051
1052     if (*max > MAX_URI_LENGTH) {
1053         xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1054         return(NULL);
1055     }
1056     tmp = *max * 2;
1057     temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1058     if (temp == NULL) {
1059         xmlURIErrMemory("saving URI\n");
1060         return(NULL);
1061     }
1062     *max = tmp;
1063     return(temp);
1064 }
1065
1066 /**
1067  * xmlSaveUri:
1068  * @uri:  pointer to an xmlURI
1069  *
1070  * Save the URI as an escaped string
1071  *
1072  * Returns a new string (to be deallocated by caller)
1073  */
1074 xmlChar *
1075 xmlSaveUri(xmlURIPtr uri) {
1076     xmlChar *ret = NULL;
1077     xmlChar *temp;
1078     const char *p;
1079     int len;
1080     int max;
1081
1082     if (uri == NULL) return(NULL);
1083
1084
1085     max = 80;
1086     ret = (xmlChar *) xmlMallocAtomic(max + 1);
1087     if (ret == NULL) {
1088         xmlURIErrMemory("saving URI\n");
1089         return(NULL);
1090     }
1091     len = 0;
1092
1093     if (uri->scheme != NULL) {
1094         p = uri->scheme;
1095         while (*p != 0) {
1096             if (len >= max) {
1097                 temp = xmlSaveUriRealloc(ret, &max);
1098                 if (temp == NULL) goto mem_error;
1099                 ret = temp;
1100             }
1101             ret[len++] = *p++;
1102         }
1103         if (len >= max) {
1104             temp = xmlSaveUriRealloc(ret, &max);
1105             if (temp == NULL) goto mem_error;
1106             ret = temp;
1107         }
1108         ret[len++] = ':';
1109     }
1110     if (uri->opaque != NULL) {
1111         p = uri->opaque;
1112         while (*p != 0) {
1113             if (len + 3 >= max) {
1114                 temp = xmlSaveUriRealloc(ret, &max);
1115                 if (temp == NULL) goto mem_error;
1116                 ret = temp;
1117             }
1118             if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1119                 ret[len++] = *p++;
1120             else {
1121                 int val = *(unsigned char *)p++;
1122                 int hi = val / 0x10, lo = val % 0x10;
1123                 ret[len++] = '%';
1124                 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1125                 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1126             }
1127         }
1128     } else {
1129         if ((uri->server != NULL) || (uri->port != PORT_EMPTY)) {
1130             if (len + 3 >= max) {
1131                 temp = xmlSaveUriRealloc(ret, &max);
1132                 if (temp == NULL) goto mem_error;
1133                 ret = temp;
1134             }
1135             ret[len++] = '/';
1136             ret[len++] = '/';
1137             if (uri->user != NULL) {
1138                 p = uri->user;
1139                 while (*p != 0) {
1140                     if (len + 3 >= max) {
1141                         temp = xmlSaveUriRealloc(ret, &max);
1142                         if (temp == NULL) goto mem_error;
1143                         ret = temp;
1144                     }
1145                     if ((IS_UNRESERVED(*(p))) ||
1146                         ((*(p) == ';')) || ((*(p) == ':')) ||
1147                         ((*(p) == '&')) || ((*(p) == '=')) ||
1148                         ((*(p) == '+')) || ((*(p) == '$')) ||
1149                         ((*(p) == ',')))
1150                         ret[len++] = *p++;
1151                     else {
1152                         int val = *(unsigned char *)p++;
1153                         int hi = val / 0x10, lo = val % 0x10;
1154                         ret[len++] = '%';
1155                         ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1156                         ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1157                     }
1158                 }
1159                 if (len + 3 >= max) {
1160                     temp = xmlSaveUriRealloc(ret, &max);
1161                     if (temp == NULL) goto mem_error;
1162                     ret = temp;
1163                 }
1164                 ret[len++] = '@';
1165             }
1166             if (uri->server != NULL) {
1167                 p = uri->server;
1168                 while (*p != 0) {
1169                     if (len >= max) {
1170                         temp = xmlSaveUriRealloc(ret, &max);
1171                         if (temp == NULL) goto mem_error;
1172                         ret = temp;
1173                     }
1174                     /* TODO: escaping? */
1175                     ret[len++] = (xmlChar) *p++;
1176                 }
1177             }
1178             if (uri->port > 0) {
1179                 if (len + 10 >= max) {
1180                     temp = xmlSaveUriRealloc(ret, &max);
1181                     if (temp == NULL) goto mem_error;
1182                     ret = temp;
1183                 }
1184                 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1185             }
1186         } else if (uri->authority != NULL) {
1187             if (len + 3 >= max) {
1188                 temp = xmlSaveUriRealloc(ret, &max);
1189                 if (temp == NULL) goto mem_error;
1190                 ret = temp;
1191             }
1192             ret[len++] = '/';
1193             ret[len++] = '/';
1194             p = uri->authority;
1195             while (*p != 0) {
1196                 if (len + 3 >= max) {
1197                     temp = xmlSaveUriRealloc(ret, &max);
1198                     if (temp == NULL) goto mem_error;
1199                     ret = temp;
1200                 }
1201                 if ((IS_UNRESERVED(*(p))) ||
1202                     ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1203                     ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1204                     ((*(p) == '=')) || ((*(p) == '+')))
1205                     ret[len++] = *p++;
1206                 else {
1207                     int val = *(unsigned char *)p++;
1208                     int hi = val / 0x10, lo = val % 0x10;
1209                     ret[len++] = '%';
1210                     ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1211                     ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1212                 }
1213             }
1214         } else if (uri->scheme != NULL) {
1215             if (len + 3 >= max) {
1216                 temp = xmlSaveUriRealloc(ret, &max);
1217                 if (temp == NULL) goto mem_error;
1218                 ret = temp;
1219             }
1220         }
1221         if (uri->path != NULL) {
1222             p = uri->path;
1223             /*
1224              * the colon in file:///d: should not be escaped or
1225              * Windows accesses fail later.
1226              */
1227             if ((uri->scheme != NULL) &&
1228                 (p[0] == '/') &&
1229                 (((p[1] >= 'a') && (p[1] <= 'z')) ||
1230                  ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1231                 (p[2] == ':') &&
1232                 (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1233                 if (len + 3 >= max) {
1234                     temp = xmlSaveUriRealloc(ret, &max);
1235                     if (temp == NULL) goto mem_error;
1236                     ret = temp;
1237                 }
1238                 ret[len++] = *p++;
1239                 ret[len++] = *p++;
1240                 ret[len++] = *p++;
1241             }
1242             while (*p != 0) {
1243                 if (len + 3 >= max) {
1244                     temp = xmlSaveUriRealloc(ret, &max);
1245                     if (temp == NULL) goto mem_error;
1246                     ret = temp;
1247                 }
1248                 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1249                     ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1250                     ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1251                     ((*(p) == ',')))
1252                     ret[len++] = *p++;
1253                 else {
1254                     int val = *(unsigned char *)p++;
1255                     int hi = val / 0x10, lo = val % 0x10;
1256                     ret[len++] = '%';
1257                     ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1258                     ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1259                 }
1260             }
1261         }
1262         if (uri->query_raw != NULL) {
1263             if (len + 1 >= max) {
1264                 temp = xmlSaveUriRealloc(ret, &max);
1265                 if (temp == NULL) goto mem_error;
1266                 ret = temp;
1267             }
1268             ret[len++] = '?';
1269             p = uri->query_raw;
1270             while (*p != 0) {
1271                 if (len + 1 >= max) {
1272                     temp = xmlSaveUriRealloc(ret, &max);
1273                     if (temp == NULL) goto mem_error;
1274                     ret = temp;
1275                 }
1276                 ret[len++] = *p++;
1277             }
1278         } else if (uri->query != NULL) {
1279             if (len + 3 >= max) {
1280                 temp = xmlSaveUriRealloc(ret, &max);
1281                 if (temp == NULL) goto mem_error;
1282                 ret = temp;
1283             }
1284             ret[len++] = '?';
1285             p = uri->query;
1286             while (*p != 0) {
1287                 if (len + 3 >= max) {
1288                     temp = xmlSaveUriRealloc(ret, &max);
1289                     if (temp == NULL) goto mem_error;
1290                     ret = temp;
1291                 }
1292                 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1293                     ret[len++] = *p++;
1294                 else {
1295                     int val = *(unsigned char *)p++;
1296                     int hi = val / 0x10, lo = val % 0x10;
1297                     ret[len++] = '%';
1298                     ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1299                     ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1300                 }
1301             }
1302         }
1303     }
1304     if (uri->fragment != NULL) {
1305         if (len + 3 >= max) {
1306             temp = xmlSaveUriRealloc(ret, &max);
1307             if (temp == NULL) goto mem_error;
1308             ret = temp;
1309         }
1310         ret[len++] = '#';
1311         p = uri->fragment;
1312         while (*p != 0) {
1313             if (len + 3 >= max) {
1314                 temp = xmlSaveUriRealloc(ret, &max);
1315                 if (temp == NULL) goto mem_error;
1316                 ret = temp;
1317             }
1318             if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1319                 ret[len++] = *p++;
1320             else {
1321                 int val = *(unsigned char *)p++;
1322                 int hi = val / 0x10, lo = val % 0x10;
1323                 ret[len++] = '%';
1324                 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1325                 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1326             }
1327         }
1328     }
1329     if (len >= max) {
1330         temp = xmlSaveUriRealloc(ret, &max);
1331         if (temp == NULL) goto mem_error;
1332         ret = temp;
1333     }
1334     ret[len] = 0;
1335     return(ret);
1336
1337 mem_error:
1338     xmlFree(ret);
1339     return(NULL);
1340 }
1341
1342 /**
1343  * xmlPrintURI:
1344  * @stream:  a FILE* for the output
1345  * @uri:  pointer to an xmlURI
1346  *
1347  * Prints the URI in the stream @stream.
1348  */
1349 void
1350 xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1351     xmlChar *out;
1352
1353     out = xmlSaveUri(uri);
1354     if (out != NULL) {
1355         fprintf(stream, "%s", (char *) out);
1356         xmlFree(out);
1357     }
1358 }
1359
1360 /**
1361  * xmlCleanURI:
1362  * @uri:  pointer to an xmlURI
1363  *
1364  * Make sure the xmlURI struct is free of content
1365  */
1366 static void
1367 xmlCleanURI(xmlURIPtr uri) {
1368     if (uri == NULL) return;
1369
1370     if (uri->scheme != NULL) xmlFree(uri->scheme);
1371     uri->scheme = NULL;
1372     if (uri->server != NULL) xmlFree(uri->server);
1373     uri->server = NULL;
1374     if (uri->user != NULL) xmlFree(uri->user);
1375     uri->user = NULL;
1376     if (uri->path != NULL) xmlFree(uri->path);
1377     uri->path = NULL;
1378     if (uri->fragment != NULL) xmlFree(uri->fragment);
1379     uri->fragment = NULL;
1380     if (uri->opaque != NULL) xmlFree(uri->opaque);
1381     uri->opaque = NULL;
1382     if (uri->authority != NULL) xmlFree(uri->authority);
1383     uri->authority = NULL;
1384     if (uri->query != NULL) xmlFree(uri->query);
1385     uri->query = NULL;
1386     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1387     uri->query_raw = NULL;
1388 }
1389
1390 /**
1391  * xmlFreeURI:
1392  * @uri:  pointer to an xmlURI
1393  *
1394  * Free up the xmlURI struct
1395  */
1396 void
1397 xmlFreeURI(xmlURIPtr uri) {
1398     if (uri == NULL) return;
1399
1400     if (uri->scheme != NULL) xmlFree(uri->scheme);
1401     if (uri->server != NULL) xmlFree(uri->server);
1402     if (uri->user != NULL) xmlFree(uri->user);
1403     if (uri->path != NULL) xmlFree(uri->path);
1404     if (uri->fragment != NULL) xmlFree(uri->fragment);
1405     if (uri->opaque != NULL) xmlFree(uri->opaque);
1406     if (uri->authority != NULL) xmlFree(uri->authority);
1407     if (uri->query != NULL) xmlFree(uri->query);
1408     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1409     xmlFree(uri);
1410 }
1411
1412 /************************************************************************
1413  *                                                                      *
1414  *                      Helper functions                                *
1415  *                                                                      *
1416  ************************************************************************/
1417
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g. Runs of "/" found after a segment
 * are collapsed to a single "/" on the way.
 *
 * The string is rewritten in place and can only get shorter; nothing is
 * allocated.
 *
 * Returns 0 on success, -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *rd;   /* read position */
    char *wr;   /* write position */

    if (path == NULL)
        return(-1);

    /*
     * Leading "/" characters are kept untouched: move to the first
     * character of the first non-empty segment.
     */
    for (rd = path; *rd == '/'; rd++)
        ;
    if (*rd == '\0')
        return(0);

    /* Everything before this point stays as it is.  */
    wr = rd;

    /*
     * First pass, cases (c) and (d), one segment per iteration.
     */
    while (*rd != '\0') {
        if (*rd == '.') {
            /*
             * c) All occurrences of "./", where "." is a complete path
             *    segment, are removed from the buffer string. Extra
             *    slashes following it are dropped as well.
             */
            if (rd[1] == '/') {
                rd += 2;
                while (*rd == '/')
                    rd++;
                continue;
            }

            /*
             * d) If the buffer string ends with "." as a complete path
             *    segment, that "." is removed.
             */
            if (rd[1] == '\0')
                break;
        }

        /* Any other segment is copied down to the write position.  */
        while ((*rd != '/') && (*rd != '\0'))
            *wr++ = *rd++;
        if (*rd == '\0')
            break;

        /* Collapse a run of slashes, then copy the remaining one.  */
        while ((rd[0] == '/') && (rd[1] == '/'))
            rd++;
        *wr++ = *rd++;
    }
    *wr = '\0';

    /* Start over from the first segment for the second pass.  */
    for (rd = path; *rd == '/'; rd++)
        ;
    if (*rd == '\0')
        return(0);

    /*
     * Second pass, cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * The string is compacted as soon as a pattern is found, so a single
     * position pointer is enough for this pass.
     */
    for (;;) {
        char *next;   /* first character of the following segment */
        char *prev;   /* used to walk back to the previous segment */
        char *dst;
        const char *src;
        int curIsDotDot, nextIsDotDot;

        /* "rd" is the first character of the segment being examined.  */
        next = rd;
        while ((*next != '/') && (*next != '\0'))
            next++;

        /* A last segment has nothing after it that could cancel it.  */
        if (*next == '\0')
            break;
        next++;

        /*
         * Keep this segment if it is ".." itself, or if the following
         * segment is not "..".
         */
        curIsDotDot = (rd[0] == '.') && (rd[1] == '.') && (next == rd + 3);
        nextIsDotDot = (next[0] == '.') && (next[1] == '.') &&
                       ((next[2] == '/') || (next[2] == '\0'));
        if (curIsDotDot || !nextIsDotDot) {
            rd = next;
            continue;
        }

        /* "<segment>/.." at the very end: just cut the string here.  */
        if (next[2] == '\0') {
            *rd = '\0';
            break;
        }

        /*
         * Drop "<segment>/../" by moving the tail down. Source and
         * destination overlap, hence the manual forward copy, which
         * includes the terminating 0.
         */
        dst = rd;
        src = next + 3;
        do {
            *dst = *src++;
        } while (*dst++ != '\0');

        /*
         * Look for a previous segment: step back over the slashes that
         * precede the current position. If the start of the buffer is
         * reached, keep going from the current position.
         */
        prev = rd;
        while (prev > path) {
            prev--;
            if (*prev != '/')
                break;
        }
        if (prev == path)
            continue;

        /*
         * "prev" points into the previous segment; find its start and
         * resume from there. This is what handles "foo/bar/../..":
         * once "bar/.." is gone, "foo/.." has to be examined again.
         */
        rd = prev;
        while ((rd > path) && (rd[-1] != '/'))
            rd--;
    }
    /*
     * The second pass only shortens the string, so this position is at
     * or after its terminator.
     */
    *wr = '\0';

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * Leading "/.." segments of an absolute path are discarded here.
     */
    if (path[0] == '/') {
        rd = path;
        while ((rd[0] == '/') && (rd[1] == '.') && (rd[2] == '.') &&
               ((rd[3] == '/') || (rd[3] == '\0')))
            rd += 3;

        if (rd != path) {
            wr = path;
            while (*rd != '\0')
                *wr++ = *rd++;
            *wr = 0;
        }
    }

    return(0);
}
1606
/*
 * is_hex:
 * Returns 1 if @c is one of 0-9, a-f or A-F, 0 otherwise.
 */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
        return(1);
    if ((c >= 'a') && (c <= 'f'))
        return(1);
    if ((c >= 'A') && (c <= 'F'))
        return(1);
    return(0);
}
1614
/*
 * xmlURIHexValue:
 * Returns the numeric value of @c, which the caller has already checked
 * with is_hex().
 */
static int
xmlURIHexValue(char c) {
    if ((c >= '0') && (c <= '9'))
        return(c - '0');
    if ((c >= 'a') && (c <= 'f'))
        return((c - 'a') + 10);
    return((c - 'A') + 10);
}

/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer; when given it must have room
 *           for the unescaped bytes plus a terminating 0, i.e. up to
 *           @len + 1 bytes
 *
 * Unescaping routine, but does not check that the string is an URI. The
 * output is a direct unsigned char translation of %XX values (no encoding)
 * Note that the length of the result can only be smaller or same size as
 * the input string. A '%' that is not followed by two hexadecimal digits
 * within the first @len bytes is copied as is. Since every valid %XX is
 * translated, "%00" puts a 0 byte inside the result.
 *
 * Returns @target if it was provided, otherwise a newly allocated copy of
 * the string, unescaped. Returns NULL only in case of error: @str is
 * NULL, the full string is too long to be measured with an int, or the
 * allocation fails
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *ret, *out;
    const char *in;

    if (str == NULL)
        return(NULL);
    if (len <= 0) {
        size_t full = strlen(str);

        /*
         * Do not let the size_t length wrap when stored in an int;
         * len + 1 bytes are needed below, so INT_MAX is too large too.
         */
        if (full >= (size_t) INT_MAX)
            return(NULL);
        len = (int) full;
    }

    if (target == NULL) {
        ret = (char *) xmlMallocAtomic(len + 1);
        if (ret == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    } else
        ret = target;
    in = str;
    out = ret;
    while(len > 0) {
        /* len > 2 guarantees both digits are inside the requested range */
        if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
            int c = xmlURIHexValue(in[1]) * 16 + xmlURIHexValue(in[2]);

            in += 3;
            len -= 3;
            /* Explicit sign change */
            *out++ = (char) c;
        } else {
            *out++ = *in++;
            len--;
        }
    }
    *out = 0;
    return(ret);
}
1678
1679 /**
1680  * xmlURIEscapeStr:
1681  * @str:  string to escape
1682  * @list: exception list string of chars not to escape
1683  *
1684  * This routine escapes a string to hex, ignoring reserved characters
1685  * (a-z, A-Z, 0-9, "@-_.!~*'()") and the characters in the exception list.
1686  *
1687  * Returns a new escaped string or NULL in case of error.
1688  */
1689 xmlChar *
1690 xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1691     xmlChar *ret, ch;
1692     xmlChar *temp;
1693     const xmlChar *in;
1694     int len, out;
1695
1696     if (str == NULL)
1697         return(NULL);
1698     if (str[0] == 0)
1699         return(xmlStrdup(str));
1700     len = xmlStrlen(str);
1701     if (!(len > 0)) return(NULL);
1702
1703     len += 20;
1704     ret = (xmlChar *) xmlMallocAtomic(len);
1705     if (ret == NULL) {
1706         xmlURIErrMemory("escaping URI value\n");
1707         return(NULL);
1708     }
1709     in = (const xmlChar *) str;
1710     out = 0;
1711     while(*in != 0) {
1712         if (len - out <= 3) {
1713             temp = xmlSaveUriRealloc(ret, &len);
1714             if (temp == NULL) {
1715                 xmlURIErrMemory("escaping URI value\n");
1716                 xmlFree(ret);
1717                 return(NULL);
1718             }
1719             ret = temp;
1720         }
1721
1722         ch = *in;
1723
1724         if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1725             unsigned char val;
1726             ret[out++] = '%';
1727             val = ch >> 4;
1728             if (val <= 9)
1729                 ret[out++] = '0' + val;
1730             else
1731                 ret[out++] = 'A' + val - 0xA;
1732             val = ch & 0xF;
1733             if (val <= 9)
1734                 ret[out++] = '0' + val;
1735             else
1736                 ret[out++] = 'A' + val - 0xA;
1737             in++;
1738         } else {
1739             ret[out++] = *in++;
1740         }
1741
1742     }
1743     ret[out] = 0;
1744     return(ret);
1745 }
1746
1747 /**
1748  * xmlURIEscape:
1749  * @str:  the string of the URI to escape
1750  *
1751  * Escaping routine, does not do validity checks !
1752  * It will try to escape the chars needing this, but this is heuristic
1753  * based it's impossible to be sure.
1754  *
1755  * Returns an copy of the string, but escaped
1756  *
1757  * 25 May 2001
1758  * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1759  * according to RFC2396.
1760  *   - Carl Douglas
1761  */
1762 xmlChar *
1763 xmlURIEscape(const xmlChar * str)
1764 {
1765     xmlChar *ret, *segment = NULL;
1766     xmlURIPtr uri;
1767     int ret2;
1768
1769     if (str == NULL)
1770         return (NULL);
1771
1772     uri = xmlCreateURI();
1773     if (uri != NULL) {
1774         /*
1775          * Allow escaping errors in the unescaped form
1776          */
1777         uri->cleanup = 1;
1778         ret2 = xmlParseURIReference(uri, (const char *)str);
1779         if (ret2) {
1780             xmlFreeURI(uri);
1781             return (NULL);
1782         }
1783     }
1784
1785     if (!uri)
1786         return NULL;
1787
1788     ret = NULL;
1789
1790 #define NULLCHK(p) if(!p) { \
1791          xmlURIErrMemory("escaping URI value\n"); \
1792          xmlFreeURI(uri); \
1793          xmlFree(ret); \
1794          return NULL; } \
1795
1796     if (uri->scheme) {
1797         segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1798         NULLCHK(segment)
1799         ret = xmlStrcat(ret, segment);
1800         ret = xmlStrcat(ret, BAD_CAST ":");
1801         xmlFree(segment);
1802     }
1803
1804     if (uri->authority) {
1805         segment =
1806             xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1807         NULLCHK(segment)
1808         ret = xmlStrcat(ret, BAD_CAST "//");
1809         ret = xmlStrcat(ret, segment);
1810         xmlFree(segment);
1811     }
1812
1813     if (uri->user) {
1814         segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1815         NULLCHK(segment)
1816         ret = xmlStrcat(ret,BAD_CAST "//");
1817         ret = xmlStrcat(ret, segment);
1818         ret = xmlStrcat(ret, BAD_CAST "@");
1819         xmlFree(segment);
1820     }
1821
1822     if (uri->server) {
1823         segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1824         NULLCHK(segment)
1825         if (uri->user == NULL)
1826             ret = xmlStrcat(ret, BAD_CAST "//");
1827         ret = xmlStrcat(ret, segment);
1828         xmlFree(segment);
1829     }
1830
1831     if (uri->port > 0) {
1832         xmlChar port[11];
1833
1834         snprintf((char *) port, 11, "%d", uri->port);
1835         ret = xmlStrcat(ret, BAD_CAST ":");
1836         ret = xmlStrcat(ret, port);
1837     }
1838
1839     if (uri->path) {
1840         segment =
1841             xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1842         NULLCHK(segment)
1843         ret = xmlStrcat(ret, segment);
1844         xmlFree(segment);
1845     }
1846
1847     if (uri->query_raw) {
1848         ret = xmlStrcat(ret, BAD_CAST "?");
1849         ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1850     }
1851     else if (uri->query) {
1852         segment =
1853             xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1854         NULLCHK(segment)
1855         ret = xmlStrcat(ret, BAD_CAST "?");
1856         ret = xmlStrcat(ret, segment);
1857         xmlFree(segment);
1858     }
1859
1860     if (uri->opaque) {
1861         segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1862         NULLCHK(segment)
1863         ret = xmlStrcat(ret, segment);
1864         xmlFree(segment);
1865     }
1866
1867     if (uri->fragment) {
1868         segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1869         NULLCHK(segment)
1870         ret = xmlStrcat(ret, BAD_CAST "#");
1871         ret = xmlStrcat(ret, segment);
1872         xmlFree(segment);
1873     }
1874
1875     xmlFreeURI(uri);
1876 #undef NULLCHK
1877
1878     return (ret);
1879 }
1880
1881 /************************************************************************
1882  *                                                                      *
1883  *                      Public functions                                *
1884  *                                                                      *
1885  ************************************************************************/
1886
1887 /**
1888  * xmlBuildURI:
1889  * @URI:  the URI instance found in the document
1890  * @base:  the base value
1891  *
 * Computes the final URI of the reference done by checking that
1893  * the given URI is valid, and building the final URI using the
1894  * base URI. This is processed according to section 5.2 of the
1895  * RFC 2396
1896  *
1897  * 5.2. Resolving Relative References to Absolute Form
1898  *
1899  * Returns a new URI string (to be freed by the caller) or NULL in case
1900  *         of error.
1901  */
1902 xmlChar *
1903 xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1904     xmlChar *val = NULL;
1905     int ret, len, indx, cur, out;
1906     xmlURIPtr ref = NULL;
1907     xmlURIPtr bas = NULL;
1908     xmlURIPtr res = NULL;
1909
1910     /*
1911      * 1) The URI reference is parsed into the potential four components and
1912      *    fragment identifier, as described in Section 4.3.
1913      *
1914      *    NOTE that a completely empty URI is treated by modern browsers
1915      *    as a reference to "." rather than as a synonym for the current
1916      *    URI.  Should we do that here?
1917      */
1918     if (URI == NULL)
1919         ret = -1;
1920     else {
1921         if (*URI) {
1922             ref = xmlCreateURI();
1923             if (ref == NULL)
1924                 goto done;
1925             ret = xmlParseURIReference(ref, (const char *) URI);
1926         }
1927         else
1928             ret = 0;
1929     }
1930     if (ret != 0)
1931         goto done;
1932     if ((ref != NULL) && (ref->scheme != NULL)) {
1933         /*
1934          * The URI is absolute don't modify.
1935          */
1936         val = xmlStrdup(URI);
1937         goto done;
1938     }
1939     if (base == NULL)
1940         ret = -1;
1941     else {
1942         bas = xmlCreateURI();
1943         if (bas == NULL)
1944             goto done;
1945         ret = xmlParseURIReference(bas, (const char *) base);
1946     }
1947     if (ret != 0) {
1948         if (ref)
1949             val = xmlSaveUri(ref);
1950         goto done;
1951     }
1952     if (ref == NULL) {
1953         /*
1954          * the base fragment must be ignored
1955          */
1956         if (bas->fragment != NULL) {
1957             xmlFree(bas->fragment);
1958             bas->fragment = NULL;
1959         }
1960         val = xmlSaveUri(bas);
1961         goto done;
1962     }
1963
1964     /*
1965      * 2) If the path component is empty and the scheme, authority, and
1966      *    query components are undefined, then it is a reference to the
1967      *    current document and we are done.  Otherwise, the reference URI's
1968      *    query and fragment components are defined as found (or not found)
1969      *    within the URI reference and not inherited from the base URI.
1970      *
1971      *    NOTE that in modern browsers, the parsing differs from the above
1972      *    in the following aspect:  the query component is allowed to be
1973      *    defined while still treating this as a reference to the current
1974      *    document.
1975      */
1976     res = xmlCreateURI();
1977     if (res == NULL)
1978         goto done;
1979     if ((ref->scheme == NULL) && (ref->path == NULL) &&
1980         ((ref->authority == NULL) && (ref->server == NULL) &&
1981          (ref->port == PORT_EMPTY))) {
1982         if (bas->scheme != NULL)
1983             res->scheme = xmlMemStrdup(bas->scheme);
1984         if (bas->authority != NULL)
1985             res->authority = xmlMemStrdup(bas->authority);
1986         else {
1987             if (bas->server != NULL)
1988                 res->server = xmlMemStrdup(bas->server);
1989             if (bas->user != NULL)
1990                 res->user = xmlMemStrdup(bas->user);
1991             res->port = bas->port;
1992         }
1993         if (bas->path != NULL)
1994             res->path = xmlMemStrdup(bas->path);
1995         if (ref->query_raw != NULL)
1996             res->query_raw = xmlMemStrdup (ref->query_raw);
1997         else if (ref->query != NULL)
1998             res->query = xmlMemStrdup(ref->query);
1999         else if (bas->query_raw != NULL)
2000             res->query_raw = xmlMemStrdup(bas->query_raw);
2001         else if (bas->query != NULL)
2002             res->query = xmlMemStrdup(bas->query);
2003         if (ref->fragment != NULL)
2004             res->fragment = xmlMemStrdup(ref->fragment);
2005         goto step_7;
2006     }
2007
2008     /*
2009      * 3) If the scheme component is defined, indicating that the reference
2010      *    starts with a scheme name, then the reference is interpreted as an
2011      *    absolute URI and we are done.  Otherwise, the reference URI's
2012      *    scheme is inherited from the base URI's scheme component.
2013      */
2014     if (ref->scheme != NULL) {
2015         val = xmlSaveUri(ref);
2016         goto done;
2017     }
2018     if (bas->scheme != NULL)
2019         res->scheme = xmlMemStrdup(bas->scheme);
2020
2021     if (ref->query_raw != NULL)
2022         res->query_raw = xmlMemStrdup(ref->query_raw);
2023     else if (ref->query != NULL)
2024         res->query = xmlMemStrdup(ref->query);
2025     if (ref->fragment != NULL)
2026         res->fragment = xmlMemStrdup(ref->fragment);
2027
2028     /*
2029      * 4) If the authority component is defined, then the reference is a
2030      *    network-path and we skip to step 7.  Otherwise, the reference
2031      *    URI's authority is inherited from the base URI's authority
2032      *    component, which will also be undefined if the URI scheme does not
2033      *    use an authority component.
2034      */
2035     if ((ref->authority != NULL) || (ref->server != NULL) ||
2036          (ref->port != PORT_EMPTY)) {
2037         if (ref->authority != NULL)
2038             res->authority = xmlMemStrdup(ref->authority);
2039         else {
2040             if (ref->server != NULL)
2041                 res->server = xmlMemStrdup(ref->server);
2042             if (ref->user != NULL)
2043                 res->user = xmlMemStrdup(ref->user);
2044             res->port = ref->port;
2045         }
2046         if (ref->path != NULL)
2047             res->path = xmlMemStrdup(ref->path);
2048         goto step_7;
2049     }
2050     if (bas->authority != NULL)
2051         res->authority = xmlMemStrdup(bas->authority);
2052     else if ((bas->server != NULL) || (bas->port != PORT_EMPTY)) {
2053         if (bas->server != NULL)
2054             res->server = xmlMemStrdup(bas->server);
2055         if (bas->user != NULL)
2056             res->user = xmlMemStrdup(bas->user);
2057         res->port = bas->port;
2058     }
2059
2060     /*
2061      * 5) If the path component begins with a slash character ("/"), then
2062      *    the reference is an absolute-path and we skip to step 7.
2063      */
2064     if ((ref->path != NULL) && (ref->path[0] == '/')) {
2065         res->path = xmlMemStrdup(ref->path);
2066         goto step_7;
2067     }
2068
2069
2070     /*
2071      * 6) If this step is reached, then we are resolving a relative-path
2072      *    reference.  The relative path needs to be merged with the base
2073      *    URI's path.  Although there are many ways to do this, we will
2074      *    describe a simple method using a separate string buffer.
2075      *
2076      * Allocate a buffer large enough for the result string.
2077      */
2078     len = 2; /* extra / and 0 */
2079     if (ref->path != NULL)
2080         len += strlen(ref->path);
2081     if (bas->path != NULL)
2082         len += strlen(bas->path);
2083     res->path = (char *) xmlMallocAtomic(len);
2084     if (res->path == NULL) {
2085         xmlURIErrMemory("resolving URI against base\n");
2086         goto done;
2087     }
2088     res->path[0] = 0;
2089
2090     /*
2091      * a) All but the last segment of the base URI's path component is
2092      *    copied to the buffer.  In other words, any characters after the
2093      *    last (right-most) slash character, if any, are excluded.
2094      */
2095     cur = 0;
2096     out = 0;
2097     if (bas->path != NULL) {
2098         while (bas->path[cur] != 0) {
2099             while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2100                 cur++;
2101             if (bas->path[cur] == 0)
2102                 break;
2103
2104             cur++;
2105             while (out < cur) {
2106                 res->path[out] = bas->path[out];
2107                 out++;
2108             }
2109         }
2110     }
2111     res->path[out] = 0;
2112
2113     /*
2114      * b) The reference's path component is appended to the buffer
2115      *    string.
2116      */
2117     if (ref->path != NULL && ref->path[0] != 0) {
2118         indx = 0;
2119         /*
2120          * Ensure the path includes a '/'
2121          */
2122         if ((out == 0) && ((bas->server != NULL) || bas->port != PORT_EMPTY))
2123             res->path[out++] = '/';
2124         while (ref->path[indx] != 0) {
2125             res->path[out++] = ref->path[indx++];
2126         }
2127     }
2128     res->path[out] = 0;
2129
2130     /*
2131      * Steps c) to h) are really path normalization steps
2132      */
2133     xmlNormalizeURIPath(res->path);
2134
2135 step_7:
2136
2137     /*
2138      * 7) The resulting URI components, including any inherited from the
2139      *    base URI, are recombined to give the absolute form of the URI
2140      *    reference.
2141      */
2142     val = xmlSaveUri(res);
2143
2144 done:
2145     if (ref != NULL)
2146         xmlFreeURI(ref);
2147     if (bas != NULL)
2148         xmlFreeURI(bas);
2149     if (res != NULL)
2150         xmlFreeURI(res);
2151     return(val);
2152 }
2153
2154 /**
2155  * xmlBuildRelativeURI:
2156  * @URI:  the URI reference under consideration
2157  * @base:  the base value
2158  *
2159  * Expresses the URI of the reference in terms relative to the
2160  * base.  Some examples of this operation include:
2161  *     base = "http://site1.com/docs/book1.html"
2162  *        URI input                        URI returned
2163  *     docs/pic1.gif                    pic1.gif
2164  *     docs/img/pic1.gif                img/pic1.gif
2165  *     img/pic1.gif                     ../img/pic1.gif
2166  *     http://site1.com/docs/pic1.gif   pic1.gif
2167  *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
2168  *
2169  *     base = "docs/book1.html"
2170  *        URI input                        URI returned
2171  *     docs/pic1.gif                    pic1.gif
2172  *     docs/img/pic1.gif                img/pic1.gif
2173  *     img/pic1.gif                     ../img/pic1.gif
2174  *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
2175  *
2176  *
2177  * Note: if the URI reference is really weird or complicated, it may be
2178  *       worthwhile to first convert it into a "nice" one by calling
2179  *       xmlBuildURI (using 'base') before calling this routine,
2180  *       since this routine (for reasonable efficiency) assumes URI has
2181  *       already been through some validation.
2182  *
2183  * Returns a new URI string (to be freed by the caller) or NULL in case
2184  * error.
2185  */
2186 xmlChar *
2187 xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2188 {
2189     xmlChar *val = NULL;
2190     int ret;
2191     int ix;
2192     int nbslash = 0;
2193     int len;
2194     xmlURIPtr ref = NULL;
2195     xmlURIPtr bas = NULL;
2196     xmlChar *bptr, *uptr, *vptr;
2197     int remove_path = 0;
2198
2199     if ((URI == NULL) || (*URI == 0))
2200         return NULL;
2201
2202     /*
2203      * First parse URI into a standard form
2204      */
2205     ref = xmlCreateURI ();
2206     if (ref == NULL)
2207         return NULL;
2208     /* If URI not already in "relative" form */
2209     if (URI[0] != '.') {
2210         ret = xmlParseURIReference (ref, (const char *) URI);
2211         if (ret != 0)
2212             goto done;          /* Error in URI, return NULL */
2213     } else
2214         ref->path = (char *)xmlStrdup(URI);
2215
2216     /*
2217      * Next parse base into the same standard form
2218      */
2219     if ((base == NULL) || (*base == 0)) {
2220         val = xmlStrdup (URI);
2221         goto done;
2222     }
2223     bas = xmlCreateURI ();
2224     if (bas == NULL)
2225         goto done;
2226     if (base[0] != '.') {
2227         ret = xmlParseURIReference (bas, (const char *) base);
2228         if (ret != 0)
2229             goto done;          /* Error in base, return NULL */
2230     } else
2231         bas->path = (char *)xmlStrdup(base);
2232
2233     /*
2234      * If the scheme / server on the URI differs from the base,
2235      * just return the URI
2236      */
2237     if ((ref->scheme != NULL) &&
2238         ((bas->scheme == NULL) ||
2239          (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2240          (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)) ||
2241          (bas->port != ref->port))) {
2242         val = xmlStrdup (URI);
2243         goto done;
2244     }
2245     if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2246         val = xmlStrdup(BAD_CAST "");
2247         goto done;
2248     }
2249     if (bas->path == NULL) {
2250         val = xmlStrdup((xmlChar *)ref->path);
2251         goto done;
2252     }
2253     if (ref->path == NULL) {
2254         ref->path = (char *) "/";
2255         remove_path = 1;
2256     }
2257
2258     /*
2259      * At this point (at last!) we can compare the two paths
2260      *
2261      * First we take care of the special case where either of the
2262      * two path components may be missing (bug 316224)
2263      */
2264     bptr = (xmlChar *)bas->path;
2265     {
2266         xmlChar *rptr = (xmlChar *) ref->path;
2267         int pos = 0;
2268
2269         /*
2270          * Next we compare the two strings and find where they first differ
2271          */
2272         if ((*rptr == '.') && (rptr[1] == '/'))
2273             rptr += 2;
2274         if ((*bptr == '.') && (bptr[1] == '/'))
2275             bptr += 2;
2276         else if ((*bptr == '/') && (*rptr != '/'))
2277             bptr++;
2278         while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0))
2279             pos++;
2280
2281         if (bptr[pos] == rptr[pos]) {
2282             val = xmlStrdup(BAD_CAST "");
2283             goto done;          /* (I can't imagine why anyone would do this) */
2284         }
2285
2286         /*
2287          * In URI, "back up" to the last '/' encountered.  This will be the
2288          * beginning of the "unique" suffix of URI
2289          */
2290         ix = pos;
2291         for (; ix > 0; ix--) {
2292             if (rptr[ix - 1] == '/')
2293                 break;
2294         }
2295         uptr = (xmlChar *)&rptr[ix];
2296
2297         /*
2298          * In base, count the number of '/' from the differing point
2299          */
2300         for (; bptr[ix] != 0; ix++) {
2301             if (bptr[ix] == '/')
2302                 nbslash++;
2303         }
2304
2305         /*
2306          * e.g: URI="foo/" base="foo/bar" -> "./"
2307          */
2308         if (nbslash == 0 && !uptr[0]) {
2309             val = xmlStrdup(BAD_CAST "./");
2310             goto done;
2311         }
2312
2313         len = xmlStrlen (uptr) + 1;
2314     }
2315
2316     if (nbslash == 0) {
2317         if (uptr != NULL)
2318             /* exception characters from xmlSaveUri */
2319             val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2320         goto done;
2321     }
2322
2323     /*
2324      * Allocate just enough space for the returned string -
2325      * length of the remainder of the URI, plus enough space
2326      * for the "../" groups, plus one for the terminator
2327      */
2328     val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2329     if (val == NULL) {
2330         xmlURIErrMemory("building relative URI\n");
2331         goto done;
2332     }
2333     vptr = val;
2334     /*
2335      * Put in as many "../" as needed
2336      */
2337     for (; nbslash>0; nbslash--) {
2338         *vptr++ = '.';
2339         *vptr++ = '.';
2340         *vptr++ = '/';
2341     }
2342     /*
2343      * Finish up with the end of the URI
2344      */
2345     if (uptr != NULL) {
2346         if ((vptr > val) && (len > 0) &&
2347             (uptr[0] == '/') && (vptr[-1] == '/')) {
2348             memcpy (vptr, uptr + 1, len - 1);
2349             vptr[len - 2] = 0;
2350         } else {
2351             memcpy (vptr, uptr, len);
2352             vptr[len - 1] = 0;
2353         }
2354     } else {
2355         vptr[len - 1] = 0;
2356     }
2357
2358     /* escape the freshly-built path */
2359     vptr = val;
2360         /* exception characters from xmlSaveUri */
2361     val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2362     xmlFree(vptr);
2363
2364 done:
2365     /*
2366      * Free the working variables
2367      */
2368     if (remove_path != 0)
2369         ref->path = NULL;
2370     if (ref != NULL)
2371         xmlFreeURI (ref);
2372     if (bas != NULL)
2373         xmlFreeURI (bas);
2374
2375     return val;
2376 }
2377
2378 /**
2379  * xmlCanonicPath:
2380  * @path:  the resource locator in a filesystem notation
2381  *
2382  * Constructs a canonic path from the specified path.
2383  *
2384  * Returns a new canonic path, or a duplicate of the path parameter if the
2385  * construction fails. The caller is responsible for freeing the memory occupied
2386  * by the returned string. If there is insufficient memory available, or the
2387  * argument is NULL, the function returns NULL.
2388  */
2389 #define IS_WINDOWS_PATH(p)                                      \
2390         ((p != NULL) &&                                         \
2391          (((p[0] >= 'a') && (p[0] <= 'z')) ||                   \
2392           ((p[0] >= 'A') && (p[0] <= 'Z'))) &&                  \
2393          (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2394 xmlChar *
2395 xmlCanonicPath(const xmlChar *path)
2396 {
2397 /*
2398  * For Windows implementations, additional work needs to be done to
2399  * replace backslashes in pathnames with "forward slashes"
2400  */
2401 #if defined(_WIN32)
2402     int len = 0;
2403     char *p = NULL;
2404 #endif
2405     xmlURIPtr uri;
2406     xmlChar *ret;
2407     const xmlChar *absuri;
2408
2409     if (path == NULL)
2410         return(NULL);
2411
2412 #if defined(_WIN32)
2413     /*
2414      * We must not change the backslashes to slashes if the the path
2415      * starts with \\?\
2416      * Those paths can be up to 32k characters long.
2417      * Was added specifically for OpenOffice, those paths can't be converted
2418      * to URIs anyway.
2419      */
2420     if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2421         (path[3] == '\\') )
2422         return xmlStrdup((const xmlChar *) path);
2423 #endif
2424
2425         /* sanitize filename starting with // so it can be used as URI */
2426     if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2427         path++;
2428
2429     if ((uri = xmlParseURI((const char *) path)) != NULL) {
2430         xmlFreeURI(uri);
2431         return xmlStrdup(path);
2432     }
2433
2434     /* Check if this is an "absolute uri" */
2435     absuri = xmlStrstr(path, BAD_CAST "://");
2436     if (absuri != NULL) {
2437         int l, j;
2438         unsigned char c;
2439         xmlChar *escURI;
2440
2441         /*
2442          * this looks like an URI where some parts have not been
2443          * escaped leading to a parsing problem.  Check that the first
2444          * part matches a protocol.
2445          */
2446         l = absuri - path;
2447         /* Bypass if first part (part before the '://') is > 20 chars */
2448         if ((l <= 0) || (l > 20))
2449             goto path_processing;
2450         /* Bypass if any non-alpha characters are present in first part */
2451         for (j = 0;j < l;j++) {
2452             c = path[j];
2453             if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2454                 goto path_processing;
2455         }
2456
2457         /* Escape all except the characters specified in the supplied path */
2458         escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2459         if (escURI != NULL) {
2460             /* Try parsing the escaped path */
2461             uri = xmlParseURI((const char *) escURI);
2462             /* If successful, return the escaped string */
2463             if (uri != NULL) {
2464                 xmlFreeURI(uri);
2465                 return escURI;
2466             }
2467             xmlFree(escURI);
2468         }
2469     }
2470
2471 path_processing:
2472 /* For Windows implementations, replace backslashes with 'forward slashes' */
2473 #if defined(_WIN32)
2474     /*
2475      * Create a URI structure
2476      */
2477     uri = xmlCreateURI();
2478     if (uri == NULL) {          /* Guard against 'out of memory' */
2479         return(NULL);
2480     }
2481
2482     len = xmlStrlen(path);
2483     if ((len > 2) && IS_WINDOWS_PATH(path)) {
2484         /* make the scheme 'file' */
2485         uri->scheme = (char *) xmlStrdup(BAD_CAST "file");
2486         /* allocate space for leading '/' + path + string terminator */
2487         uri->path = xmlMallocAtomic(len + 2);
2488         if (uri->path == NULL) {
2489             xmlFreeURI(uri);    /* Guard against 'out of memory' */
2490             return(NULL);
2491         }
2492         /* Put in leading '/' plus path */
2493         uri->path[0] = '/';
2494         p = uri->path + 1;
2495         strncpy(p, (char *) path, len + 1);
2496     } else {
2497         uri->path = (char *) xmlStrdup(path);
2498         if (uri->path == NULL) {
2499             xmlFreeURI(uri);
2500             return(NULL);
2501         }
2502         p = uri->path;
2503     }
2504     /* Now change all occurrences of '\' to '/' */
2505     while (*p != '\0') {
2506         if (*p == '\\')
2507             *p = '/';
2508         p++;
2509     }
2510
2511     if (uri->scheme == NULL) {
2512         ret = xmlStrdup((const xmlChar *) uri->path);
2513     } else {
2514         ret = xmlSaveUri(uri);
2515     }
2516
2517     xmlFreeURI(uri);
2518 #else
2519     ret = xmlStrdup((const xmlChar *) path);
2520 #endif
2521     return(ret);
2522 }
2523
2524 /**
2525  * xmlPathToURI:
2526  * @path:  the resource locator in a filesystem notation
2527  *
2528  * Constructs an URI expressing the existing path
2529  *
2530  * Returns a new URI, or a duplicate of the path parameter if the
2531  * construction fails. The caller is responsible for freeing the memory
2532  * occupied by the returned string. If there is insufficient memory available,
2533  * or the argument is NULL, the function returns NULL.
2534  */
2535 xmlChar *
2536 xmlPathToURI(const xmlChar *path)
2537 {
2538     xmlURIPtr uri;
2539     xmlURI temp;
2540     xmlChar *ret, *cal;
2541
2542     if (path == NULL)
2543         return(NULL);
2544
2545     if ((uri = xmlParseURI((const char *) path)) != NULL) {
2546         xmlFreeURI(uri);
2547         return xmlStrdup(path);
2548     }
2549     cal = xmlCanonicPath(path);
2550     if (cal == NULL)
2551         return(NULL);
2552 #if defined(_WIN32)
2553     /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2554        If 'cal' is a valid URI already then we are done here, as continuing would make
2555        it invalid. */
2556     if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2557         xmlFreeURI(uri);
2558         return cal;
2559     }
2560     /* 'cal' can contain a relative path with backslashes. If that is processed
2561        by xmlSaveURI, they will be escaped and the external entity loader machinery
2562        will fail. So convert them to slashes. Misuse 'ret' for walking. */
2563     ret = cal;
2564     while (*ret != '\0') {
2565         if (*ret == '\\')
2566             *ret = '/';
2567         ret++;
2568     }
2569 #endif
2570     memset(&temp, 0, sizeof(temp));
2571     temp.path = (char *) cal;
2572     ret = xmlSaveUri(&temp);
2573     xmlFree(cal);
2574     return(ret);
2575 }