util/uri.c

   1 /**
   2  * uri.c: set of generic URI related routines
   3  *
   4  * Reference: RFCs 3986, 2732 and 2373
   5  *
   6  * Copyright (C) 1998-2003 Daniel Veillard.  All Rights Reserved.
   7  *
   8  * Permission is hereby granted, free of charge, to any person obtaining a copy
   9  * of this software and associated documentation files (the "Software"), to deal
  10  * in the Software without restriction, including without limitation the rights
  11  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  12  * copies of the Software, and to permit persons to whom the Software is
  13  * furnished to do so, subject to the following conditions:
  14  *
  15  * The above copyright notice and this permission notice shall be included in
  16  * all copies or substantial portions of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
  21  * DANIEL VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
  22  * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  23  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  24  *
  25  * Except as contained in this notice, the name of Daniel Veillard shall not
  26  * be used in advertising or otherwise to promote the sale, use or other
  27  * dealings in this Software without prior written authorization from him.
  28  *
  29  * daniel@veillard.com
  30  *
  31  **
  32  *
  33  * Copyright (C) 2007, 2009-2010 Red Hat, Inc.
  34  *
  35  * This library is free software; you can redistribute it and/or
  36  * modify it under the terms of the GNU Lesser General Public
  37  * License as published by the Free Software Foundation; either
  38  * version 2.1 of the License, or (at your option) any later version.
  39  *
  40  * This library is distributed in the hope that it will be useful,
  41  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  42  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  43  * Lesser General Public License for more details.
  44  *
  45  * You should have received a copy of the GNU Lesser General Public
  46  * License along with this library. If not, see <https://www.gnu.org/licenses/>.
  47  *
  48  * Authors:
  49  *    Richard W.M. Jones <rjones@redhat.com>
  50  *
  51  */
  52
  53 #include "qemu/osdep.h"
  54 #include "qemu/cutils.h"
  55
  56 #include "qemu/uri.h"
  57
     /*
      * Forward declaration: the parser functions below call uri_clean()
      * to reset a URI when parsing fails.
      */
  58 static void uri_clean(URI *uri);
  59
  60 /*
  61  * Old rule from 2396 used in legacy handling code
  62  * alpha    = lowalpha | upalpha
  63  *
      * Note: the IS_* macros in this group take a character VALUE and may
      * evaluate their argument more than once, so the argument must not
      * have side effects.  IS_UNWISE() is the exception: it takes a pointer.
  63  */
  64 #define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))
  65
  66 /*
  67  * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
  68  *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
  69  *            "u" | "v" | "w" | "x" | "y" | "z"
  70  */
  71
  72 #define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))
  73
  74 /*
  75  * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
  76  *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
  77  *           "U" | "V" | "W" | "X" | "Y" | "Z"
  78  */
  79 #define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))
  80
     /* Drop any IS_DIGIT already defined before this point, then redefine it. */
  81 #ifdef IS_DIGIT
  82 #undef IS_DIGIT
  83 #endif
  84 /*
  85  * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
  86  */
  87 #define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))
  88
  89 /*
  90  * alphanum = alpha | digit
  91  */
  92
  93 #define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))
  94
  95 /*
  96  * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
  97  */
  98
  99 #define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||            \
 100     ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||           \
 101     ((x) == '(') || ((x) == ')'))
 102
 103 /*
 104  * unwise = "{" | "}" | "|" | "\" | "^" | "`"
      *
      * The macro below additionally accepts "[" and "]".  Unlike the
      * value-based macros above, it takes a POINTER to the character
      * and dereferences it.
 105  */
 106
 107 #define IS_UNWISE(p)                                                           \
 108     (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||                  \
 109      ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||                 \
 110      ((*(p) == ']')) || ((*(p) == '`')))
 111 /*
 112  * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 113  *            "[" | "]"
 114  */
 115
 116 #define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') ||        \
 117     ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') ||            \
 118     ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') ||            \
 119     ((x) == ']'))
 120
 121 /*
 122  * unreserved = alphanum | mark
 123  */
 124
 125 #define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
 126
 127 /*
 128  * Skip to next pointer char, handle escaped sequences
 129  */
 130
 131 #define NEXT(p) ((*p == '%') ? p += 3 : p++)
 132
 133 /*
 134  * Productions from the spec.
 135  *
 136  *    authority     = server | reg_name
 137  *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 138  *                        ";" | ":" | "@" | "&" | "=" | "+" )
 139  *
 140  * path          = [ abs_path | opaque_part ]
 141  */
 142
 143 /************************************************************************
 144  *                                                                      *
 145  *                         RFC 3986 parser                              *
 146  *                                                                      *
 147  ************************************************************************/
 148
     /*
      * All ISA_* macros take a POINTER to the character under test and
      * dereference it (possibly several times), so the argument must not
      * have side effects.
      */
 149 #define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
 150 #define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||                      \
 151                       ((*(p) >= 'A') && (*(p) <= 'Z')))
 152 #define ISA_HEXDIG(p)                                                          \
 153     (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||                       \
 154      ((*(p) >= 'A') && (*(p) <= 'F')))
 155
 156 /*
 157  *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 158  *                     / "*" / "+" / "," / ";" / "="
 159  */
 160 #define ISA_SUB_DELIM(p)                                                       \
 161     (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||                  \
 162      ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||                  \
 163      ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||                  \
 164      ((*(p) == '=')) || ((*(p) == '\'')))
 165
 166 /*
 167  *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 168  */
 169 #define ISA_UNRESERVED(p)                                                      \
 170     ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||                    \
 171      ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))
 172
 173 /*
 174  *    pct-encoded   = "%" HEXDIG HEXDIG
      *
      * The && chain short-circuits: p[1] is read only when p[0] is '%',
      * and p[2] only when p[1] is a hex digit, so a NUL-terminated string
      * is never read beyond its terminator.
 175  */
 176 #define ISA_PCT_ENCODED(p)                                                     \
 177     ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2)))
 178
 179 /*
 180  *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 181  */
 182 #define ISA_PCHAR(p)                                                           \
 183     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||            \
 184      ((*(p) == ':')) || ((*(p) == '@')))
 185
 186 /**
 187  * rfc3986_parse_scheme:
 188  * @uri:  pointer to an URI structure
 189  * @str:  pointer to the string to analyze
 190  *
 191  * Parse an URI scheme
 192  *
 193  * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
 194  *
 195  * Returns 0 or the error code
 196  */
 197 static int rfc3986_parse_scheme(URI *uri, const char **str)
 198 {
 199     const char *cur;
 200
 201     if (str == NULL) {
 202         return -1;
 203     }
 204
 205     cur = *str;
 206     if (!ISA_ALPHA(cur)) {
 207         return 2;
 208     }
 209     cur++;
 210     while (ISA_ALPHA(cur) || ISA_DIGIT(cur) || (*cur == '+') || (*cur == '-') ||
 211            (*cur == '.')) {
 212         cur++;
 213     }
 214     if (uri != NULL) {
 215         g_free(uri->scheme);
 216         uri->scheme = g_strndup(*str, cur - *str);
 217     }
 218     *str = cur;
 219     return 0;
 220 }
 221
 222 /**
 223  * rfc3986_parse_fragment:
 224  * @uri:  pointer to an URI structure
 225  * @str:  pointer to the string to analyze
 226  *
 227  * Parse the query part of an URI
 228  *
 229  * fragment      = *( pchar / "/" / "?" )
 230  * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
 231  *       in the fragment identifier but this is used very broadly for
 232  *       xpointer scheme selection, so we are allowing it here to not break
 233  *       for example all the DocBook processing chains.
 234  *
 235  * Returns 0 or the error code
 236  */
 237 static int rfc3986_parse_fragment(URI *uri, const char **str)
 238 {
 239     const char *cur;
 240
 241     if (str == NULL) {
 242         return -1;
 243     }
 244
 245     cur = *str;
 246
 247     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
 248            (*cur == '[') || (*cur == ']') ||
 249            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) {
 250         NEXT(cur);
 251     }
 252     if (uri != NULL) {
 253         g_free(uri->fragment);
 254         if (uri->cleanup & 2) {
 255             uri->fragment = g_strndup(*str, cur - *str);
 256         } else {
 257             uri->fragment = g_uri_unescape_segment(*str, cur, NULL);
 258         }
 259     }
 260     *str = cur;
 261     return 0;
 262 }
 263
 264 /**
 265  * rfc3986_parse_query:
 266  * @uri:  pointer to an URI structure
 267  * @str:  pointer to the string to analyze
 268  *
 269  * Parse the query part of an URI
 270  *
 271  * query = *uric
 272  *
 273  * Returns 0 or the error code
 274  */
 275 static int rfc3986_parse_query(URI *uri, const char **str)
 276 {
 277     const char *cur;
 278
 279     if (str == NULL) {
 280         return -1;
 281     }
 282
 283     cur = *str;
 284
 285     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
 286            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) {
 287         NEXT(cur);
 288     }
 289     if (uri != NULL) {
 290         g_free(uri->query);
 291         uri->query = g_strndup(*str, cur - *str);
 292     }
 293     *str = cur;
 294     return 0;
 295 }
 296
 297 /**
 298  * rfc3986_parse_port:
 299  * @uri:  pointer to an URI structure
 300  * @str:  the string to analyze
 301  *
 302  * Parse a port  part and fills in the appropriate fields
 303  * of the @uri structure
 304  *
 305  * port          = *DIGIT
 306  *
 307  * Returns 0 or the error code
 308  */
 309 static int rfc3986_parse_port(URI *uri, const char **str)
 310 {
 311     const char *cur = *str;
 312     int port = 0;
 313
 314     if (ISA_DIGIT(cur)) {
 315         while (ISA_DIGIT(cur)) {
 316             port = port * 10 + (*cur - '0');
 317             if (port > 65535) {
 318                 return 1;
 319             }
 320             cur++;
 321         }
 322         if (uri) {
 323             uri->port = port;
 324         }
 325         *str = cur;
 326         return 0;
 327     }
 328     return 1;
 329 }
 330
 331 /**
 332  * rfc3986_parse_user_info:
 333  * @uri:  pointer to an URI structure
 334  * @str:  the string to analyze
 335  *
 336  * Parse a user information part and fill in the appropriate fields
 337  * of the @uri structure
 338  *
 339  * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
 340  *
 341  * Returns 0 or the error code
 342  */
 343 static int rfc3986_parse_user_info(URI *uri, const char **str)
 344 {
 345     const char *cur;
 346
 347     cur = *str;
 348     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur) ||
 349            (*cur == ':')) {
 350         NEXT(cur);
 351     }
 352     if (*cur == '@') {
 353         if (uri != NULL) {
 354             g_free(uri->user);
 355             if (uri->cleanup & 2) {
 356                 uri->user = g_strndup(*str, cur - *str);
 357             } else {
 358                 uri->user = g_uri_unescape_segment(*str, cur, NULL);
 359             }
 360         }
 361         *str = cur;
 362         return 0;
 363     }
 364     return 1;
 365 }
 366
 367 /**
 368  * rfc3986_parse_dec_octet:
 369  * @str:  the string to analyze
 370  *
 371  *    dec-octet     = DIGIT                 ; 0-9
 372  *                  / %x31-39 DIGIT         ; 10-99
 373  *                  / "1" 2DIGIT            ; 100-199
 374  *                  / "2" %x30-34 DIGIT     ; 200-249
 375  *                  / "25" %x30-35          ; 250-255
 376  *
 377  * Skip a dec-octet.
 378  *
 379  * Returns 0 if found and skipped, 1 otherwise
 380  */
 381 static int rfc3986_parse_dec_octet(const char **str)
 382 {
 383     const char *cur = *str;
 384
 385     if (!(ISA_DIGIT(cur))) {
 386         return 1;
 387     }
 388     if (!ISA_DIGIT(cur + 1)) {
 389         cur++;
 390     } else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur + 2))) {
 391         cur += 2;
 392     } else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2))) {
 393         cur += 3;
 394     } else if ((*cur == '2') && (*(cur + 1) >= '0') && (*(cur + 1) <= '4') &&
 395              (ISA_DIGIT(cur + 2))) {
 396         cur += 3;
 397     } else if ((*cur == '2') && (*(cur + 1) == '5') && (*(cur + 2) >= '0') &&
 398              (*(cur + 1) <= '5')) {
 399         cur += 3;
 400     } else {
 401         return 1;
 402     }
 403     *str = cur;
 404     return 0;
 405 }
 406 /**
 407  * rfc3986_parse_host:
 408  * @uri:  pointer to an URI structure
 409  * @str:  the string to analyze
 410  *
 411  * Parse an host part and fills in the appropriate fields
 412  * of the @uri structure
 413  *
 414  * host          = IP-literal / IPv4address / reg-name
 415  * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
 416  * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
 417  * reg-name      = *( unreserved / pct-encoded / sub-delims )
 418  *
 419  * Returns 0 or the error code
 420  */
 421 static int rfc3986_parse_host(URI *uri, const char **str)
 422 {
 423     const char *cur = *str;
 424     const char *host;
 425
 426     host = cur;
 427     /*
 428      * IPv6 and future addressing scheme are enclosed between brackets
 429      */
 430     if (*cur == '[') {
 431         cur++;
 432         while ((*cur != ']') && (*cur != 0)) {
 433             cur++;
 434         }
 435         if (*cur != ']') {
 436             return 1;
 437         }
 438         cur++;
 439         goto found;
 440     }
 441     /*
 442      * try to parse an IPv4
 443      */
 444     if (ISA_DIGIT(cur)) {
 445         if (rfc3986_parse_dec_octet(&cur) != 0) {
 446             goto not_ipv4;
 447         }
 448         if (*cur != '.') {
 449             goto not_ipv4;
 450         }
 451         cur++;
 452         if (rfc3986_parse_dec_octet(&cur) != 0) {
 453             goto not_ipv4;
 454         }
 455         if (*cur != '.') {
 456             goto not_ipv4;
 457         }
 458         if (rfc3986_parse_dec_octet(&cur) != 0) {
 459             goto not_ipv4;
 460         }
 461         if (*cur != '.') {
 462             goto not_ipv4;
 463         }
 464         if (rfc3986_parse_dec_octet(&cur) != 0) {
 465             goto not_ipv4;
 466         }
 467         goto found;
 468     not_ipv4:
 469         cur = *str;
 470     }
 471     /*
 472      * then this should be a hostname which can be empty
 473      */
 474     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur)) {
 475         NEXT(cur);
 476     }
 477 found:
 478     if (uri != NULL) {
 479         g_free(uri->authority);
 480         uri->authority = NULL;
 481         g_free(uri->server);
 482         if (cur != host) {
 483             if (uri->cleanup & 2) {
 484                 uri->server = g_strndup(host, cur - host);
 485             } else {
 486                 uri->server = g_uri_unescape_segment(host, cur, NULL);
 487             }
 488         } else {
 489             uri->server = NULL;
 490         }
 491     }
 492     *str = cur;
 493     return 0;
 494 }
 495
 496 /**
 497  * rfc3986_parse_authority:
 498  * @uri:  pointer to an URI structure
 499  * @str:  the string to analyze
 500  *
 501  * Parse an authority part and fills in the appropriate fields
 502  * of the @uri structure
 503  *
 504  * authority     = [ userinfo "@" ] host [ ":" port ]
 505  *
 506  * Returns 0 or the error code
 507  */
 508 static int rfc3986_parse_authority(URI *uri, const char **str)
 509 {
 510     const char *cur;
 511     int ret;
 512
 513     cur = *str;
 514     /*
 515      * try to parse a userinfo and check for the trailing @
 516      */
 517     ret = rfc3986_parse_user_info(uri, &cur);
 518     if ((ret != 0) || (*cur != '@')) {
 519         cur = *str;
 520     } else {
 521         cur++;
 522     }
 523     ret = rfc3986_parse_host(uri, &cur);
 524     if (ret != 0) {
 525         return ret;
 526     }
 527     if (*cur == ':') {
 528         cur++;
 529         ret = rfc3986_parse_port(uri, &cur);
 530         if (ret != 0) {
 531             return ret;
 532         }
 533     }
 534     *str = cur;
 535     return 0;
 536 }
 537
 538 /**
 539  * rfc3986_parse_segment:
 540  * @str:  the string to analyze
 541  * @forbid: an optional forbidden character
 542  * @empty: allow an empty segment
 543  *
 544  * Parse a segment and fills in the appropriate fields
 545  * of the @uri structure
 546  *
 547  * segment       = *pchar
 548  * segment-nz    = 1*pchar
 549  * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 550  *               ; non-zero-length segment without any colon ":"
 551  *
 552  * Returns 0 or the error code
 553  */
 554 static int rfc3986_parse_segment(const char **str, char forbid, int empty)
 555 {
 556     const char *cur;
 557
 558     cur = *str;
 559     if (!ISA_PCHAR(cur)) {
 560         if (empty) {
 561             return 0;
 562         }
 563         return 1;
 564     }
 565     while (ISA_PCHAR(cur) && (*cur != forbid)) {
 566         NEXT(cur);
 567     }
 568     *str = cur;
 569     return 0;
 570 }
 571
 572 /**
 573  * rfc3986_parse_path_ab_empty:
 574  * @uri:  pointer to an URI structure
 575  * @str:  the string to analyze
 576  *
 577  * Parse an path absolute or empty and fills in the appropriate fields
 578  * of the @uri structure
 579  *
 580  * path-abempty  = *( "/" segment )
 581  *
 582  * Returns 0 or the error code
 583  */
 584 static int rfc3986_parse_path_ab_empty(URI *uri, const char **str)
 585 {
 586     const char *cur;
 587     int ret;
 588
 589     cur = *str;
 590
 591     while (*cur == '/') {
 592         cur++;
 593         ret = rfc3986_parse_segment(&cur, 0, 1);
 594         if (ret != 0) {
 595             return ret;
 596         }
 597     }
 598     if (uri != NULL) {
 599         g_free(uri->path);
 600         if (*str != cur) {
 601             if (uri->cleanup & 2) {
 602                 uri->path = g_strndup(*str, cur - *str);
 603             } else {
 604                 uri->path = g_uri_unescape_segment(*str, cur, NULL);
 605             }
 606         } else {
 607             uri->path = NULL;
 608         }
 609     }
 610     *str = cur;
 611     return 0;
 612 }
 613
 614 /**
 615  * rfc3986_parse_path_absolute:
 616  * @uri:  pointer to an URI structure
 617  * @str:  the string to analyze
 618  *
 619  * Parse an path absolute and fills in the appropriate fields
 620  * of the @uri structure
 621  *
 622  * path-absolute = "/" [ segment-nz *( "/" segment ) ]
 623  *
 624  * Returns 0 or the error code
 625  */
 626 static int rfc3986_parse_path_absolute(URI *uri, const char **str)
 627 {
 628     const char *cur;
 629     int ret;
 630
 631     cur = *str;
 632
 633     if (*cur != '/') {
 634         return 1;
 635     }
 636     cur++;
 637     ret = rfc3986_parse_segment(&cur, 0, 0);
 638     if (ret == 0) {
 639         while (*cur == '/') {
 640             cur++;
 641             ret = rfc3986_parse_segment(&cur, 0, 1);
 642             if (ret != 0) {
 643                 return ret;
 644             }
 645         }
 646     }
 647     if (uri != NULL) {
 648         g_free(uri->path);
 649         if (cur != *str) {
 650             if (uri->cleanup & 2) {
 651                 uri->path = g_strndup(*str, cur - *str);
 652             } else {
 653                 uri->path = g_uri_unescape_segment(*str, cur, NULL);
 654             }
 655         } else {
 656             uri->path = NULL;
 657         }
 658     }
 659     *str = cur;
 660     return 0;
 661 }
 662
 663 /**
 664  * rfc3986_parse_path_rootless:
 665  * @uri:  pointer to an URI structure
 666  * @str:  the string to analyze
 667  *
 668  * Parse an path without root and fills in the appropriate fields
 669  * of the @uri structure
 670  *
 671  * path-rootless = segment-nz *( "/" segment )
 672  *
 673  * Returns 0 or the error code
 674  */
 675 static int rfc3986_parse_path_rootless(URI *uri, const char **str)
 676 {
 677     const char *cur;
 678     int ret;
 679
 680     cur = *str;
 681
 682     ret = rfc3986_parse_segment(&cur, 0, 0);
 683     if (ret != 0) {
 684         return ret;
 685     }
 686     while (*cur == '/') {
 687         cur++;
 688         ret = rfc3986_parse_segment(&cur, 0, 1);
 689         if (ret != 0) {
 690             return ret;
 691         }
 692     }
 693     if (uri != NULL) {
 694         g_free(uri->path);
 695         if (cur != *str) {
 696             if (uri->cleanup & 2) {
 697                 uri->path = g_strndup(*str, cur - *str);
 698             } else {
 699                 uri->path = g_uri_unescape_segment(*str, cur, NULL);
 700             }
 701         } else {
 702             uri->path = NULL;
 703         }
 704     }
 705     *str = cur;
 706     return 0;
 707 }
 708
 709 /**
 710  * rfc3986_parse_path_no_scheme:
 711  * @uri:  pointer to an URI structure
 712  * @str:  the string to analyze
 713  *
 714  * Parse an path which is not a scheme and fills in the appropriate fields
 715  * of the @uri structure
 716  *
 717  * path-noscheme = segment-nz-nc *( "/" segment )
 718  *
 719  * Returns 0 or the error code
 720  */
 721 static int rfc3986_parse_path_no_scheme(URI *uri, const char **str)
 722 {
 723     const char *cur;
 724     int ret;
 725
 726     cur = *str;
 727
 728     ret = rfc3986_parse_segment(&cur, ':', 0);
 729     if (ret != 0) {
 730         return ret;
 731     }
 732     while (*cur == '/') {
 733         cur++;
 734         ret = rfc3986_parse_segment(&cur, 0, 1);
 735         if (ret != 0) {
 736             return ret;
 737         }
 738     }
 739     if (uri != NULL) {
 740         g_free(uri->path);
 741         if (cur != *str) {
 742             if (uri->cleanup & 2) {
 743                 uri->path = g_strndup(*str, cur - *str);
 744             } else {
 745                 uri->path = g_uri_unescape_segment(*str, cur, NULL);
 746             }
 747         } else {
 748             uri->path = NULL;
 749         }
 750     }
 751     *str = cur;
 752     return 0;
 753 }
 754
 755 /**
 756  * rfc3986_parse_hier_part:
 757  * @uri:  pointer to an URI structure
 758  * @str:  the string to analyze
 759  *
 760  * Parse an hierarchical part and fills in the appropriate fields
 761  * of the @uri structure
 762  *
 763  * hier-part     = "//" authority path-abempty
 764  *                / path-absolute
 765  *                / path-rootless
 766  *                / path-empty
 767  *
 768  * Returns 0 or the error code
 769  */
 770 static int rfc3986_parse_hier_part(URI *uri, const char **str)
 771 {
 772     const char *cur;
 773     int ret;
 774
 775     cur = *str;
 776
 777     if ((*cur == '/') && (*(cur + 1) == '/')) {
 778         cur += 2;
 779         ret = rfc3986_parse_authority(uri, &cur);
 780         if (ret != 0) {
 781             return ret;
 782         }
 783         ret = rfc3986_parse_path_ab_empty(uri, &cur);
 784         if (ret != 0) {
 785             return ret;
 786         }
 787         *str = cur;
 788         return 0;
 789     } else if (*cur == '/') {
 790         ret = rfc3986_parse_path_absolute(uri, &cur);
 791         if (ret != 0) {
 792             return ret;
 793         }
 794     } else if (ISA_PCHAR(cur)) {
 795         ret = rfc3986_parse_path_rootless(uri, &cur);
 796         if (ret != 0) {
 797             return ret;
 798         }
 799     } else {
 800         /* path-empty is effectively empty */
 801         if (uri != NULL) {
 802             g_free(uri->path);
 803             uri->path = NULL;
 804         }
 805     }
 806     *str = cur;
 807     return 0;
 808 }
 809
 810 /**
 811  * rfc3986_parse_relative_ref:
 812  * @uri:  pointer to an URI structure
 813  * @str:  the string to analyze
 814  *
 815  * Parse an URI string and fills in the appropriate fields
 816  * of the @uri structure
 817  *
 818  * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
 819  * relative-part = "//" authority path-abempty
 820  *               / path-absolute
 821  *               / path-noscheme
 822  *               / path-empty
 823  *
 824  * Returns 0 or the error code
 825  */
 826 static int rfc3986_parse_relative_ref(URI *uri, const char *str)
 827 {
 828     int ret;
 829
 830     if ((*str == '/') && (*(str + 1) == '/')) {
 831         str += 2;
 832         ret = rfc3986_parse_authority(uri, &str);
 833         if (ret != 0) {
 834             return ret;
 835         }
 836         ret = rfc3986_parse_path_ab_empty(uri, &str);
 837         if (ret != 0) {
 838             return ret;
 839         }
 840     } else if (*str == '/') {
 841         ret = rfc3986_parse_path_absolute(uri, &str);
 842         if (ret != 0) {
 843             return ret;
 844         }
 845     } else if (ISA_PCHAR(str)) {
 846         ret = rfc3986_parse_path_no_scheme(uri, &str);
 847         if (ret != 0) {
 848             return ret;
 849         }
 850     } else {
 851         /* path-empty is effectively empty */
 852         if (uri != NULL) {
 853             g_free(uri->path);
 854             uri->path = NULL;
 855         }
 856     }
 857
 858     if (*str == '?') {
 859         str++;
 860         ret = rfc3986_parse_query(uri, &str);
 861         if (ret != 0) {
 862             return ret;
 863         }
 864     }
 865     if (*str == '#') {
 866         str++;
 867         ret = rfc3986_parse_fragment(uri, &str);
 868         if (ret != 0) {
 869             return ret;
 870         }
 871     }
 872     if (*str != 0) {
 873         uri_clean(uri);
 874         return 1;
 875     }
 876     return 0;
 877 }
 878
 879 /**
 880  * rfc3986_parse:
 881  * @uri:  pointer to an URI structure
 882  * @str:  the string to analyze
 883  *
 884  * Parse an URI string and fills in the appropriate fields
 885  * of the @uri structure
 886  *
 887  * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
 888  *
 889  * Returns 0 or the error code
 890  */
 891 static int rfc3986_parse(URI *uri, const char *str)
 892 {
 893     int ret;
 894
 895     ret = rfc3986_parse_scheme(uri, &str);
 896     if (ret != 0) {
 897         return ret;
 898     }
 899     if (*str != ':') {
 900         return 1;
 901     }
 902     str++;
 903     ret = rfc3986_parse_hier_part(uri, &str);
 904     if (ret != 0) {
 905         return ret;
 906     }
 907     if (*str == '?') {
 908         str++;
 909         ret = rfc3986_parse_query(uri, &str);
 910         if (ret != 0) {
 911             return ret;
 912         }
 913     }
 914     if (*str == '#') {
 915         str++;
 916         ret = rfc3986_parse_fragment(uri, &str);
 917         if (ret != 0) {
 918             return ret;
 919         }
 920     }
 921     if (*str != 0) {
 922         uri_clean(uri);
 923         return 1;
 924     }
 925     return 0;
 926 }
 927
 928 /**
 929  * rfc3986_parse_uri_reference:
 930  * @uri:  pointer to an URI structure
 931  * @str:  the string to analyze
 932  *
 933  * Parse an URI reference string and fills in the appropriate fields
 934  * of the @uri structure
 935  *
 936  * URI-reference = URI / relative-ref
 937  *
 938  * Returns 0 or the error code
 939  */
 940 static int rfc3986_parse_uri_reference(URI *uri, const char *str)
 941 {
 942     int ret;
 943
 944     if (str == NULL) {
 945         return -1;
 946     }
 947     uri_clean(uri);
 948
 949     /*
 950      * Try first to parse absolute refs, then fallback to relative if
 951      * it fails.
 952      */
 953     ret = rfc3986_parse(uri, str);
 954     if (ret != 0) {
 955         uri_clean(uri);
 956         ret = rfc3986_parse_relative_ref(uri, str);
 957         if (ret != 0) {
 958             uri_clean(uri);
 959             return ret;
 960         }
 961     }
 962     return 0;
 963 }
 964
 965 /**
 966  * uri_parse:
 967  * @str:  the URI string to analyze
 968  *
 969  * Parse an URI based on RFC 3986
 970  *
 971  * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
 972  *
 973  * Returns a newly built URI or NULL in case of error
 974  */
 975 URI *uri_parse(const char *str)
 976 {
 977     URI *uri;
 978     int ret;
 979
 980     if (str == NULL) {
 981         return NULL;
 982     }
 983     uri = uri_new();
 984     ret = rfc3986_parse_uri_reference(uri, str);
 985     if (ret) {
 986         uri_free(uri);
 987         return NULL;
 988     }
 989     return uri;
 990 }
 991
 992 /**
 993  * uri_parse_into:
 994  * @uri:  pointer to an URI structure
 995  * @str:  the string to analyze
 996  *
 997  * Parse an URI reference string based on RFC 3986 and fills in the
 998  * appropriate fields of the @uri structure
 999  *
1000  * URI-reference = URI / relative-ref
1001  *
1002  * Returns 0 or the error code
1003  */
1004 int uri_parse_into(URI *uri, const char *str)
1005 {
1006     return rfc3986_parse_uri_reference(uri, str);
1007 }
1008
1009 /**
1010  * uri_parse_raw:
1011  * @str:  the URI string to analyze
1012  * @raw:  if 1 unescaping of URI pieces are disabled
1013  *
1014  * Parse an URI but allows to keep intact the original fragments.
1015  *
1016  * URI-reference = URI / relative-ref
1017  *
1018  * Returns a newly built URI or NULL in case of error
1019  */
1020 URI *uri_parse_raw(const char *str, int raw)
1021 {
1022     URI *uri;
1023     int ret;
1024
1025     if (str == NULL) {
1026         return NULL;
1027     }
1028     uri = uri_new();
1029     if (raw) {
1030         uri->cleanup |= 2;
1031     }
1032     ret = uri_parse_into(uri, str);
1033     if (ret) {
1034         uri_free(uri);
1035         return NULL;
1036     }
1037     return uri;
1038 }
1039
1040 /************************************************************************
1041  *                                                                      *
1042  *                    Generic URI structure functions                   *
1043  *                                                                      *
1044  ************************************************************************/
1045
1046 /**
1047  * uri_new:
1048  *
1049  * Simply creates an empty URI
1050  *
1051  * Returns the new structure or NULL in case of error
1052  */
1053 URI *uri_new(void)
1054 {
1055     return g_new0(URI, 1);
1056 }
1057
/**
 * realloc2n:
 * @ret:  buffer to grow
 * @max:  in/out capacity of the buffer
 *
 * Grow the output buffer used when serialising a URI: *@max is doubled
 * and @ret is reallocated with g_realloc() to hold the new *@max plus
 * one extra byte.  No upper bound on the size is enforced here.
 *
 * Returns the reallocated buffer
 */
static char *realloc2n(char *ret, int *max)
{
    int doubled = *max * 2;
    char *grown = g_realloc(ret, doubled + 1);

    *max = doubled;
    return grown;
}
1074
1075 /**
1076  * uri_to_string:
1077  * @uri:  pointer to an URI
1078  *
1079  * Save the URI as an escaped string
1080  *
1081  * Returns a new string (to be deallocated by caller)
1082  */
1083 char *uri_to_string(URI *uri)
1084 {
1085     char *ret = NULL;
1086     char *temp;
1087     const char *p;
1088     int len;
1089     int max;
1090
1091     if (uri == NULL) {
1092         return NULL;
1093     }
1094
1095     max = 80;
1096     ret = g_malloc(max + 1);
1097     len = 0;
1098
1099     if (uri->scheme != NULL) {
1100         p = uri->scheme;
1101         while (*p != 0) {
1102             if (len >= max) {
1103                 temp = realloc2n(ret, &max);
1104                 ret = temp;
1105             }
1106             ret[len++] = *p++;
1107         }
1108         if (len >= max) {
1109             temp = realloc2n(ret, &max);
1110             ret = temp;
1111         }
1112         ret[len++] = ':';
1113     }
1114     if (uri->opaque != NULL) {
1115         p = uri->opaque;
1116         while (*p != 0) {
1117             if (len + 3 >= max) {
1118                 temp = realloc2n(ret, &max);
1119                 ret = temp;
1120             }
1121             if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p))) {
1122                 ret[len++] = *p++;
1123             } else {
1124                 int val = *(unsigned char *)p++;
1125                 int hi = val / 0x10, lo = val % 0x10;
1126                 ret[len++] = '%';
1127                 ret[len++] = hi + (hi > 9 ? 'A' - 10 : '0');
1128                 ret[len++] = lo + (lo > 9 ? 'A' - 10 : '0');
1129             }
1130         }
1131     } else {
1132         if (uri->server != NULL) {
1133             if (len + 3 >= max) {
1134                 temp = realloc2n(ret, &max);
1135                 ret = temp;
1136             }
1137             ret[len++] = '/';
1138             ret[len++] = '/';
1139             if (uri->user != NULL) {
1140                 p = uri->user;
1141                 while (*p != 0) {
1142                     if (len + 3 >= max) {
1143                         temp = realloc2n(ret, &max);
1144                         ret = temp;
1145                     }
1146                     if ((IS_UNRESERVED(*(p))) || ((*(p) == ';')) ||
1147                         ((*(p) == ':')) || ((*(p) == '&')) || ((*(p) == '=')) ||
1148                         ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ','))) {
1149                         ret[len++] = *p++;
1150                     } else {
1151                         int val = *(unsigned char *)p++;
1152                         int hi = val / 0x10, lo = val % 0x10;
1153                         ret[len++] = '%';
1154                         ret[len++] = hi + (hi > 9 ? 'A' - 10 : '0');
1155                         ret[len++] = lo + (lo > 9 ? 'A' - 10 : '0');
1156                     }
1157                 }
1158                 if (len + 3 >= max) {
1159                     temp = realloc2n(ret, &max);
1160                     ret = temp;
1161                 }
1162                 ret[len++] = '@';
1163             }
1164             p = uri->server;
1165             while (*p != 0) {
1166                 if (len >= max) {
1167                     temp = realloc2n(ret, &max);
1168                     ret = temp;
1169                 }
1170                 ret[len++] = *p++;
1171             }
1172             if (uri->port > 0) {
1173                 if (len + 10 >= max) {
1174                     temp = realloc2n(ret, &max);
1175                     ret = temp;
1176                 }
1177                 len += snprintf(&ret[len], max - len, ":%d", uri->port);
1178             }
1179         } else if (uri->authority != NULL) {
1180             if (len + 3 >= max) {
1181                 temp = realloc2n(ret, &max);
1182                 ret = temp;
1183             }
1184             ret[len++] = '/';
1185             ret[len++] = '/';
1186             p = uri->authority;
1187             while (*p != 0) {
1188                 if (len + 3 >= max) {
1189                     temp = realloc2n(ret, &max);
1190                     ret = temp;
1191                 }
1192                 if ((IS_UNRESERVED(*(p))) || ((*(p) == '$')) ||
1193                     ((*(p) == ',')) || ((*(p) == ';')) || ((*(p) == ':')) ||
1194                     ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||
1195                     ((*(p) == '+'))) {
1196                     ret[len++] = *p++;
1197                 } else {
1198                     int val = *(unsigned char *)p++;
1199                     int hi = val / 0x10, lo = val % 0x10;
1200                     ret[len++] = '%';
1201                     ret[len++] = hi + (hi > 9 ? 'A' - 10 : '0');
1202                     ret[len++] = lo + (lo > 9 ? 'A' - 10 : '0');
1203                 }
1204             }
1205         } else if (uri->scheme != NULL) {
1206             if (len + 3 >= max) {
1207                 temp = realloc2n(ret, &max);
1208                 ret = temp;
1209             }
1210             ret[len++] = '/';
1211             ret[len++] = '/';
1212         }
1213         if (uri->path != NULL) {
1214             p = uri->path;
1215             /*
1216              * the colon in file:///d: should not be escaped or
1217              * Windows accesses fail later.
1218              */
1219             if ((uri->scheme != NULL) && (p[0] == '/') &&
1220                 (((p[1] >= 'a') && (p[1] <= 'z')) ||
1221                  ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1222                 (p[2] == ':') && (!strcmp(uri->scheme, "file"))) {
1223                 if (len + 3 >= max) {
1224                     temp = realloc2n(ret, &max);
1225                     ret = temp;
1226                 }
1227                 ret[len++] = *p++;
1228                 ret[len++] = *p++;
1229                 ret[len++] = *p++;
1230             }
1231             while (*p != 0) {
1232                 if (len + 3 >= max) {
1233                     temp = realloc2n(ret, &max);
1234                     ret = temp;
1235                 }
1236                 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1237                     ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1238                     ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1239                     ((*(p) == ','))) {
1240                     ret[len++] = *p++;
1241                 } else {
1242                     int val = *(unsigned char *)p++;
1243                     int hi = val / 0x10, lo = val % 0x10;
1244                     ret[len++] = '%';
1245                     ret[len++] = hi + (hi > 9 ? 'A' - 10 : '0');
1246                     ret[len++] = lo + (lo > 9 ? 'A' - 10 : '0');
1247                 }
1248             }
1249         }
1250         if (uri->query != NULL) {
1251             if (len + 1 >= max) {
1252                 temp = realloc2n(ret, &max);
1253                 ret = temp;
1254             }
1255             ret[len++] = '?';
1256             p = uri->query;
1257             while (*p != 0) {
1258                 if (len + 1 >= max) {
1259                     temp = realloc2n(ret, &max);
1260                     ret = temp;
1261                 }
1262                 ret[len++] = *p++;
1263             }
1264         }
1265     }
1266     if (uri->fragment != NULL) {
1267         if (len + 3 >= max) {
1268             temp = realloc2n(ret, &max);
1269             ret = temp;
1270         }
1271         ret[len++] = '#';
1272         p = uri->fragment;
1273         while (*p != 0) {
1274             if (len + 3 >= max) {
1275                 temp = realloc2n(ret, &max);
1276                 ret = temp;
1277             }
1278             if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p)))) {
1279                 ret[len++] = *p++;
1280             } else {
1281                 int val = *(unsigned char *)p++;
1282                 int hi = val / 0x10, lo = val % 0x10;
1283                 ret[len++] = '%';
1284                 ret[len++] = hi + (hi > 9 ? 'A' - 10 : '0');
1285                 ret[len++] = lo + (lo > 9 ? 'A' - 10 : '0');
1286             }
1287         }
1288     }
1289     if (len >= max) {
1290         temp = realloc2n(ret, &max);
1291         ret = temp;
1292     }
1293     ret[len] = 0;
1294     return ret;
1295 }
1296
1297 /**
1298  * uri_clean:
1299  * @uri:  pointer to an URI
1300  *
1301  * Make sure the URI struct is free of content
1302  */
1303 static void uri_clean(URI *uri)
1304 {
1305     if (uri == NULL) {
1306         return;
1307     }
1308
1309     g_free(uri->scheme);
1310     uri->scheme = NULL;
1311     g_free(uri->server);
1312     uri->server = NULL;
1313     g_free(uri->user);
1314     uri->user = NULL;
1315     g_free(uri->path);
1316     uri->path = NULL;
1317     g_free(uri->fragment);
1318     uri->fragment = NULL;
1319     g_free(uri->opaque);
1320     uri->opaque = NULL;
1321     g_free(uri->authority);
1322     uri->authority = NULL;
1323     g_free(uri->query);
1324     uri->query = NULL;
1325 }
1326
1327 /**
1328  * uri_free:
1329  * @uri:  pointer to an URI, NULL is ignored
1330  *
1331  * Free up the URI struct
1332  */
1333 void uri_free(URI *uri)
1334 {
1335     uri_clean(uri);
1336     g_free(uri);
1337 }
1338
1339 /************************************************************************
1340  *                                                                      *
1341  *                           Public functions                           *
1342  *                                                                      *
1343  ************************************************************************/
1344
1345 /*
1346  * Utility functions to help parse and assemble query strings.
1347  */
1348
1349 struct QueryParams *query_params_new(int init_alloc)
1350 {
1351     struct QueryParams *ps;
1352
1353     if (init_alloc <= 0) {
1354         init_alloc = 1;
1355     }
1356
1357     ps = g_new(QueryParams, 1);
1358     ps->n = 0;
1359     ps->alloc = init_alloc;
1360     ps->p = g_new(QueryParam, ps->alloc);
1361
1362     return ps;
1363 }
1364
1365 /* Ensure there is space to store at least one more parameter
1366  * at the end of the set.
1367  */
1368 static int query_params_append(struct QueryParams *ps, const char *name,
1369                                const char *value)
1370 {
1371     if (ps->n >= ps->alloc) {
1372         ps->p = g_renew(QueryParam, ps->p, ps->alloc * 2);
1373         ps->alloc *= 2;
1374     }
1375
1376     ps->p[ps->n].name = g_strdup(name);
1377     ps->p[ps->n].value = g_strdup(value);
1378     ps->p[ps->n].ignore = 0;
1379     ps->n++;
1380
1381     return 0;
1382 }
1383
1384 void query_params_free(struct QueryParams *ps)
1385 {
1386     int i;
1387
1388     for (i = 0; i < ps->n; ++i) {
1389         g_free(ps->p[i].name);
1390         g_free(ps->p[i].value);
1391     }
1392     g_free(ps->p);
1393     g_free(ps);
1394 }
1395
1396 struct QueryParams *query_params_parse(const char *query)
1397 {
1398     struct QueryParams *ps;
1399     const char *end, *eq;
1400
1401     ps = query_params_new(0);
1402     if (!query || query[0] == '\0') {
1403         return ps;
1404     }
1405
1406     while (*query) {
1407         char *name = NULL, *value = NULL;
1408
1409         /* Find the next separator, or end of the string. */
1410         end = strchr(query, '&');
1411         if (!end) {
1412             end = qemu_strchrnul(query, ';');
1413         }
1414
1415         /* Find the first '=' character between here and end. */
1416         eq = strchr(query, '=');
1417         if (eq && eq >= end) {
1418             eq = NULL;
1419         }
1420
1421         /* Empty section (eg. "&&"). */
1422         if (end == query) {
1423             goto next;
1424         }
1425
1426         /* If there is no '=' character, then we have just "name"
1427          * and consistent with CGI.pm we assume value is "".
1428          */
1429         else if (!eq) {
1430             name = g_uri_unescape_segment(query, end, NULL);
1431             value = NULL;
1432         }
1433         /* Or if we have "name=" here (works around annoying
1434          * problem when calling uri_string_unescape with len = 0).
1435          */
1436         else if (eq + 1 == end) {
1437             name = g_uri_unescape_segment(query, eq, NULL);
1438             value = g_new0(char, 1);
1439         }
1440         /* If the '=' character is at the beginning then we have
1441          * "=value" and consistent with CGI.pm we _ignore_ this.
1442          */
1443         else if (query == eq) {
1444             goto next;
1445         }
1446
1447         /* Otherwise it's "name=value". */
1448         else {
1449             name = g_uri_unescape_segment(query, eq, NULL);
1450             value = g_uri_unescape_segment(eq + 1, end, NULL);
1451         }
1452
1453         /* Append to the parameter set. */
1454         query_params_append(ps, name, value);
1455         g_free(name);
1456         g_free(value);
1457
1458     next:
1459         query = end;
1460         if (*query) {
1461             query++; /* skip '&' separator */
1462         }
1463     }
1464
1465     return ps;
1466 }