dlls/urlmon/uri.c

   1 /*
   2  * Copyright 2010 Jacek Caban for CodeWeavers
   3  * Copyright 2010 Thomas Mullaly
   4  *
   5  * This library is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU Lesser General Public
   7  * License as published by the Free Software Foundation; either
   8  * version 2.1 of the License, or (at your option) any later version.
   9  *
  10  * This library is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * Lesser General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU Lesser General Public
  16  * License along with this library; if not, write to the Free Software
  17  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
  18  */
  19
  20 #include "urlmon_main.h"
  21 #include "wine/debug.h"
  22
  23 #define NO_SHLWAPI_REG
  24 #include "shlwapi.h"
  25
  26 #define UINT_MAX 0xffffffff
  27 #define USHORT_MAX 0xffff
  28
  29 WINE_DEFAULT_DEBUG_CHANNEL(urlmon);
  30
/* Implementation data behind an IUri object.
 *
 * Alongside the original string ('raw_uri') it tracks a canonicalized
 * buffer and, for every URI component, a start position and a length.
 *
 * NOTE(review): the *_start, *_split and *_offset members are signed,
 * which suggests a negative value marks an absent component and that they
 * index into 'canon_uri' -- confirm against the code that fills them in.
 */
typedef struct {
    const IUriVtbl  *lpIUriVtbl;
    LONG ref;

    /* The URI string this object was created from. */
    BSTR            raw_uri;

    /* Information about the canonicalized URI's buffer. */
    WCHAR           *canon_uri;
    DWORD           canon_size;
    DWORD           canon_len;

    /* Scheme component. */
    INT             scheme_start;
    DWORD           scheme_len;
    URL_SCHEME      scheme_type;

    /* Userinfo component. */
    INT             userinfo_start;
    DWORD           userinfo_len;
    INT             userinfo_split;

    /* Host component. */
    INT             host_start;
    DWORD           host_len;
    Uri_HOST_TYPE   host_type;

    /* Port value, meaningful only when 'has_port' is set (presumably). */
    USHORT          port;
    BOOL            has_port;

    /* Authority component. */
    INT             authority_start;
    DWORD           authority_len;

    INT             domain_offset;

    /* Path component. */
    INT             path_start;
    DWORD           path_len;
    INT             extension_offset;

    /* Query component. */
    INT             query_start;
    DWORD           query_len;

    /* Fragment component. */
    INT             fragment_start;
    DWORD           fragment_len;
} Uri;
  72
/* Implementation data behind an IUriBuilder object: the interface
 * vtable pointer and a reference count.
 */
typedef struct {
    const IUriBuilderVtbl  *lpIUriBuilderVtbl;
    LONG ref;
} UriBuilder;
  77
/* One h16 (hexadecimal) component of an IPv6 address, stored as a
 * pointer to its first character plus its length in characters.
 * Consumers iterate over 'len' characters rather than relying on a
 * terminator.
 */
typedef struct {
    const WCHAR *str;
    DWORD       len;
} h16;
  82
/* Parsed (but not yet converted) pieces of an IPv6 address literal. */
typedef struct {
    /* IPv6 addresses can hold up to 8 h16 components. */
    h16         components[8];
    DWORD       h16_count;

    /* An IPv6 can have 1 elision ("::"). */
    const WCHAR *elision;

    /* An IPv6 can contain 1 IPv4 address as the last 32bits of the address. */
    const WCHAR *ipv4;
    DWORD       ipv4_len;

    /* Sizes in bytes, filled in by compute_ipv6_comps_size():
     * 'components_size' counts 2 bytes per h16 plus 4 for an IPv4 part,
     * 'elision_size' is what the "::" covers (0 when there is no elision).
     */
    INT         components_size;
    INT         elision_size;
} ipv6_address;
  98
/* Intermediate results collected while parsing a URI string.
 *
 * Each component is recorded as a pointer plus a length rather than as a
 * copy. parse_scheme_name() stores a pointer into the string being parsed;
 * NOTE(review): the other component pointers presumably also point into
 * 'uri' -- confirm against the parsers that set them.
 */
typedef struct {
    BSTR            uri;

    BOOL            is_relative;
    BOOL            is_opaque;
    BOOL            has_implicit_scheme;
    BOOL            has_implicit_ip;
    UINT            implicit_ipv4;

    /* Scheme name (without the trailing ':') and its deduced type. */
    const WCHAR     *scheme;
    DWORD           scheme_len;
    URL_SCHEME      scheme_type;

    const WCHAR     *userinfo;
    DWORD           userinfo_len;
    INT             userinfo_split;

    const WCHAR     *host;
    DWORD           host_len;
    Uri_HOST_TYPE   host_type;

    BOOL            has_ipv6;
    ipv6_address    ipv6_address;

    const WCHAR     *port;
    DWORD           port_len;
    USHORT          port_value;

    const WCHAR     *path;
    DWORD           path_len;

    const WCHAR     *query;
    DWORD           query_len;

    const WCHAR     *fragment;
    DWORD           fragment_len;
} parse_data;
 136
/* Upper-case hexadecimal digits, indexed by nibble value (0-15).
 * Used by pct_encode_val() to emit percent-encoded octets.
 */
static const CHAR hexDigits[] = "0123456789ABCDEF";
 138
/* List of scheme types/scheme names that are recognized by the IUri interface as of IE 7.
 *
 * 'scheme_name' holds up to 15 characters plus the terminator, which is
 * exactly what the longest entries ("ms-shell-rooted", "ms-shell-idlist")
 * require; a longer name needs a bigger array.
 *
 * NOTE(review): URL_SCHEME_MSHELP is paired with the name "hcp" -- this
 * looks deliberate, but is worth confirming against Windows.
 */
static const struct {
    URL_SCHEME  scheme;
    WCHAR       scheme_name[16];
} recognized_schemes[] = {
    {URL_SCHEME_FTP,            {'f','t','p',0}},
    {URL_SCHEME_HTTP,           {'h','t','t','p',0}},
    {URL_SCHEME_GOPHER,         {'g','o','p','h','e','r',0}},
    {URL_SCHEME_MAILTO,         {'m','a','i','l','t','o',0}},
    {URL_SCHEME_NEWS,           {'n','e','w','s',0}},
    {URL_SCHEME_NNTP,           {'n','n','t','p',0}},
    {URL_SCHEME_TELNET,         {'t','e','l','n','e','t',0}},
    {URL_SCHEME_WAIS,           {'w','a','i','s',0}},
    {URL_SCHEME_FILE,           {'f','i','l','e',0}},
    {URL_SCHEME_MK,             {'m','k',0}},
    {URL_SCHEME_HTTPS,          {'h','t','t','p','s',0}},
    {URL_SCHEME_SHELL,          {'s','h','e','l','l',0}},
    {URL_SCHEME_SNEWS,          {'s','n','e','w','s',0}},
    {URL_SCHEME_LOCAL,          {'l','o','c','a','l',0}},
    {URL_SCHEME_JAVASCRIPT,     {'j','a','v','a','s','c','r','i','p','t',0}},
    {URL_SCHEME_VBSCRIPT,       {'v','b','s','c','r','i','p','t',0}},
    {URL_SCHEME_ABOUT,          {'a','b','o','u','t',0}},
    {URL_SCHEME_RES,            {'r','e','s',0}},
    {URL_SCHEME_MSSHELLROOTED,  {'m','s','-','s','h','e','l','l','-','r','o','o','t','e','d',0}},
    {URL_SCHEME_MSSHELLIDLIST,  {'m','s','-','s','h','e','l','l','-','i','d','l','i','s','t',0}},
    {URL_SCHEME_MSHELP,         {'h','c','p',0}},
    {URL_SCHEME_WILDCARD,       {'*',0}}
};
 167
/* List of default ports Windows recognizes.
 *
 * Maps a scheme type to its default port number. Schemes that are not
 * listed here have no entry in this table.
 */
static const struct {
    URL_SCHEME  scheme;
    USHORT      port;
} default_ports[] = {
    {URL_SCHEME_FTP,    21},
    {URL_SCHEME_HTTP,   80},
    {URL_SCHEME_GOPHER, 70},
    {URL_SCHEME_NNTP,   119},
    {URL_SCHEME_TELNET, 23},
    {URL_SCHEME_WAIS,   210},
    {URL_SCHEME_HTTPS,  443},
};
 181
/* List of 3 character top level domain names Windows seems to recognize.
 * There might be more, but these are the only ones found so far.
 *
 * find_domain_name() compares host labels against these entries with a
 * case-insensitive, 3 character comparison.
 */
static const struct {
    WCHAR tld_name[4];
} recognized_tlds[] = {
    {{'c','o','m',0}},
    {{'e','d','u',0}},
    {{'g','o','v',0}},
    {{'i','n','t',0}},
    {{'m','i','l',0}},
    {{'n','e','t',0}},
    {{'o','r','g',0}}
};
 196
 197 static inline BOOL is_alpha(WCHAR val) {
 198         return ((val >= 'a' && val <= 'z') || (val >= 'A' && val <= 'Z'));
 199 }
 200
 201 static inline BOOL is_num(WCHAR val) {
 202         return (val >= '0' && val <= '9');
 203 }
 204
 205 /* A URI is implicitly a file path if it begins with
 206  * a drive letter (eg X:) or starts with "\\" (UNC path).
 207  */
 208 static inline BOOL is_implicit_file_path(const WCHAR *str) {
 209     if(is_alpha(str[0]) && str[1] == ':')
 210         return TRUE;
 211     else if(str[0] == '\\' && str[1] == '\\')
 212         return TRUE;
 213
 214     return FALSE;
 215 }
 216
 217 /* Checks if the URI is a hierarchical URI. A hierarchical
 218  * URI is one that has "//" after the scheme.
 219  */
 220 static BOOL check_hierarchical(const WCHAR **ptr) {
 221     const WCHAR *start = *ptr;
 222
 223     if(**ptr != '/')
 224         return FALSE;
 225
 226     ++(*ptr);
 227     if(**ptr != '/') {
 228         *ptr = start;
 229         return FALSE;
 230     }
 231
 232     ++(*ptr);
 233     return TRUE;
 234 }
 235
 236 /* unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~" */
 237 static inline BOOL is_unreserved(WCHAR val) {
 238     return (is_alpha(val) || is_num(val) || val == '-' || val == '.' ||
 239             val == '_' || val == '~');
 240 }
 241
 242 /* sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 243  *               / "*" / "+" / "," / ";" / "="
 244  */
 245 static inline BOOL is_subdelim(WCHAR val) {
 246     return (val == '!' || val == '$' || val == '&' ||
 247             val == '\'' || val == '(' || val == ')' ||
 248             val == '*' || val == '+' || val == ',' ||
 249             val == ';' || val == '=');
 250 }
 251
 252 /* gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@" */
 253 static inline BOOL is_gendelim(WCHAR val) {
 254     return (val == ':' || val == '/' || val == '?' ||
 255             val == '#' || val == '[' || val == ']' ||
 256             val == '@');
 257 }
 258
 259 /* Characters that delimit the end of the authority
 260  * section of a URI. Sometimes a '\\' is considered
 261  * an authority delimeter.
 262  */
 263 static inline BOOL is_auth_delim(WCHAR val, BOOL acceptSlash) {
 264     return (val == '#' || val == '/' || val == '?' ||
 265             val == '\0' || (acceptSlash && val == '\\'));
 266 }
 267
 268 /* reserved = gen-delims / sub-delims */
 269 static inline BOOL is_reserved(WCHAR val) {
 270     return (is_subdelim(val) || is_gendelim(val));
 271 }
 272
 273 static inline BOOL is_hexdigit(WCHAR val) {
 274     return ((val >= 'a' && val <= 'f') ||
 275             (val >= 'A' && val <= 'F') ||
 276             (val >= '0' && val <= '9'));
 277 }
 278
 279 static inline BOOL is_path_delim(WCHAR val) {
 280     return (!val || val == '#' || val == '?');
 281 }
 282
 283 /* Computes the size of the given IPv6 address.
 284  * Each h16 component is 16bits, if there is an IPv4 address, it's
 285  * 32bits. If there's an elision it can be 16bits to 128bits, depending
 286  * on the number of other components.
 287  *
 288  * Modeled after google-url's CheckIPv6ComponentsSize function
 289  */
 290 static void compute_ipv6_comps_size(ipv6_address *address) {
 291     address->components_size = address->h16_count * 2;
 292
 293     if(address->ipv4)
 294         /* IPv4 address is 4 bytes. */
 295         address->components_size += 4;
 296
 297     if(address->elision) {
 298         /* An elision can be anywhere from 2 bytes up to 16 bytes.
 299          * It size depends on the size of the h16 and IPv4 components.
 300          */
 301         address->elision_size = 16 - address->components_size;
 302         if(address->elision_size < 2)
 303             address->elision_size = 2;
 304     } else
 305         address->elision_size = 0;
 306 }
 307
 308 /* Taken from dlls/jscript/lex.c */
 309 static int hex_to_int(WCHAR val) {
 310     if(val >= '0' && val <= '9')
 311         return val - '0';
 312     else if(val >= 'a' && val <= 'f')
 313         return val - 'a' + 10;
 314     else if(val >= 'A' && val <= 'F')
 315         return val - 'A' + 10;
 316
 317     return -1;
 318 }
 319
 320 /* Helper function for converting a percent encoded string
 321  * representation of a WCHAR value into its actual WCHAR value. If
 322  * the two characters following the '%' aren't valid hex values then
 323  * this function returns the NULL character.
 324  *
 325  * Eg.
 326  *  "%2E" will result in '.' being returned by this function.
 327  */
 328 static WCHAR decode_pct_val(const WCHAR *ptr) {
 329     WCHAR ret = '\0';
 330
 331     if(*ptr == '%' && is_hexdigit(*(ptr + 1)) && is_hexdigit(*(ptr + 2))) {
 332         INT a = hex_to_int(*(ptr + 1));
 333         INT b = hex_to_int(*(ptr + 2));
 334
 335         ret = a << 4;
 336         ret += b;
 337     }
 338
 339     return ret;
 340 }
 341
 342 /* Helper function for percent encoding a given character
 343  * and storing the encoded value into a given buffer (dest).
 344  *
 345  * It's up to the calling function to ensure that there is
 346  * at least enough space in 'dest' for the percent encoded
 347  * value to be stored (so dest + 3 spaces available).
 348  */
 349 static inline void pct_encode_val(WCHAR val, WCHAR *dest) {
 350     dest[0] = '%';
 351     dest[1] = hexDigits[(val >> 4) & 0xf];
 352     dest[2] = hexDigits[val & 0xf];
 353 }
 354
 355 /* Scans the range of characters [str, end] and returns the last occurence
 356  * of 'ch' or returns NULL.
 357  */
 358 static const WCHAR *str_last_of(const WCHAR *str, const WCHAR *end, WCHAR ch) {
 359     const WCHAR *ptr = end;
 360
 361     while(ptr >= str) {
 362         if(*ptr == ch)
 363             return ptr;
 364         --ptr;
 365     }
 366
 367     return NULL;
 368 }
 369
/* Attempts to parse the domain name from the host.
 *
 * This function also includes the Top-level Domain (TLD) name
 * of the host when it tries to find the domain name. If it finds
 * a valid domain name it will assign 'domain_start' the offset
 * into 'host' where the domain name starts, otherwise 'domain_start'
 * is set to -1.
 *
 * If a domain name is found, its range is implied to be
 * [host+domain_start, host+host_len).
 *
 * Throughout this function 'last_tld' and 'sec_last_tld' point at the
 * last and second to last '.' of the host (or are NULL when there is
 * no such '.'), and 'end' points at the last character of the host.
 */
static void find_domain_name(const WCHAR *host, DWORD host_len,
                             INT *domain_start) {
    const WCHAR *last_tld, *sec_last_tld, *end;

    end = host+host_len-1;

    *domain_start = -1;

    /* There has to be at least enough room for a '.' followed by a
     * 3 character TLD for a domain to even exist in the host name.
     */
    if(host_len < 4)
        return;

    last_tld = str_last_of(host, end, '.');
    if(!last_tld)
        /* http://hostname -> has no domain name. */
        return;

    sec_last_tld = str_last_of(host, last_tld-1, '.');
    if(!sec_last_tld) {
        /* If the '.' is at the beginning of the host there
         * has to be at least 3 characters in the TLD for it
         * to be valid.
         *  Ex: .com -> .com as the domain name.
         *      .co  -> has no domain name.
         *
         * NOTE(review): with last_tld == host the difference below equals
         * host_len, which is already known to be >= 4 here, so this
         * check cannot trigger; ".co" is rejected by the host_len test
         * above instead.
         */
        if(last_tld-host == 0) {
            if(end-(last_tld-1) < 3)
                return;
        } else if(last_tld-host == 3) {
            DWORD i;

            /* If there's three characters in front of last_tld and
             * they are on the list of recognized TLDs, then this
             * host doesn't have a domain (since the host only contains
             * a TLD name).
             *  Ex: edu.uk -> has no domain name.
             *      foo.uk -> foo.uk as the domain name.
             */
            for(i = 0; i < sizeof(recognized_tlds)/sizeof(recognized_tlds[0]); ++i) {
                if(!StrCmpNIW(host, recognized_tlds[i].tld_name, 3))
                    return;
            }
        } else if(last_tld-host < 3)
            /* Anything less than 3 characters is considered part
             * of the TLD name.
             *  Ex: ak.uk -> Has no domain name.
             */
            return;

        /* Otherwise the domain name is the whole host name. */
        *domain_start = 0;
    } else if(end+1-last_tld > 3) {
        /* If the text after the last '.' is 3 or more characters long
         * then it's automatically considered the TLD of the domain name.
         *  Ex: www.winehq.org.uk.test -> uk.test as the domain name.
         */
        *domain_start = (sec_last_tld+1)-host;
    } else if(last_tld - (sec_last_tld+1) < 4) {
        DWORD i;
        /* If the second to last label is 3 characters long it HAS to be on
         * the list of recognized TLDs to still be considered part of the
         * TLD name, otherwise it's considered the domain name.
         *  Ex: www.google.com.uk -> google.com.uk as the domain name.
         *      www.google.foo.uk -> foo.uk as the domain name.
         */
        if(last_tld - (sec_last_tld+1) == 3) {
            for(i = 0; i < sizeof(recognized_tlds)/sizeof(recognized_tlds[0]); ++i) {
                if(!StrCmpNIW(sec_last_tld+1, recognized_tlds[i].tld_name, 3)) {
                    /* The domain starts after the '.' in front of the
                     * label preceding the TLD, or at the start of the
                     * host when there is no such '.'.
                     */
                    const WCHAR *domain = str_last_of(host, sec_last_tld-1, '.');

                    if(!domain)
                        *domain_start = 0;
                    else
                        *domain_start = (domain+1) - host;
                    TRACE("Found domain name %s\n", debugstr_wn(host+*domain_start,
                                                        (host+host_len)-(host+*domain_start)));
                    return;
                }
            }

            *domain_start = (sec_last_tld+1)-host;
        } else {
            /* Since the second to last label is less than 3 characters long
             * it's considered part of the TLD.
             *  Ex: www.google.fo.uk -> google.fo.uk as the domain name.
             */
            const WCHAR *domain = str_last_of(host, sec_last_tld-1, '.');

            if(!domain)
                *domain_start = 0;
            else
                *domain_start = (domain+1) - host;
        }
    } else {
        /* The second to last label has more than 3 characters making it
         * the domain name.
         *  Ex: www.google.test.us -> test.us as the domain name.
         */
        *domain_start = (sec_last_tld+1)-host;
    }

    TRACE("Found domain name %s\n", debugstr_wn(host+*domain_start,
                                        (host+host_len)-(host+*domain_start)));
}
 486
 487 /* Removes the dot segments from a heirarchical URIs path component. This
 488  * function performs the removal in place.
 489  *
 490  * This is a modified version of Qt's QUrl function "removeDotsFromPath".
 491  *
 492  * This function returns the new length of the path string.
 493  */
 494 static DWORD remove_dot_segments(WCHAR *path, DWORD path_len) {
 495     WCHAR *out = path;
 496     const WCHAR *in = out;
 497     const WCHAR *end = out + path_len;
 498     DWORD len;
 499
 500     while(in < end) {
 501         /* A.  if the input buffer begins with a prefix of "/./" or "/.",
 502          *     where "." is a complete path segment, then replace that
 503          *     prefix with "/" in the input buffer; otherwise,
 504          */
 505         if(in <= end - 3 && in[0] == '/' && in[1] == '.' && in[2] == '/') {
 506             in += 2;
 507             continue;
 508         } else if(in == end - 2 && in[0] == '/' && in[1] == '.') {
 509             *out++ = '/';
 510             in += 2;
 511             break;
 512         }
 513
 514         /* B.  if the input buffer begins with a prefix of "/../" or "/..",
 515          *     where ".." is a complete path segment, then replace that
 516          *     prefix with "/" in the input buffer and remove the last
 517          *     segment and its preceding "/" (if any) from the output
 518          *     buffer; otherwise,
 519          */
 520         if(in <= end - 4 && in[0] == '/' && in[1] == '.' && in[2] == '.' && in[3] == '/') {
 521             while(out > path && *(--out) != '/');
 522
 523             in += 3;
 524             continue;
 525         } else if(in == end - 3 && in[0] == '/' && in[1] == '.' && in[2] == '.') {
 526             while(out > path && *(--out) != '/');
 527
 528             if(*out == '/')
 529                 ++out;
 530
 531             in += 3;
 532             break;
 533         }
 534
 535         /* C.  move the first path segment in the input buffer to the end of
 536          *     the output buffer, including the initial "/" character (if
 537          *     any) and any subsequent characters up to, but not including,
 538          *     the next "/" character or the end of the input buffer.
 539          */
 540         *out++ = *in++;
 541         while(in < end && *in != '/')
 542             *out++ = *in++;
 543     }
 544
 545     len = out - path;
 546     TRACE("(%p %d): Path after dot segments removed %s len=%d\n", path, path_len,
 547         debugstr_wn(path, len), len);
 548     return len;
 549 }
 550
 551 /* Attempts to find the file extension in a given path. */
 552 static INT find_file_extension(const WCHAR *path, DWORD path_len) {
 553     const WCHAR *end;
 554
 555     for(end = path+path_len-1; end >= path && *end != '/' && *end != '\\'; --end) {
 556         if(*end == '.')
 557             return end-path;
 558     }
 559
 560     return -1;
 561 }
 562
 563 /* Computes the location where the elision should occur in the IPv6
 564  * address using the numerical values of each component stored in
 565  * 'values'. If the address shouldn't contain an elision then 'index'
 566  * is assigned -1 as it's value. Otherwise 'index' will contain the
 567  * starting index (into values) where the elision should be, and 'count'
 568  * will contain the number of cells the elision covers.
 569  *
 570  * NOTES:
 571  *  Windows will expand an elision if the elision only represents 1 h16
 572  *  component of the URI.
 573  *
 574  *  Ex: [1::2:3:4:5:6:7] -> [1:0:2:3:4:5:6:7]
 575  *
 576  *  If the IPv6 address contains an IPv4 address, the IPv4 address is also
 577  *  considered for being included as part of an elision if all it's components
 578  *  are zeros.
 579  *
 580  *  Ex: [1:2:3:4:5:6:0.0.0.0] -> [1:2:3:4:5:6::]
 581  */
 582 static void compute_elision_location(const ipv6_address *address, const USHORT values[8],
 583                                      INT *index, DWORD *count) {
 584     DWORD i, max_len, cur_len;
 585     INT max_index, cur_index;
 586
 587     max_len = cur_len = 0;
 588     max_index = cur_index = -1;
 589     for(i = 0; i < 8; ++i) {
 590         BOOL check_ipv4 = (address->ipv4 && i == 6);
 591         BOOL is_end = (check_ipv4 || i == 7);
 592
 593         if(check_ipv4) {
 594             /* Check if the IPv4 address contains only zeros. */
 595             if(values[i] == 0 && values[i+1] == 0) {
 596                 if(cur_index == -1)
 597                     cur_index = i;
 598
 599                 cur_len += 2;
 600                 ++i;
 601             }
 602         } else if(values[i] == 0) {
 603             if(cur_index == -1)
 604                 cur_index = i;
 605
 606             ++cur_len;
 607         }
 608
 609         if(is_end || values[i] != 0) {
 610             /* We only consider it for an elision if it's
 611              * more then 1 component long.
 612              */
 613             if(cur_len > 1 && cur_len > max_len) {
 614                 /* Found the new elision location. */
 615                 max_len = cur_len;
 616                 max_index = cur_index;
 617             }
 618
 619             /* Reset the current range for the next range of zeros. */
 620             cur_index = -1;
 621             cur_len = 0;
 622         }
 623     }
 624
 625     *index = max_index;
 626     *count = max_len;
 627 }
 628
 629 /* Removes all the leading and trailing white spaces or
 630  * control characters from the URI and removes all control
 631  * characters inside of the URI string.
 632  */
 633 static BSTR pre_process_uri(LPCWSTR uri) {
 634     BSTR ret;
 635     DWORD len;
 636     const WCHAR *start, *end;
 637     WCHAR *buf, *ptr;
 638
 639     len = lstrlenW(uri);
 640
 641     start = uri;
 642     /* Skip leading controls and whitespace. */
 643     while(iscntrlW(*start) || isspaceW(*start)) ++start;
 644
 645     end = uri+len-1;
 646     if(start == end)
 647         /* URI consisted only of control/whitespace. */
 648         ret = SysAllocStringLen(NULL, 0);
 649     else {
 650         while(iscntrlW(*end) || isspaceW(*end)) --end;
 651
 652         buf = heap_alloc(((end+1)-start)*sizeof(WCHAR));
 653         if(!buf)
 654             return NULL;
 655
 656         for(ptr = buf; start < end+1; ++start) {
 657             if(!iscntrlW(*start))
 658                 *ptr++ = *start;
 659         }
 660
 661         ret = SysAllocStringLen(buf, ptr-buf);
 662         heap_free(buf);
 663     }
 664
 665     return ret;
 666 }
 667
 668 /* Converts the specified IPv4 address into an uint value.
 669  *
 670  * This function assumes that the IPv4 address has already been validated.
 671  */
 672 static UINT ipv4toui(const WCHAR *ip, DWORD len) {
 673     UINT ret = 0;
 674     DWORD comp_value = 0;
 675     const WCHAR *ptr;
 676
 677     for(ptr = ip; ptr < ip+len; ++ptr) {
 678         if(*ptr == '.') {
 679             ret <<= 8;
 680             ret += comp_value;
 681             comp_value = 0;
 682         } else
 683             comp_value = comp_value*10 + (*ptr-'0');
 684     }
 685
 686     ret <<= 8;
 687     ret += comp_value;
 688
 689     return ret;
 690 }
 691
 692 /* Converts an IPv4 address in numerical form into it's fully qualified
 693  * string form. This function returns the number of characters written
 694  * to 'dest'. If 'dest' is NULL this function will return the number of
 695  * characters that would have been written.
 696  *
 697  * It's up to the caller to ensure there's enough space in 'dest' for the
 698  * address.
 699  */
 700 static DWORD ui2ipv4(WCHAR *dest, UINT address) {
 701     static const WCHAR formatW[] =
 702         {'%','u','.','%','u','.','%','u','.','%','u',0};
 703     DWORD ret = 0;
 704     UCHAR digits[4];
 705
 706     digits[0] = (address >> 24) & 0xff;
 707     digits[1] = (address >> 16) & 0xff;
 708     digits[2] = (address >> 8) & 0xff;
 709     digits[3] = address & 0xff;
 710
 711     if(!dest) {
 712         WCHAR tmp[16];
 713         ret = sprintfW(tmp, formatW, digits[0], digits[1], digits[2], digits[3]);
 714     } else
 715         ret = sprintfW(dest, formatW, digits[0], digits[1], digits[2], digits[3]);
 716
 717     return ret;
 718 }
 719
 720 /* Converts an h16 component (from an IPv6 address) into it's
 721  * numerical value.
 722  *
 723  * This function assumes that the h16 component has already been validated.
 724  */
 725 static USHORT h16tous(h16 component) {
 726     DWORD i;
 727     USHORT ret = 0;
 728
 729     for(i = 0; i < component.len; ++i) {
 730         ret <<= 4;
 731         ret += hex_to_int(component.str[i]);
 732     }
 733
 734     return ret;
 735 }
 736
 737 /* Converts an IPv6 address into it's 128 bits (16 bytes) numerical value.
 738  *
 739  * This function assumes that the ipv6_address has already been validated.
 740  */
 741 static BOOL ipv6_to_number(const ipv6_address *address, USHORT number[8]) {
 742     DWORD i, cur_component = 0;
 743     BOOL already_passed_elision = FALSE;
 744
 745     for(i = 0; i < address->h16_count; ++i) {
 746         if(address->elision) {
 747             if(address->components[i].str > address->elision && !already_passed_elision) {
 748                 /* Means we just passed the elision and need to add it's values to
 749                  * 'number' before we do anything else.
 750                  */
 751                 DWORD j = 0;
 752                 for(j = 0; j < address->elision_size; j+=2)
 753                     number[cur_component++] = 0;
 754
 755                 already_passed_elision = TRUE;
 756             }
 757         }
 758
 759         number[cur_component++] = h16tous(address->components[i]);
 760     }
 761
 762     /* Case when the elision appears after the h16 components. */
 763     if(!already_passed_elision && address->elision) {
 764         for(i = 0; i < address->elision_size; i+=2)
 765             number[cur_component++] = 0;
 766         already_passed_elision = TRUE;
 767     }
 768
 769     if(address->ipv4) {
 770         UINT value = ipv4toui(address->ipv4, address->ipv4_len);
 771
 772         if(cur_component != 6) {
 773             ERR("(%p %p): Failed sanity check with %d\n", address, number, cur_component);
 774             return FALSE;
 775         }
 776
 777         number[cur_component++] = (value >> 16) & 0xffff;
 778         number[cur_component] = value & 0xffff;
 779     }
 780
 781     return TRUE;
 782 }
 783
 784 /* Checks if the characters pointed to by 'ptr' are
 785  * a percent encoded data octet.
 786  *
 787  * pct-encoded = "%" HEXDIG HEXDIG
 788  */
 789 static BOOL check_pct_encoded(const WCHAR **ptr) {
 790     const WCHAR *start = *ptr;
 791
 792     if(**ptr != '%')
 793         return FALSE;
 794
 795     ++(*ptr);
 796     if(!is_hexdigit(**ptr)) {
 797         *ptr = start;
 798         return FALSE;
 799     }
 800
 801     ++(*ptr);
 802     if(!is_hexdigit(**ptr)) {
 803         *ptr = start;
 804         return FALSE;
 805     }
 806
 807     ++(*ptr);
 808     return TRUE;
 809 }
 810
 811 /* dec-octet   = DIGIT                 ; 0-9
 812  *             / %x31-39 DIGIT         ; 10-99
 813  *             / "1" 2DIGIT            ; 100-199
 814  *             / "2" %x30-34 DIGIT     ; 200-249
 815  *             / "25" %x30-35          ; 250-255
 816  */
 817 static BOOL check_dec_octet(const WCHAR **ptr) {
 818     const WCHAR *c1, *c2, *c3;
 819
 820     c1 = *ptr;
 821     /* A dec-octet must be at least 1 digit long. */
 822     if(*c1 < '0' || *c1 > '9')
 823         return FALSE;
 824
 825     ++(*ptr);
 826
 827     c2 = *ptr;
 828     /* Since the 1 digit requirment was meet, it doesn't
 829      * matter if this is a DIGIT value, it's considered a
 830      * dec-octet.
 831      */
 832     if(*c2 < '0' || *c2 > '9')
 833         return TRUE;
 834
 835     ++(*ptr);
 836
 837     c3 = *ptr;
 838     /* Same explanation as above. */
 839     if(*c3 < '0' || *c3 > '9')
 840         return TRUE;
 841
 842     /* Anything > 255 isn't a valid IP dec-octet. */
 843     if(*c1 >= '2' && *c2 >= '5' && *c3 >= '5') {
 844         *ptr = c1;
 845         return FALSE;
 846     }
 847
 848     ++(*ptr);
 849     return TRUE;
 850 }
 851
 852 /* Checks if there is an implicit IPv4 address in the host component of the URI.
 853  * The max value of an implicit IPv4 address is UINT_MAX.
 854  *
 855  *  Ex:
 856  *      "234567" would be considered an implicit IPv4 address.
 857  */
 858 static BOOL check_implicit_ipv4(const WCHAR **ptr, UINT *val) {
 859     const WCHAR *start = *ptr;
 860     ULONGLONG ret = 0;
 861     *val = 0;
 862
 863     while(is_num(**ptr)) {
 864         ret = ret*10 + (**ptr - '0');
 865
 866         if(ret > UINT_MAX) {
 867             *ptr = start;
 868             return FALSE;
 869         }
 870         ++(*ptr);
 871     }
 872
 873     if(*ptr == start)
 874         return FALSE;
 875
 876     *val = ret;
 877     return TRUE;
 878 }
 879
 880 /* Checks if the string contains an IPv4 address.
 881  *
 882  * This function has a strict mode or a non-strict mode of operation
 883  * When 'strict' is set to FALSE this function will return TRUE if
 884  * the string contains at least 'dec-octet "." dec-octet' since partial
 885  * IPv4 addresses will be normalized out into full IPv4 addresses. When
 886  * 'strict' is set this function expects there to be a full IPv4 address.
 887  *
 888  * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
 889  */
 890 static BOOL check_ipv4address(const WCHAR **ptr, BOOL strict) {
 891     const WCHAR *start = *ptr;
 892
 893     if(!check_dec_octet(ptr)) {
 894         *ptr = start;
 895         return FALSE;
 896     }
 897
 898     if(**ptr != '.') {
 899         *ptr = start;
 900         return FALSE;
 901     }
 902
 903     ++(*ptr);
 904     if(!check_dec_octet(ptr)) {
 905         *ptr = start;
 906         return FALSE;
 907     }
 908
 909     if(**ptr != '.') {
 910         if(strict) {
 911             *ptr = start;
 912             return FALSE;
 913         } else
 914             return TRUE;
 915     }
 916
 917     ++(*ptr);
 918     if(!check_dec_octet(ptr)) {
 919         *ptr = start;
 920         return FALSE;
 921     }
 922
 923     if(**ptr != '.') {
 924         if(strict) {
 925             *ptr = start;
 926             return FALSE;
 927         } else
 928             return TRUE;
 929     }
 930
 931     ++(*ptr);
 932     if(!check_dec_octet(ptr)) {
 933         *ptr = start;
 934         return FALSE;
 935     }
 936
 937     /* Found a four digit ip address. */
 938     return TRUE;
 939 }
 940 /* Tries to parse the scheme name of the URI.
 941  *
 942  * scheme = ALPHA *(ALPHA | NUM | '+' | '-' | '.') as defined by RFC 3896.
 943  * NOTE: Windows accepts a number as the first character of a scheme.
 944  */
 945 static BOOL parse_scheme_name(const WCHAR **ptr, parse_data *data) {
 946     const WCHAR *start = *ptr;
 947
 948     data->scheme = NULL;
 949     data->scheme_len = 0;
 950
 951     while(**ptr) {
 952         if(**ptr == '*' && *ptr == start) {
 953             /* Might have found a wildcard scheme. If it is the next
 954              * char has to be a ':' for it to be a valid URI
 955              */
 956             ++(*ptr);
 957             break;
 958         } else if(!is_num(**ptr) && !is_alpha(**ptr) && **ptr != '+' &&
 959            **ptr != '-' && **ptr != '.')
 960             break;
 961
 962         (*ptr)++;
 963     }
 964
 965     if(*ptr == start)
 966         return FALSE;
 967
 968     /* Schemes must end with a ':' */
 969     if(**ptr != ':') {
 970         *ptr = start;
 971         return FALSE;
 972     }
 973
 974     data->scheme = start;
 975     data->scheme_len = *ptr - start;
 976
 977     ++(*ptr);
 978     return TRUE;
 979 }
 980
 981 /* Tries to deduce the corresponding URL_SCHEME for the given URI. Stores
 982  * the deduced URL_SCHEME in data->scheme_type.
 983  */
 984 static BOOL parse_scheme_type(parse_data *data) {
 985     /* If there's scheme data then see if it's a recognized scheme. */
 986     if(data->scheme && data->scheme_len) {
 987         DWORD i;
 988
 989         for(i = 0; i < sizeof(recognized_schemes)/sizeof(recognized_schemes[0]); ++i) {
 990             if(lstrlenW(recognized_schemes[i].scheme_name) == data->scheme_len) {
 991                 /* Has to be a case insensitive compare. */
 992                 if(!StrCmpNIW(recognized_schemes[i].scheme_name, data->scheme, data->scheme_len)) {
 993                     data->scheme_type = recognized_schemes[i].scheme;
 994                     return TRUE;
 995                 }
 996             }
 997         }
 998
 999         /* If we get here it means it's not a recognized scheme. */
1000         data->scheme_type = URL_SCHEME_UNKNOWN;
1001         return TRUE;
1002     } else if(data->is_relative) {
1003         /* Relative URI's have no scheme. */
1004         data->scheme_type = URL_SCHEME_UNKNOWN;
1005         return TRUE;
1006     } else {
1007         /* Should never reach here! what happened... */
1008         FIXME("(%p): Unable to determine scheme type for URI %s\n", data, debugstr_w(data->uri));
1009         return FALSE;
1010     }
1011 }
1012
1013 /* Tries to parse (or deduce) the scheme_name of a URI. If it can't
1014  * parse a scheme from the URI it will try to deduce the scheme_name and scheme_type
1015  * using the flags specified in 'flags' (if any). Flags that affect how this function
1016  * operates are the Uri_CREATE_ALLOW_* flags.
1017  *
1018  * All parsed/deduced information will be stored in 'data' when the function returns.
1019  *
1020  * Returns TRUE if it was able to successfully parse the information.
1021  */
1022 static BOOL parse_scheme(const WCHAR **ptr, parse_data *data, DWORD flags) {
1023     static const WCHAR fileW[] = {'f','i','l','e',0};
1024     static const WCHAR wildcardW[] = {'*',0};
1025
1026     /* First check to see if the uri could implicitly be a file path. */
1027     if(is_implicit_file_path(*ptr)) {
1028         if(flags & Uri_CREATE_ALLOW_IMPLICIT_FILE_SCHEME) {
1029             data->scheme = fileW;
1030             data->scheme_len = lstrlenW(fileW);
1031             data->has_implicit_scheme = TRUE;
1032
1033             TRACE("(%p %p %x): URI is an implicit file path.\n", ptr, data, flags);
1034         } else {
1035             /* Window's does not consider anything that can implicitly be a file
1036              * path to be a valid URI if the ALLOW_IMPLICIT_FILE_SCHEME flag is not set...
1037              */
1038             TRACE("(%p %p %x): URI is implicitly a file path, but, the ALLOW_IMPLICIT_FILE_SCHEME flag wasn't set.\n",
1039                     ptr, data, flags);
1040             return FALSE;
1041         }
1042     } else if(!parse_scheme_name(ptr, data)) {
1043         /* No Scheme was found, this means it could be:
1044          *      a) an implicit Wildcard scheme
1045          *      b) a relative URI
1046          *      c) a invalid URI.
1047          */
1048         if(flags & Uri_CREATE_ALLOW_IMPLICIT_WILDCARD_SCHEME) {
1049             data->scheme = wildcardW;
1050             data->scheme_len = lstrlenW(wildcardW);
1051             data->has_implicit_scheme = TRUE;
1052
1053             TRACE("(%p %p %x): URI is an implicit wildcard scheme.\n", ptr, data, flags);
1054         } else if (flags & Uri_CREATE_ALLOW_RELATIVE) {
1055             data->is_relative = TRUE;
1056             TRACE("(%p %p %x): URI is relative.\n", ptr, data, flags);
1057         } else {
1058             TRACE("(%p %p %x): Malformed URI found. Unable to deduce scheme name.\n", ptr, data, flags);
1059             return FALSE;
1060         }
1061     }
1062
1063     if(!data->is_relative)
1064         TRACE("(%p %p %x): Found scheme=%s scheme_len=%d\n", ptr, data, flags,
1065                 debugstr_wn(data->scheme, data->scheme_len), data->scheme_len);
1066
1067     if(!parse_scheme_type(data))
1068         return FALSE;
1069
1070     TRACE("(%p %p %x): Assigned %d as the URL_SCHEME.\n", ptr, data, flags, data->scheme_type);
1071     return TRUE;
1072 }
1073
1074 /* Parses the userinfo part of the URI (if it exists). The userinfo field of
1075  * a URI can consist of "username:password@", or just "username@".
1076  *
1077  * RFC def:
1078  * userinfo    = *( unreserved / pct-encoded / sub-delims / ":" )
1079  *
1080  * NOTES:
1081  *  1)  If there is more than one ':' in the userinfo part of the URI Windows
1082  *      uses the first occurence of ':' to delimit the username and password
1083  *      components.
1084  *
1085  *      ex:
1086  *          ftp://user:pass:word@winehq.org
1087  *
1088  *      Would yield, "user" as the username and "pass:word" as the password.
1089  *
1090  *  2)  Windows allows any character to appear in the "userinfo" part of
1091  *      a URI, as long as it's not an authority delimeter character set.
1092  */
1093 static void parse_userinfo(const WCHAR **ptr, parse_data *data, DWORD flags) {
1094     data->userinfo = *ptr;
1095     data->userinfo_split = -1;
1096
1097     while(**ptr != '@') {
1098         if(**ptr == ':' && data->userinfo_split == -1)
1099             data->userinfo_split = *ptr - data->userinfo;
1100         else if(**ptr == '%') {
1101             /* If it's a known scheme type, it has to be a valid percent
1102              * encoded value.
1103              */
1104             if(!check_pct_encoded(ptr)) {
1105                 if(data->scheme_type != URL_SCHEME_UNKNOWN) {
1106                     *ptr = data->userinfo;
1107                     data->userinfo = NULL;
1108                     data->userinfo_split = -1;
1109
1110                     TRACE("(%p %p %x): URI contained no userinfo.\n", ptr, data, flags);
1111                     return;
1112                 }
1113             } else
1114                 continue;
1115         } else if(is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN))
1116             break;
1117
1118         ++(*ptr);
1119     }
1120
1121     if(**ptr != '@') {
1122         *ptr = data->userinfo;
1123         data->userinfo = NULL;
1124         data->userinfo_split = -1;
1125
1126         TRACE("(%p %p %x): URI contained no userinfo.\n", ptr, data, flags);
1127         return;
1128     }
1129
1130     data->userinfo_len = *ptr - data->userinfo;
1131     TRACE("(%p %p %x): Found userinfo=%s userinfo_len=%d split=%d.\n", ptr, data, flags,
1132             debugstr_wn(data->userinfo, data->userinfo_len), data->userinfo_len, data->userinfo_split);
1133     ++(*ptr);
1134 }
1135
1136 /* Attempts to parse a port from the URI.
1137  *
1138  * NOTES:
1139  *  Windows seems to have a cap on what the maximum value
1140  *  for a port can be. The max value is USHORT_MAX.
1141  *
1142  * port = *DIGIT
1143  */
1144 static BOOL parse_port(const WCHAR **ptr, parse_data *data, DWORD flags) {
1145     UINT port = 0;
1146     data->port = *ptr;
1147
1148     while(!is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN)) {
1149         if(!is_num(**ptr)) {
1150             *ptr = data->port;
1151             data->port = NULL;
1152             return FALSE;
1153         }
1154
1155         port = port*10 + (**ptr-'0');
1156
1157         if(port > USHORT_MAX) {
1158             *ptr = data->port;
1159             data->port = NULL;
1160             return FALSE;
1161         }
1162
1163         ++(*ptr);
1164     }
1165
1166     data->port_value = port;
1167     data->port_len = *ptr - data->port;
1168
1169     TRACE("(%p %p %x): Found port %s len=%d value=%u\n", ptr, data, flags,
1170         debugstr_wn(data->port, data->port_len), data->port_len, data->port_value);
1171     return TRUE;
1172 }
1173
1174 /* Attempts to parse a IPv4 address from the URI.
1175  *
1176  * NOTES:
1177  *  Window's normalizes IPv4 addresses, This means there's three
1178  *  possibilities for the URI to contain an IPv4 address.
1179  *      1)  A well formed address (ex. 192.2.2.2).
1180  *      2)  A partially formed address. For example "192.0" would
1181  *          normalize to "192.0.0.0" during canonicalization.
1182  *      3)  An implicit IPv4 address. For example "256" would
1183  *          normalize to "0.0.1.0" during canonicalization. Also
1184  *          note that the maximum value for an implicit IP address
1185  *          is UINT_MAX, if the value in the URI exceeds this then
1186  *          it is not considered an IPv4 address.
1187  */
1188 static BOOL parse_ipv4address(const WCHAR **ptr, parse_data *data, DWORD flags) {
1189     const BOOL is_unknown = data->scheme_type == URL_SCHEME_UNKNOWN;
1190     data->host = *ptr;
1191
1192     if(!check_ipv4address(ptr, FALSE)) {
1193         if(!check_implicit_ipv4(ptr, &data->implicit_ipv4)) {
1194             TRACE("(%p %p %x): URI didn't contain anything looking like an IPv4 address.\n",
1195                 ptr, data, flags);
1196             *ptr = data->host;
1197             data->host = NULL;
1198             return FALSE;
1199         } else
1200             data->has_implicit_ip = TRUE;
1201     }
1202
1203     /* Check if what we found is the only part of the host name (if it isn't
1204      * we don't have an IPv4 address).
1205      */
1206     if(**ptr == ':') {
1207         ++(*ptr);
1208         if(!parse_port(ptr, data, flags)) {
1209             *ptr = data->host;
1210             data->host = NULL;
1211             return FALSE;
1212         }
1213     } else if(!is_auth_delim(**ptr, !is_unknown)) {
1214         /* Found more data which belongs the host, so this isn't an IPv4. */
1215         *ptr = data->host;
1216         data->host = NULL;
1217         data->has_implicit_ip = FALSE;
1218         return FALSE;
1219     }
1220
1221     data->host_len = *ptr - data->host;
1222     data->host_type = Uri_HOST_IPV4;
1223
1224     TRACE("(%p %p %x): IPv4 address found. host=%s host_len=%d host_type=%d\n",
1225         ptr, data, flags, debugstr_wn(data->host, data->host_len),
1226         data->host_len, data->host_type);
1227     return TRUE;
1228 }
1229
1230 /* Attempts to parse the reg-name from the URI.
1231  *
1232  * Because of the way Windows handles ':' this function also
1233  * handles parsing the port.
1234  *
1235  * reg-name = *( unreserved / pct-encoded / sub-delims )
1236  *
1237  * NOTE:
1238  *  Windows allows everything, but, the characters in "auth_delims" and ':'
1239  *  to appear in a reg-name, unless it's an unknown scheme type then ':' is
1240  *  allowed to appear (even if a valid port isn't after it).
1241  *
1242  *  Windows doesn't like host names which start with '[' and end with ']'
1243  *  and don't contain a valid IP literal address in between them.
1244  *
1245  *  On Windows if an '[' is encountered in the host name the ':' no longer
1246  *  counts as a delimiter until you reach the next ']' or an "authority delimeter".
1247  *
1248  *  A reg-name CAN be empty.
1249  */
1250 static BOOL parse_reg_name(const WCHAR **ptr, parse_data *data, DWORD flags) {
1251     const BOOL has_start_bracket = **ptr == '[';
1252     const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
1253     BOOL inside_brackets = has_start_bracket;
1254     BOOL ignore_col = FALSE;
1255
1256     /* We have to be careful with file schemes. */
1257     if(data->scheme_type == URL_SCHEME_FILE) {
1258         /* This is because an implicit file scheme could be "C:\\test" and it
1259          * would trick this function into thinking the host is "C", when after
1260          * canonicalization the host would end up being an empty string.
1261          */
1262         if(is_alpha(**ptr) && *(*ptr+1) == ':') {
1263             /* Regular old drive paths don't have a host type (or host name). */
1264             data->host_type = Uri_HOST_UNKNOWN;
1265             data->host = *ptr;
1266             data->host_len = 0;
1267             return TRUE;
1268         } else if(**ptr == '\\' && *(*ptr+1) == '\\')
1269             /* Skip past the "\\" of a UNC path. */
1270             *ptr += 2;
1271     }
1272
1273     data->host = *ptr;
1274
1275     while(!is_auth_delim(**ptr, known_scheme)) {
1276         if(**ptr == ':' && !ignore_col) {
1277             /* We can ignore ':' if were inside brackets.*/
1278             if(!inside_brackets) {
1279                 const WCHAR *tmp = (*ptr)++;
1280
1281                 /* Attempt to parse the port. */
1282                 if(!parse_port(ptr, data, flags)) {
1283                     /* Windows expects there to be a valid port for known scheme types. */
1284                     if(data->scheme_type != URL_SCHEME_UNKNOWN) {
1285                         *ptr = data->host;
1286                         data->host = NULL;
1287                         TRACE("(%p %p %x): Expected valid port\n", ptr, data, flags);
1288                         return FALSE;
1289                     } else
1290                         /* Windows gives up on trying to parse a port when it
1291                          * encounters 1 invalid port.
1292                          */
1293                         ignore_col = TRUE;
1294                 } else {
1295                     data->host_len = tmp - data->host;
1296                     break;
1297                 }
1298             }
1299         } else if(**ptr == '%' && known_scheme) {
1300             /* Has to be a legit % encoded value. */
1301             if(!check_pct_encoded(ptr)) {
1302                 *ptr = data->host;
1303                 data->host = NULL;
1304                 return FALSE;
1305             } else
1306                 continue;
1307         } else if(**ptr == ']')
1308             inside_brackets = FALSE;
1309         else if(**ptr == '[')
1310             inside_brackets = TRUE;
1311
1312         ++(*ptr);
1313     }
1314
1315     if(has_start_bracket) {
1316         /* Make sure the last character of the host wasn't a ']'. */
1317         if(*(*ptr-1) == ']') {
1318             TRACE("(%p %p %x): Expected an IP literal inside of the host\n",
1319                 ptr, data, flags);
1320             *ptr = data->host;
1321             data->host = NULL;
1322             return FALSE;
1323         }
1324     }
1325
1326     /* Don't overwrite our length if we found a port earlier. */
1327     if(!data->port)
1328         data->host_len = *ptr - data->host;
1329
1330     /* If the host is empty, then it's an unknown host type. */
1331     if(data->host_len == 0)
1332         data->host_type = Uri_HOST_UNKNOWN;
1333     else
1334         data->host_type = Uri_HOST_DNS;
1335
1336     TRACE("(%p %p %x): Parsed reg-name. host=%s len=%d\n", ptr, data, flags,
1337         debugstr_wn(data->host, data->host_len), data->host_len);
1338     return TRUE;
1339 }
1340
1341 /* Attempts to parse an IPv6 address out of the URI.
1342  *
1343  * IPv6address =                               6( h16 ":" ) ls32
1344  *                /                       "::" 5( h16 ":" ) ls32
1345  *                / [               h16 ] "::" 4( h16 ":" ) ls32
1346  *                / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
1347  *                / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
1348  *                / [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
1349  *                / [ *4( h16 ":" ) h16 ] "::"              ls32
1350  *                / [ *5( h16 ":" ) h16 ] "::"              h16
1351  *                / [ *6( h16 ":" ) h16 ] "::"
1352  *
1353  * ls32        = ( h16 ":" h16 ) / IPv4address
1354  *             ; least-significant 32 bits of address.
1355  *
1356  * h16         = 1*4HEXDIG
1357  *             ; 16 bits of address represented in hexadecimal.
1358  *
1359  * Modeled after google-url's 'DoParseIPv6' function.
1360  */
1361 static BOOL parse_ipv6address(const WCHAR **ptr, parse_data *data, DWORD flags) {
1362     const WCHAR *start, *cur_start;
1363     ipv6_address ip;
1364
1365     start = cur_start = *ptr;
1366     memset(&ip, 0, sizeof(ipv6_address));
1367
1368     for(;; ++(*ptr)) {
1369         /* Check if we're on the last character of the host. */
1370         BOOL is_end = (is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN)
1371                         || **ptr == ']');
1372
1373         BOOL is_split = (**ptr == ':');
1374         BOOL is_elision = (is_split && !is_end && *(*ptr+1) == ':');
1375
1376         /* Check if we're at the end of of the a component, or
1377          * if we're at the end of the IPv6 address.
1378          */
1379         if(is_split || is_end) {
1380             DWORD cur_len = 0;
1381
1382             cur_len = *ptr - cur_start;
1383
1384             /* h16 can't have a length > 4. */
1385             if(cur_len > 4) {
1386                 *ptr = start;
1387
1388                 TRACE("(%p %p %x): h16 component to long.\n",
1389                     ptr, data, flags);
1390                 return FALSE;
1391             }
1392
1393             if(cur_len == 0) {
1394                 /* An h16 component can't have the length of 0 unless
1395                  * the elision is at the beginning of the address, or
1396                  * at the end of the address.
1397                  */
1398                 if(!((*ptr == start && is_elision) ||
1399                     (is_end && (*ptr-2) == ip.elision))) {
1400                     *ptr = start;
1401                     TRACE("(%p %p %x): IPv6 component can not have a length of 0.\n",
1402                         ptr, data, flags);
1403                     return FALSE;
1404                 }
1405             }
1406
1407             if(cur_len > 0) {
1408                 /* An IPv6 address can have no more than 8 h16 components. */
1409                 if(ip.h16_count >= 8) {
1410                     *ptr = start;
1411                     TRACE("(%p %p %x): Not a IPv6 address, to many h16 components.\n",
1412                         ptr, data, flags);
1413                     return FALSE;
1414                 }
1415
1416                 ip.components[ip.h16_count].str = cur_start;
1417                 ip.components[ip.h16_count].len = cur_len;
1418
1419                 TRACE("(%p %p %x): Found h16 component %s, len=%d, h16_count=%d\n",
1420                     ptr, data, flags, debugstr_wn(cur_start, cur_len), cur_len,
1421                     ip.h16_count);
1422                 ++ip.h16_count;
1423             }
1424         }
1425
1426         if(is_end)
1427             break;
1428
1429         if(is_elision) {
1430             /* A IPv6 address can only have 1 elision ('::'). */
1431             if(ip.elision) {
1432                 *ptr = start;
1433
1434                 TRACE("(%p %p %x): IPv6 address cannot have 2 elisions.\n",
1435                     ptr, data, flags);
1436                 return FALSE;
1437             }
1438
1439             ip.elision = *ptr;
1440             ++(*ptr);
1441         }
1442
1443         if(is_split)
1444             cur_start = *ptr+1;
1445         else {
1446             if(!check_ipv4address(ptr, TRUE)) {
1447                 if(!is_hexdigit(**ptr)) {
1448                     /* Not a valid character for an IPv6 address. */
1449                     *ptr = start;
1450                     return FALSE;
1451                 }
1452             } else {
1453                 /* Found an IPv4 address. */
1454                 ip.ipv4 = cur_start;
1455                 ip.ipv4_len = *ptr - cur_start;
1456
1457                 TRACE("(%p %p %x): Found an attached IPv4 address %s len=%d.\n",
1458                     ptr, data, flags, debugstr_wn(ip.ipv4, ip.ipv4_len),
1459                     ip.ipv4_len);
1460
1461                 /* IPv4 addresses can only appear at the end of a IPv6. */
1462                 break;
1463             }
1464         }
1465     }
1466
1467     compute_ipv6_comps_size(&ip);
1468
1469     /* Make sure the IPv6 address adds up to 16 bytes. */
1470     if(ip.components_size + ip.elision_size != 16) {
1471         *ptr = start;
1472         TRACE("(%p %p %x): Invalid IPv6 address, did not add up to 16 bytes.\n",
1473             ptr, data, flags);
1474         return FALSE;
1475     }
1476
1477     if(ip.elision_size == 2) {
1478         /* For some reason on Windows if an elision that represents
1479          * only 1 h16 component is encountered at the very begin or
1480          * end of an IPv6 address, Windows does not consider it a
1481          * valid IPv6 address.
1482          *
1483          *  Ex: [::2:3:4:5:6:7] is not valid, even though the sum
1484          *      of all the components == 128bits.
1485          */
1486          if(ip.elision < ip.components[0].str ||
1487             ip.elision > ip.components[ip.h16_count-1].str) {
1488             *ptr = start;
1489             TRACE("(%p %p %x): Invalid IPv6 address. Detected elision of 2 bytes at the beginning or end of the address.\n",
1490                 ptr, data, flags);
1491             return FALSE;
1492         }
1493     }
1494
1495     data->host_type = Uri_HOST_IPV6;
1496     data->has_ipv6 = TRUE;
1497     data->ipv6_address = ip;
1498
1499     TRACE("(%p %p %x): Found valid IPv6 literal %s len=%d\n",
1500         ptr, data, flags, debugstr_wn(start, *ptr-start),
1501         *ptr-start);
1502     return TRUE;
1503 }
1504
1505 /*  IPvFuture  = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) */
1506 static BOOL parse_ipvfuture(const WCHAR **ptr, parse_data *data, DWORD flags) {
1507     const WCHAR *start = *ptr;
1508
1509     /* IPvFuture has to start with a 'v' or 'V'. */
1510     if(**ptr != 'v' && **ptr != 'V')
1511         return FALSE;
1512
1513     /* Following the v their must be atleast 1 hexdigit. */
1514     ++(*ptr);
1515     if(!is_hexdigit(**ptr)) {
1516         *ptr = start;
1517         return FALSE;
1518     }
1519
1520     ++(*ptr);
1521     while(is_hexdigit(**ptr))
1522         ++(*ptr);
1523
1524     /* End of the hexdigit sequence must be a '.' */
1525     if(**ptr != '.') {
1526         *ptr = start;
1527         return FALSE;
1528     }
1529
1530     ++(*ptr);
1531     if(!is_unreserved(**ptr) && !is_subdelim(**ptr) && **ptr != ':') {
1532         *ptr = start;
1533         return FALSE;
1534     }
1535
1536     ++(*ptr);
1537     while(is_unreserved(**ptr) || is_subdelim(**ptr) || **ptr == ':')
1538         ++(*ptr);
1539
1540     data->host_type = Uri_HOST_UNKNOWN;
1541
1542     TRACE("(%p %p %x): Parsed IPvFuture address %s len=%d\n", ptr, data, flags,
1543         debugstr_wn(start, *ptr-start), *ptr-start);
1544
1545     return TRUE;
1546 }
1547
1548 /* IP-literal = "[" ( IPv6address / IPvFuture  ) "]" */
1549 static BOOL parse_ip_literal(const WCHAR **ptr, parse_data *data, DWORD flags) {
1550     data->host = *ptr;
1551
1552     if(**ptr != '[') {
1553         data->host = NULL;
1554         return FALSE;
1555     }
1556
1557     ++(*ptr);
1558     if(!parse_ipv6address(ptr, data, flags)) {
1559         if(!parse_ipvfuture(ptr, data, flags)) {
1560             *ptr = data->host;
1561             data->host = NULL;
1562             return FALSE;
1563         }
1564     }
1565
1566     if(**ptr != ']') {
1567         *ptr = data->host;
1568         data->host = NULL;
1569         return FALSE;
1570     }
1571
1572     ++(*ptr);
1573     if(**ptr == ':') {
1574         ++(*ptr);
1575         /* If a valid port is not found, then let it trickle down to
1576          * parse_reg_name.
1577          */
1578         if(!parse_port(ptr, data, flags)) {
1579             *ptr = data->host;
1580             data->host = NULL;
1581             return FALSE;
1582         }
1583     } else
1584         data->host_len = *ptr - data->host;
1585
1586     return TRUE;
1587 }
1588
1589 /* Parses the host information from the URI.
1590  *
1591  * host = IP-literal / IPv4address / reg-name
1592  */
1593 static BOOL parse_host(const WCHAR **ptr, parse_data *data, DWORD flags) {
1594     if(!parse_ip_literal(ptr, data, flags)) {
1595         if(!parse_ipv4address(ptr, data, flags)) {
1596             if(!parse_reg_name(ptr, data, flags)) {
1597                 TRACE("(%p %p %x): Malformed URI, Unknown host type.\n",
1598                     ptr, data, flags);
1599                 return FALSE;
1600             }
1601         }
1602     }
1603
1604     return TRUE;
1605 }
1606
1607 /* Parses the authority information from the URI.
1608  *
1609  * authority   = [ userinfo "@" ] host [ ":" port ]
1610  */
1611 static BOOL parse_authority(const WCHAR **ptr, parse_data *data, DWORD flags) {
1612     parse_userinfo(ptr, data, flags);
1613
1614     /* Parsing the port will happen during one of the host parsing
1615      * routines (if the URI has a port).
1616      */
1617     if(!parse_host(ptr, data, flags))
1618         return FALSE;
1619
1620     return TRUE;
1621 }
1622
1623 /* Attempts to parse the path information of a hierarchical URI. */
1624 static BOOL parse_path_hierarchical(const WCHAR **ptr, parse_data *data, DWORD flags) {
1625     const WCHAR *start = *ptr;
1626     static const WCHAR slash[] = {'/',0};
1627
1628     if(is_path_delim(**ptr)) {
1629         if(data->scheme_type == URL_SCHEME_WILDCARD) {
1630             /* Wildcard schemes don't get a '/' attached if their path is
1631              * empty.
1632              */
1633             data->path = NULL;
1634             data->path_len = 0;
1635         } else if(!(flags & Uri_CREATE_NO_CANONICALIZE)) {
1636             /* If the path component is empty, then a '/' is added. */
1637             data->path = slash;
1638             data->path_len = 1;
1639         }
1640     } else {
1641         while(!is_path_delim(**ptr)) {
1642             if(**ptr == '%' && data->scheme_type != URL_SCHEME_UNKNOWN &&
1643                data->scheme_type != URL_SCHEME_FILE) {
1644                 if(!check_pct_encoded(ptr)) {
1645                     *ptr = start;
1646                     return FALSE;
1647                 } else
1648                     continue;
1649             } else if(**ptr == '\\') {
1650                 /* Not allowed to have a backslash if NO_CANONICALIZE is set
1651                  * and the scheme is known type (but not a file scheme).
1652                  */
1653                 if(flags & Uri_CREATE_NO_CANONICALIZE) {
1654                     if(data->scheme_type != URL_SCHEME_FILE &&
1655                        data->scheme_type != URL_SCHEME_UNKNOWN) {
1656                         *ptr = start;
1657                         return FALSE;
1658                     }
1659                 }
1660             }
1661
1662             ++(*ptr);
1663         }
1664
1665         /* The only time a URI doesn't have a path is when
1666          * the NO_CANONICALIZE flag is set and the raw URI
1667          * didn't contain one.
1668          */
1669         if(*ptr == start) {
1670             data->path = NULL;
1671             data->path_len = 0;
1672         } else {
1673             data->path = start;
1674             data->path_len = *ptr - start;
1675         }
1676     }
1677
1678     if(data->path)
1679         TRACE("(%p %p %x): Parsed path %s len=%d\n", ptr, data, flags,
1680             debugstr_wn(data->path, data->path_len), data->path_len);
1681     else
1682         TRACE("(%p %p %x): The URI contained no path\n", ptr, data, flags);
1683
1684     return TRUE;
1685 }
1686
1687 /* Parses the path of a opaque URI (much less strict then the parser
1688  * for a hierarchical URI).
1689  *
1690  * NOTE:
1691  *  Windows allows invalid % encoded data to appear in opaque URI paths
1692  *  for unknown scheme types.
1693  */
1694 static BOOL parse_path_opaque(const WCHAR **ptr, parse_data *data, DWORD flags) {
1695     const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
1696
1697     data->path = *ptr;
1698
1699     while(!is_path_delim(**ptr)) {
1700         if(**ptr == '%' && known_scheme) {
1701             if(!check_pct_encoded(ptr)) {
1702                 *ptr = data->path;
1703                 data->path = NULL;
1704                 return FALSE;
1705             } else
1706                 continue;
1707         }
1708
1709         ++(*ptr);
1710     }
1711
1712     data->path_len = *ptr - data->path;
1713     TRACE("(%p %p %x): Parsed opaque URI path %s len=%d\n", ptr, data, flags,
1714         debugstr_wn(data->path, data->path_len), data->path_len);
1715     return TRUE;
1716 }
1717
1718 /* Determines how the URI should be parsed after the scheme information.
1719  *
1720  * If the scheme is followed, by "//" then, it is treated as an hierarchical URI
1721  * which then the authority and path information will be parsed out. Otherwise, the
1722  * URI will be treated as an opaque URI which the authority information is not parsed
1723  * out.
1724  *
1725  * RFC 3896 definition of hier-part:
1726  *
1727  * hier-part   = "//" authority path-abempty
1728  *                 / path-absolute
1729  *                 / path-rootless
1730  *                 / path-empty
1731  *
1732  * MSDN opaque URI definition:
1733  *  scheme ":" path [ "#" fragment ]
1734  *
1735  * NOTES:
1736  *  If the URI is of an unknown scheme type and has a "//" following the scheme then it
1737  *  is treated as a hierarchical URI, but, if the CREATE_NO_CRACK_UNKNOWN_SCHEMES flag is
1738  *  set then it is considered an opaque URI reguardless of what follows the scheme information
1739  *  (per MSDN documentation).
1740  */
1741 static BOOL parse_hierpart(const WCHAR **ptr, parse_data *data, DWORD flags) {
1742     const WCHAR *start = *ptr;
1743
1744     /* Checks if the authority information needs to be parsed.
1745      *
1746      * Relative URI's aren't hierarchical URI's, but, they could trick
1747      * "check_hierarchical" into thinking it is, so we need to explicitly
1748      * make sure it's not relative. Also, if the URI is an implicit file
1749      * scheme it might not contain a "//", but, it's considered hierarchical
1750      * anyways. Wildcard Schemes are always considered hierarchical
1751      */
1752     if(data->scheme_type == URL_SCHEME_WILDCARD ||
1753        data->scheme_type == URL_SCHEME_FILE ||
1754        (!data->is_relative && check_hierarchical(ptr))) {
1755         /* Only treat it as a hierarchical URI if the scheme_type is known or
1756          * the Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES flag is not set.
1757          */
1758         if(data->scheme_type != URL_SCHEME_UNKNOWN ||
1759            !(flags & Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES)) {
1760             TRACE("(%p %p %x): Treating URI as an hierarchical URI.\n", ptr, data, flags);
1761             data->is_opaque = FALSE;
1762
1763             if(data->scheme_type == URL_SCHEME_FILE)
1764                 /* Skip past the "//" after the scheme (if any). */
1765                 check_hierarchical(ptr);
1766
1767             /* TODO: Handle hierarchical URI's, parse authority then parse the path. */
1768             if(!parse_authority(ptr, data, flags))
1769                 return FALSE;
1770
1771             return parse_path_hierarchical(ptr, data, flags);
1772         } else
1773             /* Reset ptr to it's starting position so opaque path parsing
1774              * begins at the correct location.
1775              */
1776             *ptr = start;
1777     }
1778
1779     /* If it reaches here, then the URI will be treated as an opaque
1780      * URI.
1781      */
1782
1783     TRACE("(%p %p %x): Treating URI as an opaque URI.\n", ptr, data, flags);
1784
1785     data->is_opaque = TRUE;
1786     if(!parse_path_opaque(ptr, data, flags))
1787         return FALSE;
1788
1789     return TRUE;
1790 }
1791
1792 /* Attempts to parse the query string from the URI.
1793  *
1794  * NOTES:
1795  *  If NO_DECODE_EXTRA_INFO flag is set, then invalid percent encoded
1796  *  data is allowed appear in the query string. For unknown scheme types
1797  *  invalid percent encoded data is allowed to appear reguardless.
1798  */
1799 static BOOL parse_query(const WCHAR **ptr, parse_data *data, DWORD flags) {
1800     const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
1801
1802     if(**ptr != '?') {
1803         TRACE("(%p %p %x): URI didn't contain a query string.\n", ptr, data, flags);
1804         return TRUE;
1805     }
1806
1807     data->query = *ptr;
1808
1809     ++(*ptr);
1810     while(**ptr && **ptr != '#') {
1811         if(**ptr == '%' && known_scheme &&
1812            !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) {
1813             if(!check_pct_encoded(ptr)) {
1814                 *ptr = data->query;
1815                 data->query = NULL;
1816                 return FALSE;
1817             } else
1818                 continue;
1819         }
1820
1821         ++(*ptr);
1822     }
1823
1824     data->query_len = *ptr - data->query;
1825
1826     TRACE("(%p %p %x): Parsed query string %s len=%d\n", ptr, data, flags,
1827         debugstr_wn(data->query, data->query_len), data->query_len);
1828     return TRUE;
1829 }
1830
1831 /* Attempts to parse the fragment from the URI.
1832  *
1833  * NOTES:
1834  *  If NO_DECODE_EXTRA_INFO flag is set, then invalid percent encoded
1835  *  data is allowed appear in the query string. For unknown scheme types
1836  *  invalid percent encoded data is allowed to appear reguardless.
1837  */
1838 static BOOL parse_fragment(const WCHAR **ptr, parse_data *data, DWORD flags) {
1839     const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
1840
1841     if(**ptr != '#') {
1842         TRACE("(%p %p %x): URI didn't contain a fragment.\n", ptr, data, flags);
1843         return TRUE;
1844     }
1845
1846     data->fragment = *ptr;
1847
1848     ++(*ptr);
1849     while(**ptr) {
1850         if(**ptr == '%' && known_scheme &&
1851            !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) {
1852             if(!check_pct_encoded(ptr)) {
1853                 *ptr = data->fragment;
1854                 data->fragment = NULL;
1855                 return FALSE;
1856             } else
1857                 continue;
1858         }
1859
1860         ++(*ptr);
1861     }
1862
1863     data->fragment_len = *ptr - data->fragment;
1864
1865     TRACE("(%p %p %x): Parsed fragment %s len=%d\n", ptr, data, flags,
1866         debugstr_wn(data->fragment, data->fragment_len), data->fragment_len);
1867     return TRUE;
1868 }
1869
1870 /* Parses and validates the components of the specified by data->uri
1871  * and stores the information it parses into 'data'.
1872  *
1873  * Returns TRUE if it successfully parsed the URI. False otherwise.
1874  */
1875 static BOOL parse_uri(parse_data *data, DWORD flags) {
1876     const WCHAR *ptr;
1877     const WCHAR **pptr;
1878
1879     ptr = data->uri;
1880     pptr = &ptr;
1881
1882     TRACE("(%p %x): BEGINNING TO PARSE URI %s.\n", data, flags, debugstr_w(data->uri));
1883
1884     if(!parse_scheme(pptr, data, flags))
1885         return FALSE;
1886
1887     if(!parse_hierpart(pptr, data, flags))
1888         return FALSE;
1889
1890     if(!parse_query(pptr, data, flags))
1891         return FALSE;
1892
1893     if(!parse_fragment(pptr, data, flags))
1894         return FALSE;
1895
1896     TRACE("(%p %x): FINISHED PARSING URI.\n", data, flags);
1897     return TRUE;
1898 }
1899
1900 /* Canonicalizes the userinfo of the URI represented by the parse_data.
1901  *
1902  * Canonicalization of the userinfo is a simple process. If there are any percent
1903  * encoded characters that fall in the "unreserved" character set, they are decoded
1904  * to their actual value. If a character is not in the "unreserved" or "reserved" sets
1905  * then it is percent encoded. Other than that the characters are copied over without
1906  * change.
1907  */
1908 static BOOL canonicalize_userinfo(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
1909     DWORD i = 0;
1910
1911     uri->userinfo_start = uri->userinfo_split = -1;
1912     uri->userinfo_len = 0;
1913
1914     if(!data->userinfo)
1915         /* URI doesn't have userinfo, so nothing to do here. */
1916         return TRUE;
1917
1918     uri->userinfo_start = uri->canon_len;
1919
1920     while(i < data->userinfo_len) {
1921         if(data->userinfo[i] == ':' && uri->userinfo_split == -1)
1922             /* Windows only considers the first ':' as the delimiter. */
1923             uri->userinfo_split = uri->canon_len - uri->userinfo_start;
1924         else if(data->userinfo[i] == '%') {
1925             /* Only decode % encoded values for known scheme types. */
1926             if(data->scheme_type != URL_SCHEME_UNKNOWN) {
1927                 /* See if the value really needs decoded. */
1928                 WCHAR val = decode_pct_val(data->userinfo + i);
1929                 if(is_unreserved(val)) {
1930                     if(!computeOnly)
1931                         uri->canon_uri[uri->canon_len] = val;
1932
1933                     ++uri->canon_len;
1934
1935                     /* Move pass the hex characters. */
1936                     i += 3;
1937                     continue;
1938                 }
1939             }
1940         } else if(!is_reserved(data->userinfo[i]) && !is_unreserved(data->userinfo[i]) &&
1941                   data->userinfo[i] != '\\') {
1942             /* Only percent encode forbidden characters if the NO_ENCODE_FORBIDDEN_CHARACTERS flag
1943              * is NOT set.
1944              */
1945             if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS)) {
1946                 if(!computeOnly)
1947                     pct_encode_val(data->userinfo[i], uri->canon_uri + uri->canon_len);
1948
1949                 uri->canon_len += 3;
1950                 ++i;
1951                 continue;
1952             }
1953         }
1954
1955         if(!computeOnly)
1956             /* Nothing special, so just copy the character over. */
1957             uri->canon_uri[uri->canon_len] = data->userinfo[i];
1958
1959         ++uri->canon_len;
1960         ++i;
1961     }
1962
1963     uri->userinfo_len = uri->canon_len - uri->userinfo_start;
1964     if(!computeOnly)
1965         TRACE("(%p %p %x %d): Canonicalized userinfo, userinfo_start=%d, userinfo=%s, userinfo_split=%d userinfo_len=%d.\n",
1966                 data, uri, flags, computeOnly, uri->userinfo_start, debugstr_wn(uri->canon_uri + uri->userinfo_start, uri->userinfo_len),
1967                 uri->userinfo_split, uri->userinfo_len);
1968
1969     /* Now insert the '@' after the userinfo. */
1970     if(!computeOnly)
1971         uri->canon_uri[uri->canon_len] = '@';
1972
1973     ++uri->canon_len;
1974     return TRUE;
1975 }
1976
1977 /* Attempts to canonicalize a reg_name.
1978  *
1979  * Things that happen:
1980  *  1)  If Uri_CREATE_NO_CANONICALIZE flag is not set, then the reg_name is
1981  *      lower cased. Unless it's an unknown scheme type, which case it's
1982  *      no lower cased reguardless.
1983  *
1984  *  2)  Unreserved % encoded characters are decoded for known
1985  *      scheme types.
1986  *
1987  *  3)  Forbidden characters are % encoded as long as
1988  *      Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS flag is not set and
1989  *      it isn't an unknown scheme type.
1990  *
1991  *  4)  If it's a file scheme and the host is "localhost" it's removed.
1992  */
1993 static BOOL canonicalize_reg_name(const parse_data *data, Uri *uri,
1994                                   DWORD flags, BOOL computeOnly) {
1995     static const WCHAR localhostW[] =
1996             {'l','o','c','a','l','h','o','s','t',0};
1997     const WCHAR *ptr;
1998     const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
1999
2000     uri->host_start = uri->canon_len;
2001
2002     if(data->scheme_type == URL_SCHEME_FILE &&
2003        data->host_len == lstrlenW(localhostW)) {
2004         if(!StrCmpNIW(data->host, localhostW, data->host_len)) {
2005             uri->host_start = -1;
2006             uri->host_len = 0;
2007             uri->host_type = Uri_HOST_UNKNOWN;
2008             return TRUE;
2009         }
2010     }
2011
2012     for(ptr = data->host; ptr < data->host+data->host_len; ++ptr) {
2013         if(*ptr == '%' && known_scheme) {
2014             WCHAR val = decode_pct_val(ptr);
2015             if(is_unreserved(val)) {
2016                 /* If NO_CANONICALZE is not set, then windows lower cases the
2017                  * decoded value.
2018                  */
2019                 if(!(flags & Uri_CREATE_NO_CANONICALIZE) && isupperW(val)) {
2020                     if(!computeOnly)
2021                         uri->canon_uri[uri->canon_len] = tolowerW(val);
2022                 } else {
2023                     if(!computeOnly)
2024                         uri->canon_uri[uri->canon_len] = val;
2025                 }
2026                 ++uri->canon_len;
2027
2028                 /* Skip past the % encoded character. */
2029                 ptr += 2;
2030                 continue;
2031             } else {
2032                 /* Just copy the % over. */
2033                 if(!computeOnly)
2034                     uri->canon_uri[uri->canon_len] = *ptr;
2035                 ++uri->canon_len;
2036             }
2037         } else if(*ptr == '\\') {
2038             /* Only unknown scheme types could have made it here with a '\\' in the host name. */
2039             if(!computeOnly)
2040                 uri->canon_uri[uri->canon_len] = *ptr;
2041             ++uri->canon_len;
2042         } else if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) &&
2043                   !is_unreserved(*ptr) && !is_reserved(*ptr) && known_scheme) {
2044             if(!computeOnly) {
2045                 pct_encode_val(*ptr, uri->canon_uri+uri->canon_len);
2046
2047                 /* The percent encoded value gets lower cased also. */
2048                 if(!(flags & Uri_CREATE_NO_CANONICALIZE)) {
2049                     uri->canon_uri[uri->canon_len+1] = tolowerW(uri->canon_uri[uri->canon_len+1]);
2050                     uri->canon_uri[uri->canon_len+2] = tolowerW(uri->canon_uri[uri->canon_len+2]);
2051                 }
2052             }
2053
2054             uri->canon_len += 3;
2055         } else {
2056             if(!computeOnly) {
2057                 if(!(flags & Uri_CREATE_NO_CANONICALIZE) && known_scheme)
2058                     uri->canon_uri[uri->canon_len] = tolowerW(*ptr);
2059                 else
2060                     uri->canon_uri[uri->canon_len] = *ptr;
2061             }
2062
2063             ++uri->canon_len;
2064         }
2065     }
2066
2067     uri->host_len = uri->canon_len - uri->host_start;
2068
2069     if(!computeOnly)
2070         TRACE("(%p %p %x %d): Canonicalize reg_name=%s len=%d\n", data, uri, flags,
2071             computeOnly, debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len),
2072             uri->host_len);
2073
2074     if(!computeOnly)
2075         find_domain_name(uri->canon_uri+uri->host_start, uri->host_len,
2076             &(uri->domain_offset));
2077
2078     return TRUE;
2079 }
2080
2081 /* Attempts to canonicalize an implicit IPv4 address. */
2082 static BOOL canonicalize_implicit_ipv4address(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2083     uri->host_start = uri->canon_len;
2084
2085     TRACE("%u\n", data->implicit_ipv4);
2086     /* For unknown scheme types Window's doesn't convert
2087      * the value into an IP address, but, it still considers
2088      * it an IPv4 address.
2089      */
2090     if(data->scheme_type == URL_SCHEME_UNKNOWN) {
2091         if(!computeOnly)
2092             memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR));
2093         uri->canon_len += data->host_len;
2094     } else {
2095         if(!computeOnly)
2096             uri->canon_len += ui2ipv4(uri->canon_uri+uri->canon_len, data->implicit_ipv4);
2097         else
2098             uri->canon_len += ui2ipv4(NULL, data->implicit_ipv4);
2099     }
2100
2101     uri->host_len = uri->canon_len - uri->host_start;
2102     uri->host_type = Uri_HOST_IPV4;
2103
2104     if(!computeOnly)
2105         TRACE("%p %p %x %d): Canonicalized implicit IP address=%s len=%d\n",
2106             data, uri, flags, computeOnly,
2107             debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len),
2108             uri->host_len);
2109
2110     return TRUE;
2111 }
2112
2113 /* Attempts to canonicalize an IPv4 address.
2114  *
2115  * If the parse_data represents a URI that has an implicit IPv4 address
2116  * (ex. http://256/, this function will convert 256 into 0.0.1.0). If
2117  * the implicit IP address exceeds the value of UINT_MAX (maximum value
2118  * for an IPv4 address) it's canonicalized as if were a reg-name.
2119  *
2120  * If the parse_data contains a partial or full IPv4 address it normalizes it.
2121  * A partial IPv4 address is something like "192.0" and would be normalized to
2122  * "192.0.0.0". With a full (or partial) IPv4 address like "192.002.01.003" would
2123  * be normalized to "192.2.1.3".
2124  *
2125  * NOTES:
2126  *  Window's ONLY normalizes IPv4 address for known scheme types (one that isn't
2127  *  URL_SCHEME_UNKNOWN). For unknown scheme types, it simply copies the data from
2128  *  the original URI into the canonicalized URI, but, it still recognizes URI's
2129  *  host type as HOST_IPV4.
2130  */
2131 static BOOL canonicalize_ipv4address(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2132     if(data->has_implicit_ip)
2133         return canonicalize_implicit_ipv4address(data, uri, flags, computeOnly);
2134     else {
2135         uri->host_start = uri->canon_len;
2136
2137         /* Windows only normalizes for known scheme types. */
2138         if(data->scheme_type != URL_SCHEME_UNKNOWN) {
2139             /* parse_data contains a partial or full IPv4 address, so normalize it. */
2140             DWORD i, octetDigitCount = 0, octetCount = 0;
2141             BOOL octetHasDigit = FALSE;
2142
2143             for(i = 0; i < data->host_len; ++i) {
2144                 if(data->host[i] == '0' && !octetHasDigit) {
2145                     /* Can ignore leading zeros if:
2146                      *  1) It isn't the last digit of the octet.
2147                      *  2) i+1 != data->host_len
2148                      *  3) i+1 != '.'
2149                      */
2150                     if(octetDigitCount == 2 ||
2151                        i+1 == data->host_len ||
2152                        data->host[i+1] == '.') {
2153                         if(!computeOnly)
2154                             uri->canon_uri[uri->canon_len] = data->host[i];
2155                         ++uri->canon_len;
2156                         TRACE("Adding zero\n");
2157                     }
2158                 } else if(data->host[i] == '.') {
2159                     if(!computeOnly)
2160                         uri->canon_uri[uri->canon_len] = data->host[i];
2161                     ++uri->canon_len;
2162
2163                     octetDigitCount = 0;
2164                     octetHasDigit = FALSE;
2165                     ++octetCount;
2166                 } else {
2167                     if(!computeOnly)
2168                         uri->canon_uri[uri->canon_len] = data->host[i];
2169                     ++uri->canon_len;
2170
2171                     ++octetDigitCount;
2172                     octetHasDigit = TRUE;
2173                 }
2174             }
2175
2176             /* Make sure the canonicalized IP address has 4 dec-octets.
2177              * If doesn't add "0" ones until there is 4;
2178              */
2179             for( ; octetCount < 3; ++octetCount) {
2180                 if(!computeOnly) {
2181                     uri->canon_uri[uri->canon_len] = '.';
2182                     uri->canon_uri[uri->canon_len+1] = '0';
2183                 }
2184
2185                 uri->canon_len += 2;
2186             }
2187         } else {
2188             /* Windows doesn't normalize addresses in unknown schemes. */
2189             if(!computeOnly)
2190                 memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR));
2191             uri->canon_len += data->host_len;
2192         }
2193
2194         uri->host_len = uri->canon_len - uri->host_start;
2195         if(!computeOnly)
2196             TRACE("(%p %p %x %d): Canonicalized IPv4 address, ip=%s len=%d\n",
2197                 data, uri, flags, computeOnly,
2198                 debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len),
2199                 uri->host_len);
2200     }
2201
2202     return TRUE;
2203 }
2204
2205 /* Attempts to canonicalize the IPv6 address of the URI.
2206  *
2207  * Multiple things happen during the canonicalization of an IPv6 address:
2208  *  1)  Any leading zero's in an h16 component are removed.
2209  *      Ex: [0001:0022::] -> [1:22::]
2210  *
2211  *  2)  The longest sequence of zero h16 components are compressed
2212  *      into a "::" (elision). If there's a tie, the first is choosen.
2213  *
2214  *      Ex: [0:0:0:0:1:6:7:8]   -> [::1:6:7:8]
2215  *          [0:0:0:0:1:2::]     -> [::1:2:0:0]
2216  *          [0:0:1:2:0:0:7:8]   -> [::1:2:0:0:7:8]
2217  *
2218  *  3)  If an IPv4 address is attached to the IPv6 address, it's
2219  *      also normalized.
2220  *      Ex: [::001.002.022.000] -> [::1.2.22.0]
2221  *
2222  *  4)  If an elision is present, but, only represents 1 h16 component
2223  *      it's expanded.
2224  *
2225  *      Ex: [1::2:3:4:5:6:7] -> [1:0:2:3:4:5:6:7]
2226  *
2227  *  5)  If the IPv6 address contains an IPv4 address and there exists
2228  *      at least 1 non-zero h16 component the IPv4 address is converted
2229  *      into two h16 components, otherwise it's normalized and kept as is.
2230  *
2231  *      Ex: [::192.200.003.4]       -> [::192.200.3.4]
2232  *          [ffff::192.200.003.4]   -> [ffff::c0c8:3041]
2233  *
2234  * NOTE:
2235  *  For unknown scheme types Windows simply copies the address over without any
2236  *  changes.
2237  *
2238  *  IPv4 address can be included in an elision if all its components are 0's.
2239  */
2240 static BOOL canonicalize_ipv6address(const parse_data *data, Uri *uri,
2241                                      DWORD flags, BOOL computeOnly) {
2242     uri->host_start = uri->canon_len;
2243
2244     if(data->scheme_type == URL_SCHEME_UNKNOWN) {
2245         if(!computeOnly)
2246             memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR));
2247         uri->canon_len += data->host_len;
2248     } else {
2249         USHORT values[8];
2250         INT elision_start;
2251         DWORD i, elision_len;
2252
2253         if(!ipv6_to_number(&(data->ipv6_address), values)) {
2254             TRACE("(%p %p %x %d): Failed to compute numerical value for IPv6 address.\n",
2255                 data, uri, flags, computeOnly);
2256             return FALSE;
2257         }
2258
2259         if(!computeOnly)
2260             uri->canon_uri[uri->canon_len] = '[';
2261         ++uri->canon_len;
2262
2263         /* Find where the elision should occur (if any). */
2264         compute_elision_location(&(data->ipv6_address), values, &elision_start, &elision_len);
2265
2266         TRACE("%p %p %x %d): Elision starts at %d, len=%u\n", data, uri, flags,
2267             computeOnly, elision_start, elision_len);
2268
2269         for(i = 0; i < 8; ++i) {
2270             BOOL in_elision = (elision_start > -1 && i >= elision_start &&
2271                                i < elision_start+elision_len);
2272             BOOL do_ipv4 = (i == 6 && data->ipv6_address.ipv4 && !in_elision &&
2273                             data->ipv6_address.h16_count == 0);
2274
2275             if(i == elision_start) {
2276                 if(!computeOnly) {
2277                     uri->canon_uri[uri->canon_len] = ':';
2278                     uri->canon_uri[uri->canon_len+1] = ':';
2279                 }
2280                 uri->canon_len += 2;
2281             }
2282
2283             /* We can ignore the current component if we're in the elision. */
2284             if(in_elision)
2285                 continue;
2286
2287             /* We only add a ':' if we're not at i == 0, or when we're at
2288              * the very end of elision range since the ':' colon was handled
2289              * earlier. Otherwise we would end up with ":::" after elision.
2290              */
2291             if(i != 0 && !(elision_start > -1 && i == elision_start+elision_len)) {
2292                 if(!computeOnly)
2293                     uri->canon_uri[uri->canon_len] = ':';
2294                 ++uri->canon_len;
2295             }
2296
2297             if(do_ipv4) {
2298                 UINT val;
2299                 DWORD len;
2300
2301                 /* Combine the two parts of the IPv4 address values. */
2302                 val = values[i];
2303                 val <<= 16;
2304                 val += values[i+1];
2305
2306                 if(!computeOnly)
2307                     len = ui2ipv4(uri->canon_uri+uri->canon_len, val);
2308                 else
2309                     len = ui2ipv4(NULL, val);
2310
2311                 uri->canon_len += len;
2312                 ++i;
2313             } else {
2314                 /* Write a regular h16 component to the URI. */
2315
2316                 /* Short circuit for the trivial case. */
2317                 if(values[i] == 0) {
2318                     if(!computeOnly)
2319                         uri->canon_uri[uri->canon_len] = '0';
2320                     ++uri->canon_len;
2321                 } else {
2322                     static const WCHAR formatW[] = {'%','x',0};
2323
2324                     if(!computeOnly)
2325                         uri->canon_len += sprintfW(uri->canon_uri+uri->canon_len,
2326                                             formatW, values[i]);
2327                     else {
2328                         WCHAR tmp[5];
2329                         uri->canon_len += sprintfW(tmp, formatW, values[i]);
2330                     }
2331                 }
2332             }
2333         }
2334
2335         /* Add the closing ']'. */
2336         if(!computeOnly)
2337             uri->canon_uri[uri->canon_len] = ']';
2338         ++uri->canon_len;
2339     }
2340
2341     uri->host_len = uri->canon_len - uri->host_start;
2342
2343     if(!computeOnly)
2344         TRACE("(%p %p %x %d): Canonicalized IPv6 address %s, len=%d\n", data, uri, flags,
2345             computeOnly, debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len),
2346             uri->host_len);
2347
2348     return TRUE;
2349 }
2350
2351 /* Attempts to canonicalize the host of the URI (if any). */
2352 static BOOL canonicalize_host(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2353     uri->host_start = -1;
2354     uri->host_len = 0;
2355     uri->domain_offset = -1;
2356
2357     if(data->host) {
2358         switch(data->host_type) {
2359         case Uri_HOST_DNS:
2360             uri->host_type = Uri_HOST_DNS;
2361             if(!canonicalize_reg_name(data, uri, flags, computeOnly))
2362                 return FALSE;
2363
2364             break;
2365         case Uri_HOST_IPV4:
2366             uri->host_type = Uri_HOST_IPV4;
2367             if(!canonicalize_ipv4address(data, uri, flags, computeOnly))
2368                 return FALSE;
2369
2370             break;
2371         case Uri_HOST_IPV6:
2372             if(!canonicalize_ipv6address(data, uri, flags, computeOnly))
2373                 return FALSE;
2374
2375             uri->host_type = Uri_HOST_IPV6;
2376             break;
2377         case Uri_HOST_UNKNOWN:
2378             if(data->host_len > 0 || data->scheme_type != URL_SCHEME_FILE) {
2379                 uri->host_start = uri->canon_len;
2380
2381                 /* Nothing happens to unknown host types. */
2382                 if(!computeOnly)
2383                     memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR));
2384                 uri->canon_len += data->host_len;
2385                 uri->host_len = data->host_len;
2386             }
2387
2388             uri->host_type = Uri_HOST_UNKNOWN;
2389             break;
2390         default:
2391             FIXME("(%p %p %x %d): Canonicalization for host type %d not supported.\n", data,
2392                     uri, flags, computeOnly, data->host_type);
2393             return FALSE;
2394        }
2395    }
2396
2397    return TRUE;
2398 }
2399
2400 static BOOL canonicalize_port(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2401     BOOL has_default_port = FALSE;
2402     USHORT default_port = 0;
2403     DWORD i;
2404
2405     uri->has_port = FALSE;
2406
2407     /* Check if the scheme has a default port. */
2408     for(i = 0; i < sizeof(default_ports)/sizeof(default_ports[0]); ++i) {
2409         if(default_ports[i].scheme == data->scheme_type) {
2410             has_default_port = TRUE;
2411             default_port = default_ports[i].port;
2412             break;
2413         }
2414     }
2415
2416     if(data->port || has_default_port)
2417         uri->has_port = TRUE;
2418
2419     /* Possible cases:
2420      *  1)  Has a port which is the default port.
2421      *  2)  Has a port (not the default).
2422      *  3)  Doesn't have a port, but, scheme has a default port.
2423      *  4)  No port.
2424      */
2425     if(has_default_port && data->port && data->port_value == default_port) {
2426         /* If it's the default port and this flag isn't set, don't do anything. */
2427         if(flags & Uri_CREATE_NO_CANONICALIZE) {
2428             /* Copy the original port over. */
2429             if(!computeOnly) {
2430                 uri->canon_uri[uri->canon_len] = ':';
2431                 memcpy(uri->canon_uri+uri->canon_len+1, data->port, data->port_len*sizeof(WCHAR));
2432             }
2433             uri->canon_len += data->port_len+1;
2434         }
2435
2436         uri->port = default_port;
2437     } else if(data->port) {
2438         if(!computeOnly)
2439             uri->canon_uri[uri->canon_len] = ':';
2440         ++uri->canon_len;
2441
2442         if(flags & Uri_CREATE_NO_CANONICALIZE) {
2443             /* Copy the original over without changes. */
2444             if(!computeOnly)
2445                 memcpy(uri->canon_uri+uri->canon_len, data->port, data->port_len*sizeof(WCHAR));
2446             uri->canon_len += data->port_len;
2447         } else {
2448             const WCHAR formatW[] = {'%','u',0};
2449             INT len = 0;
2450             if(!computeOnly)
2451                 len = sprintfW(uri->canon_uri+uri->canon_len, formatW, data->port_value);
2452             else {
2453                 WCHAR tmp[6];
2454                 len = sprintfW(tmp, formatW, data->port_value);
2455             }
2456             uri->canon_len += len;
2457         }
2458
2459         uri->port = data->port_value;
2460     } else if(has_default_port)
2461         uri->port = default_port;
2462
2463     return TRUE;
2464 }
2465
2466 /* Canonicalizes the authority of the URI represented by the parse_data. */
2467 static BOOL canonicalize_authority(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2468     uri->authority_start = uri->canon_len;
2469     uri->authority_len = 0;
2470
2471     if(!canonicalize_userinfo(data, uri, flags, computeOnly))
2472         return FALSE;
2473
2474     if(!canonicalize_host(data, uri, flags, computeOnly))
2475         return FALSE;
2476
2477     if(!canonicalize_port(data, uri, flags, computeOnly))
2478         return FALSE;
2479
2480     if(uri->host_start != -1)
2481         uri->authority_len = uri->canon_len - uri->authority_start;
2482     else
2483         uri->authority_start = -1;
2484
2485     return TRUE;
2486 }
2487
2488 /* Attempts to canonicalize the path of a hierarchical URI.
2489  *
2490  * Things that happen:
 *  1). Forbidden characters are percent encoded, unless the
 *      NO_ENCODE_FORBIDDEN_CHARACTERS flag is set. File schemes ignore that
 *      flag and always encode forbidden characters; unknown scheme types
 *      never encode them.
2495  *
2496  *  2). For known scheme types '\\' are changed to '/'.
2497  *
2498  *  3). Percent encoded, unreserved characters are decoded to their actual values.
2499  *      Unless the scheme type is unknown. For file schemes any percent encoded
2500  *      character in the unreserved or reserved set is decoded.
2501  *
 *  4). For file schemes, if the path starts with a drive letter (and so
 *      doesn't start with a '/') then a '/' is prepended.
 *      Ex: file://c:/test.mp3 -> file:///c:/test.mp3
2505  *
2506  *  5). Dot segments are removed from the path for all scheme types
2507  *      unless NO_CANONICALIZE flag is set. Dot segments aren't removed
2508  *      for wildcard scheme types.
2509  *
2510  * NOTES:
2511  *      file://c:/test%20test   -> file:///c:/test%2520test
2512  *      file://c:/test%3Etest   -> file:///c:/test%253Etest
2513  *      file:///c:/test%20test  -> file:///c:/test%20test
2514  *      file:///c:/test%test    -> file:///c:/test%25test
2515  */
2516 static BOOL canonicalize_path_hierarchical(const parse_data *data, Uri *uri,
2517                                            DWORD flags, BOOL computeOnly) {
2518     const WCHAR *ptr;
2519     const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
2520     const BOOL is_file = data->scheme_type == URL_SCHEME_FILE;
2521
2522     BOOL escape_pct = FALSE;
2523
2524     if(!data->path) {
2525         uri->path_start = -1;
2526         uri->path_len = 0;
2527         return TRUE;
2528     }
2529
2530     uri->path_start = uri->canon_len;
2531
2532     /* Check if a '/' needs to be appended for the file scheme. */
2533     if(is_file) {
2534         if(data->path_len > 1 && is_alpha(*(data->path)) &&
2535            *(data->path+1) == ':') {
2536             if(!computeOnly)
2537                 uri->canon_uri[uri->canon_len] = '/';
2538             uri->canon_len++;
2539             escape_pct = TRUE;
2540         }
2541     }
2542
2543     for(ptr = data->path; ptr < data->path+data->path_len; ++ptr) {
2544         if(*ptr == '%') {
2545             const WCHAR *tmp = ptr;
2546             WCHAR val;
2547
2548             /* Check if the % represents a valid encoded char, or if it needs encoded. */
2549             BOOL force_encode = !check_pct_encoded(&tmp) && is_file;
2550             val = decode_pct_val(ptr);
2551
2552             if(force_encode || escape_pct) {
2553                 /* Escape the percent sign in the file URI. */
2554                 if(!computeOnly)
2555                     pct_encode_val(*ptr, uri->canon_uri+uri->canon_len);
2556                 uri->canon_len += 3;
2557             } else if((is_unreserved(val) && known_scheme) ||
2558                       (is_file && (is_unreserved(val) || is_reserved(val)))) {
2559                 if(!computeOnly)
2560                     uri->canon_uri[uri->canon_len] = val;
2561                 ++uri->canon_len;
2562
2563                 ptr += 2;
2564                 continue;
2565             } else {
2566                 if(!computeOnly)
2567                     uri->canon_uri[uri->canon_len] = *ptr;
2568                 ++uri->canon_len;
2569             }
2570         } else if(*ptr == '\\' && known_scheme) {
2571             if(!computeOnly)
2572                 uri->canon_uri[uri->canon_len] = '/';
2573             ++uri->canon_len;
2574         } else if(known_scheme && !is_unreserved(*ptr) && !is_reserved(*ptr) &&
2575                   (!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) || is_file)) {
2576             /* Escape the forbidden character. */
2577             if(!computeOnly)
2578                 pct_encode_val(*ptr, uri->canon_uri+uri->canon_len);
2579             uri->canon_len += 3;
2580         } else {
2581             if(!computeOnly)
2582                 uri->canon_uri[uri->canon_len] = *ptr;
2583             ++uri->canon_len;
2584         }
2585     }
2586
2587     uri->path_len = uri->canon_len - uri->path_start;
2588
2589     /* Removing the dot segments only happens when it's not in
2590      * computeOnly mode and it's not a wildcard scheme.
2591      */
2592     if(!computeOnly && data->scheme_type != URL_SCHEME_WILDCARD) {
2593         if(!(flags & Uri_CREATE_NO_CANONICALIZE)) {
2594             /* Remove the dot segments (if any) and reset everything to the new
2595              * correct length.
2596              */
2597             DWORD new_len = remove_dot_segments(uri->canon_uri+uri->path_start, uri->path_len);
2598             uri->canon_len -= uri->path_len-new_len;
2599             uri->path_len = new_len;
2600         }
2601     }
2602
2603     if(!computeOnly)
2604         TRACE("Canonicalized path %s len=%d\n",
2605             debugstr_wn(uri->canon_uri+uri->path_start, uri->path_len),
2606             uri->path_len);
2607
2608     return TRUE;
2609 }
2610
2611 /* Attempts to canonicalize the path for an opaque URI.
2612  *
2613  * For known scheme types:
2614  *  1)  forbidden characters are percent encoded if
2615  *      NO_ENCODE_FORBIDDEN_CHARACTERS isn't set.
2616  *
2617  *  2)  Percent encoded, unreserved characters are decoded
2618  *      to their actual values, for known scheme types.
2619  *
2620  *  3)  '\\' are changed to '/' for known scheme types
2621  *      except for mailto schemes.
2622  */
2623 static BOOL canonicalize_path_opaque(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2624     const WCHAR *ptr;
2625     const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
2626
2627     if(!data->path) {
2628         uri->path_start = -1;
2629         uri->path_len = 0;
2630         return TRUE;
2631     }
2632
2633     uri->path_start = uri->canon_len;
2634
2635     /* Windows doesn't allow a "//" to appear after the scheme
2636      * of a URI, if it's an opaque URI.
2637      */
2638     if(data->scheme && *(data->path) == '/' && *(data->path+1) == '/') {
2639         /* So it inserts a "/." before the "//" if it exists. */
2640         if(!computeOnly) {
2641             uri->canon_uri[uri->canon_len] = '/';
2642             uri->canon_uri[uri->canon_len+1] = '.';
2643         }
2644
2645         uri->canon_len += 2;
2646     }
2647
2648     for(ptr = data->path; ptr < data->path+data->path_len; ++ptr) {
2649         if(*ptr == '%' && known_scheme) {
2650             WCHAR val = decode_pct_val(ptr);
2651
2652             if(is_unreserved(val)) {
2653                 if(!computeOnly)
2654                     uri->canon_uri[uri->canon_len] = val;
2655                 ++uri->canon_len;
2656
2657                 ptr += 2;
2658                 continue;
2659             } else {
2660                 if(!computeOnly)
2661                     uri->canon_uri[uri->canon_len] = *ptr;
2662                 ++uri->canon_len;
2663             }
2664         } else if(known_scheme && !is_unreserved(*ptr) && !is_reserved(*ptr) &&
2665                   !(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS)) {
2666             if(!computeOnly)
2667                 pct_encode_val(*ptr, uri->canon_uri+uri->canon_len);
2668             uri->canon_len += 3;
2669         } else {
2670             if(!computeOnly)
2671                 uri->canon_uri[uri->canon_len] = *ptr;
2672             ++uri->canon_len;
2673         }
2674     }
2675
2676     uri->path_len = uri->canon_len - uri->path_start;
2677
2678     TRACE("(%p %p %x %d): Canonicalized opaque URI path %s len=%d\n", data, uri, flags, computeOnly,
2679         debugstr_wn(uri->canon_uri+uri->path_start, uri->path_len), uri->path_len);
2680     return TRUE;
2681 }
2682
2683 /* Determines how the URI represented by the parse_data should be canonicalized.
2684  *
2685  * Essentially, if the parse_data represents an hierarchical URI then it calls
2686  * canonicalize_authority and the canonicalization functions for the path. If the
2687  * URI is opaque it canonicalizes the path of the URI.
2688  */
2689 static BOOL canonicalize_hierpart(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2690     if(!data->is_opaque) {
2691         /* "//" is only added for non-wildcard scheme types. */
2692         if(data->scheme_type != URL_SCHEME_WILDCARD) {
2693             if(!computeOnly) {
2694                 INT pos = uri->canon_len;
2695
2696                 uri->canon_uri[pos] = '/';
2697                 uri->canon_uri[pos+1] = '/';
2698            }
2699            uri->canon_len += 2;
2700         }
2701
2702         if(!canonicalize_authority(data, uri, flags, computeOnly))
2703             return FALSE;
2704
2705         /* TODO: Canonicalize the path of the URI. */
2706         if(!canonicalize_path_hierarchical(data, uri, flags, computeOnly))
2707             return FALSE;
2708
2709     } else {
2710         /* Opaque URI's don't have an authority. */
2711         uri->userinfo_start = uri->userinfo_split = -1;
2712         uri->userinfo_len = 0;
2713         uri->host_start = -1;
2714         uri->host_len = 0;
2715         uri->host_type = Uri_HOST_UNKNOWN;
2716         uri->has_port = FALSE;
2717         uri->authority_start = -1;
2718         uri->authority_len = 0;
2719         uri->domain_offset = -1;
2720
2721         if(!canonicalize_path_opaque(data, uri, flags, computeOnly))
2722             return FALSE;
2723     }
2724
2725     if(uri->path_start > -1 && !computeOnly)
2726         /* Finding file extensions happens for both types of URIs. */
2727         uri->extension_offset = find_file_extension(uri->canon_uri+uri->path_start, uri->path_len);
2728     else
2729         uri->extension_offset = -1;
2730
2731     return TRUE;
2732 }
2733
2734 /* Attempts to canonicalize the query string of the URI.
2735  *
2736  * Things that happen:
2737  *  1)  For known scheme types forbidden characters
2738  *      are percent encoded, unless the NO_DECODE_EXTRA_INFO flag is set
2739  *      or NO_ENCODE_FORBIDDEN_CHARACTERS is set.
2740  *
2741  *  2)  For known scheme types, percent encoded, unreserved characters
2742  *      are decoded as long as the NO_DECODE_EXTRA_INFO flag isn't set.
2743  */
2744 static BOOL canonicalize_query(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2745     const WCHAR *ptr, *end;
2746     const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
2747
2748     if(!data->query) {
2749         uri->query_start = -1;
2750         uri->query_len = 0;
2751         return TRUE;
2752     }
2753
2754     uri->query_start = uri->canon_len;
2755
2756     end = data->query+data->query_len;
2757     for(ptr = data->query; ptr < end; ++ptr) {
2758         if(*ptr == '%') {
2759             if(known_scheme && !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) {
2760                 WCHAR val = decode_pct_val(ptr);
2761                 if(is_unreserved(val)) {
2762                     if(!computeOnly)
2763                         uri->canon_uri[uri->canon_len] = val;
2764                     ++uri->canon_len;
2765
2766                     ptr += 2;
2767                     continue;
2768                 }
2769             }
2770         } else if(known_scheme && !is_unreserved(*ptr) && !is_reserved(*ptr)) {
2771             if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) &&
2772                !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) {
2773                 if(!computeOnly)
2774                     pct_encode_val(*ptr, uri->canon_uri+uri->canon_len);
2775                 uri->canon_len += 3;
2776                 continue;
2777             }
2778         }
2779
2780         if(!computeOnly)
2781             uri->canon_uri[uri->canon_len] = *ptr;
2782         ++uri->canon_len;
2783     }
2784
2785     uri->query_len = uri->canon_len - uri->query_start;
2786
2787     if(!computeOnly)
2788         TRACE("(%p %p %x %d): Canonicalized query string %s len=%d\n", data, uri, flags,
2789             computeOnly, debugstr_wn(uri->canon_uri+uri->query_start, uri->query_len),
2790             uri->query_len);
2791     return TRUE;
2792 }
2793
2794 static BOOL canonicalize_fragment(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2795     const WCHAR *ptr, *end;
2796     const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
2797
2798     if(!data->fragment) {
2799         uri->fragment_start = -1;
2800         uri->fragment_len = 0;
2801         return TRUE;
2802     }
2803
2804     uri->fragment_start = uri->canon_len;
2805
2806     end = data->fragment + data->fragment_len;
2807     for(ptr = data->fragment; ptr < end; ++ptr) {
2808         if(*ptr == '%') {
2809             if(known_scheme && !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) {
2810                 WCHAR val = decode_pct_val(ptr);
2811                 if(is_unreserved(val)) {
2812                     if(!computeOnly)
2813                         uri->canon_uri[uri->canon_len] = val;
2814                     ++uri->canon_len;
2815
2816                     ptr += 2;
2817                     continue;
2818                 }
2819             }
2820         } else if(known_scheme && !is_unreserved(*ptr) && !is_reserved(*ptr)) {
2821             if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) &&
2822                !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) {
2823                 if(!computeOnly)
2824                     pct_encode_val(*ptr, uri->canon_uri+uri->canon_len);
2825                 uri->canon_len += 3;
2826                 continue;
2827             }
2828         }
2829
2830         if(!computeOnly)
2831             uri->canon_uri[uri->canon_len] = *ptr;
2832         ++uri->canon_len;
2833     }
2834
2835     uri->fragment_len = uri->canon_len - uri->fragment_start;
2836
2837     if(!computeOnly)
2838         TRACE("(%p %p %x %d): Canonicalized fragment %s len=%d\n", data, uri, flags,
2839             computeOnly, debugstr_wn(uri->canon_uri+uri->fragment_start, uri->fragment_len),
2840             uri->fragment_len);
2841     return TRUE;
2842 }
2843
2844 /* Canonicalizes the scheme information specified in the parse_data using the specified flags. */
2845 static BOOL canonicalize_scheme(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2846     uri->scheme_start = -1;
2847     uri->scheme_len = 0;
2848
2849     if(!data->scheme) {
2850         /* The only type of URI that doesn't have to have a scheme is a relative
2851          * URI.
2852          */
2853         if(!data->is_relative) {
2854             FIXME("(%p %p %x): Unable to determine the scheme type of %s.\n", data,
2855                     uri, flags, debugstr_w(data->uri));
2856             return FALSE;
2857         }
2858     } else {
2859         if(!computeOnly) {
2860             DWORD i;
2861             INT pos = uri->canon_len;
2862
2863             for(i = 0; i < data->scheme_len; ++i) {
2864                 /* Scheme name must be lower case after canonicalization. */
2865                 uri->canon_uri[i + pos] = tolowerW(data->scheme[i]);
2866             }
2867
2868             uri->canon_uri[i + pos] = ':';
2869             uri->scheme_start = pos;
2870
2871             TRACE("(%p %p %x): Canonicalized scheme=%s, len=%d.\n", data, uri, flags,
2872                     debugstr_wn(uri->canon_uri,  uri->scheme_len), data->scheme_len);
2873         }
2874
2875         /* This happens in both computation modes. */
2876         uri->canon_len += data->scheme_len + 1;
2877         uri->scheme_len = data->scheme_len;
2878     }
2879     return TRUE;
2880 }
2881
2882 /* Compute's what the length of the URI specified by the parse_data will be
2883  * after canonicalization occurs using the specified flags.
2884  *
2885  * This function will return a non-zero value indicating the length of the canonicalized
2886  * URI, or -1 on error.
2887  */
2888 static int compute_canonicalized_length(const parse_data *data, DWORD flags) {
2889     Uri uri;
2890
2891     memset(&uri, 0, sizeof(Uri));
2892
2893     TRACE("(%p %x): Beginning to compute canonicalized length for URI %s\n", data, flags,
2894             debugstr_w(data->uri));
2895
2896     if(!canonicalize_scheme(data, &uri, flags, TRUE)) {
2897         ERR("(%p %x): Failed to compute URI scheme length.\n", data, flags);
2898         return -1;
2899     }
2900
2901     if(!canonicalize_hierpart(data, &uri, flags, TRUE)) {
2902         ERR("(%p %x): Failed to compute URI hierpart length.\n", data, flags);
2903         return -1;
2904     }
2905
2906     if(!canonicalize_query(data, &uri, flags, TRUE)) {
2907         ERR("(%p %x): Failed to compute query string length.\n", data, flags);
2908         return -1;
2909     }
2910
2911     if(!canonicalize_fragment(data, &uri, flags, TRUE)) {
2912         ERR("(%p %x): Failed to compute fragment length.\n", data, flags);
2913         return -1;
2914     }
2915
2916     TRACE("(%p %x): Finished computing canonicalized URI length. length=%d\n", data, flags, uri.canon_len);
2917
2918     return uri.canon_len;
2919 }
2920
2921 /* Canonicalizes the URI data specified in the parse_data, using the given flags. If the
2922  * canonicalization succeededs it will store all the canonicalization information
2923  * in the pointer to the Uri.
2924  *
2925  * To canonicalize a URI this function first computes what the length of the URI
2926  * specified by the parse_data will be. Once this is done it will then perfom the actual
2927  * canonicalization of the URI.
2928  */
2929 static HRESULT canonicalize_uri(const parse_data *data, Uri *uri, DWORD flags) {
2930     INT len;
2931
2932     uri->canon_uri = NULL;
2933     len = uri->canon_size = uri->canon_len = 0;
2934
2935     TRACE("(%p %p %x): beginning to canonicalize URI %s.\n", data, uri, flags, debugstr_w(data->uri));
2936
2937     /* First try to compute the length of the URI. */
2938     len = compute_canonicalized_length(data, flags);
2939     if(len == -1) {
2940         ERR("(%p %p %x): Could not compute the canonicalized length of %s.\n", data, uri, flags,
2941                 debugstr_w(data->uri));
2942         return E_INVALIDARG;
2943     }
2944
2945     uri->canon_uri = heap_alloc((len+1)*sizeof(WCHAR));
2946     if(!uri->canon_uri)
2947         return E_OUTOFMEMORY;
2948
2949     uri->canon_size = len;
2950     if(!canonicalize_scheme(data, uri, flags, FALSE)) {
2951         ERR("(%p %p %x): Unable to canonicalize the scheme of the URI.\n", data, uri, flags);
2952         heap_free(uri->canon_uri);
2953         return E_INVALIDARG;
2954     }
2955     uri->scheme_type = data->scheme_type;
2956
2957     if(!canonicalize_hierpart(data, uri, flags, FALSE)) {
2958         ERR("(%p %p %x): Unable to canonicalize the heirpart of the URI\n", data, uri, flags);
2959         heap_free(uri->canon_uri);
2960         return E_INVALIDARG;
2961     }
2962
2963     if(!canonicalize_query(data, uri, flags, FALSE)) {
2964         ERR("(%p %p %x): Unable to canonicalize query string of the URI.\n",
2965             data, uri, flags);
2966         return E_INVALIDARG;
2967     }
2968
2969     if(!canonicalize_fragment(data, uri, flags, FALSE)) {
2970         ERR("(%p %p %x): Unable to canonicalize fragment of the URI.\n",
2971             data, uri, flags);
2972         return E_INVALIDARG;
2973     }
2974
2975     /* There's a possibility we didn't use all the space we allocated
2976      * earlier.
2977      */
2978     if(uri->canon_len < uri->canon_size) {
2979         /* This happens if the URI is hierarchical and dot
2980          * segments were removed from it's path.
2981          */
2982         WCHAR *tmp = heap_realloc(uri->canon_uri, (uri->canon_len+1)*sizeof(WCHAR));
2983         if(!tmp)
2984             return E_OUTOFMEMORY;
2985
2986         uri->canon_uri = tmp;
2987         uri->canon_size = uri->canon_len;
2988     }
2989
2990     uri->canon_uri[uri->canon_len] = '\0';
2991     TRACE("(%p %p %x): finished canonicalizing the URI. uri=%s\n", data, uri, flags, debugstr_w(uri->canon_uri));
2992
2993     return S_OK;
2994 }
2995
/* Interface pointers of an implementation object: the address of the
 * object's vtable pointer member, cast to the matching interface type.
 */
#define URI(obj)         ((IUri*)  &(obj)->lpIUriVtbl)
#define URIBUILDER(obj)  ((IUriBuilder*)  &(obj)->lpIUriBuilderVtbl)

/* Gets the Uri behind an IUri interface pointer (through DEFINE_THIS, which
 * is defined outside of this file section).
 */
#define URI_THIS(iface) DEFINE_THIS(Uri, IUri, iface)
3000
3001 static HRESULT WINAPI Uri_QueryInterface(IUri *iface, REFIID riid, void **ppv)
3002 {
3003     Uri *This = URI_THIS(iface);
3004
3005     if(IsEqualGUID(&IID_IUnknown, riid)) {
3006         TRACE("(%p)->(IID_IUnknown %p)\n", This, ppv);
3007         *ppv = URI(This);
3008     }else if(IsEqualGUID(&IID_IUri, riid)) {
3009         TRACE("(%p)->(IID_IUri %p)\n", This, ppv);
3010         *ppv = URI(This);
3011     }else {
3012         TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppv);
3013         *ppv = NULL;
3014         return E_NOINTERFACE;
3015     }
3016
3017     IUnknown_AddRef((IUnknown*)*ppv);
3018     return S_OK;
3019 }
3020
3021 static ULONG WINAPI Uri_AddRef(IUri *iface)
3022 {
3023     Uri *This = URI_THIS(iface);
3024     LONG ref = InterlockedIncrement(&This->ref);
3025
3026     TRACE("(%p) ref=%d\n", This, ref);
3027
3028     return ref;
3029 }
3030
3031 static ULONG WINAPI Uri_Release(IUri *iface)
3032 {
3033     Uri *This = URI_THIS(iface);
3034     LONG ref = InterlockedDecrement(&This->ref);
3035
3036     TRACE("(%p) ref=%d\n", This, ref);
3037
3038     if(!ref) {
3039         SysFreeString(This->raw_uri);
3040         heap_free(This->canon_uri);
3041         heap_free(This);
3042     }
3043
3044     return ref;
3045 }
3046
3047 static HRESULT WINAPI Uri_GetPropertyBSTR(IUri *iface, Uri_PROPERTY uriProp, BSTR *pbstrProperty, DWORD dwFlags)
3048 {
3049     Uri *This = URI_THIS(iface);
3050     HRESULT hres;
3051     TRACE("(%p)->(%d %p %x)\n", This, uriProp, pbstrProperty, dwFlags);
3052
3053     if(!pbstrProperty)
3054         return E_POINTER;
3055
3056     if(uriProp > Uri_PROPERTY_STRING_LAST) {
3057         /* Windows allocates an empty BSTR for invalid Uri_PROPERTY's. */
3058         *pbstrProperty = SysAllocStringLen(NULL, 0);
3059         if(!(*pbstrProperty))
3060             return E_OUTOFMEMORY;
3061
3062         /* It only returns S_FALSE for the ZONE property... */
3063         if(uriProp == Uri_PROPERTY_ZONE)
3064             return S_FALSE;
3065         else
3066             return S_OK;
3067     }
3068
3069     /* Don't have support for flags yet. */
3070     if(dwFlags) {
3071         FIXME("(%p)->(%d %p %x)\n", This, uriProp, pbstrProperty, dwFlags);
3072         return E_NOTIMPL;
3073     }
3074
3075     switch(uriProp) {
3076     case Uri_PROPERTY_ABSOLUTE_URI:
3077         *pbstrProperty = SysAllocString(This->canon_uri);
3078
3079         if(!(*pbstrProperty))
3080             hres = E_OUTOFMEMORY;
3081         else
3082             hres = S_OK;
3083
3084         break;
3085     case Uri_PROPERTY_AUTHORITY:
3086         if(This->authority_start > -1) {
3087             *pbstrProperty = SysAllocStringLen(This->canon_uri+This->authority_start, This->authority_len);
3088             hres = S_OK;
3089         } else {
3090             *pbstrProperty = SysAllocStringLen(NULL, 0);
3091             hres = S_FALSE;
3092         }
3093
3094         if(!(*pbstrProperty))
3095             hres = E_OUTOFMEMORY;
3096
3097         break;
3098     case Uri_PROPERTY_DISPLAY_URI:
3099         /* The Display URI contains everything except for the userinfo for known
3100          * scheme types.
3101          */
3102         if(This->scheme_type != URL_SCHEME_UNKNOWN && This->userinfo_start > -1) {
3103             *pbstrProperty = SysAllocStringLen(NULL, This->canon_len-This->userinfo_len);
3104
3105             if(*pbstrProperty) {
3106                 /* Copy everything before the userinfo over. */
3107                 memcpy(*pbstrProperty, This->canon_uri, This->userinfo_start*sizeof(WCHAR));
3108                 /* Copy everything after the userinfo over. */
3109                 memcpy(*pbstrProperty+This->userinfo_start,
3110                    This->canon_uri+This->userinfo_start+This->userinfo_len+1,
3111                    (This->canon_len-(This->userinfo_start+This->userinfo_len+1))*sizeof(WCHAR));
3112             }
3113         } else
3114             *pbstrProperty = SysAllocString(This->canon_uri);
3115
3116         if(!(*pbstrProperty))
3117             hres = E_OUTOFMEMORY;
3118         else
3119             hres = S_OK;
3120
3121         break;
3122     case Uri_PROPERTY_DOMAIN:
3123         if(This->domain_offset > -1) {
3124             *pbstrProperty = SysAllocStringLen(This->canon_uri+This->host_start+This->domain_offset,
3125                                                This->host_len-This->domain_offset);
3126             hres = S_OK;
3127         } else {
3128             *pbstrProperty = SysAllocStringLen(NULL, 0);
3129             hres = S_FALSE;
3130         }
3131
3132         if(!(*pbstrProperty))
3133             hres = E_OUTOFMEMORY;
3134
3135         break;
3136     case Uri_PROPERTY_EXTENSION:
3137         if(This->extension_offset > -1) {
3138             *pbstrProperty = SysAllocStringLen(This->canon_uri+This->path_start+This->extension_offset,
3139                                                This->path_len-This->extension_offset);
3140             hres = S_OK;
3141         } else {
3142             *pbstrProperty = SysAllocStringLen(NULL, 0);
3143             hres = S_FALSE;
3144         }
3145
3146         if(!(*pbstrProperty))
3147             hres = E_OUTOFMEMORY;
3148
3149         break;
3150     case Uri_PROPERTY_FRAGMENT:
3151         if(This->fragment_start > -1) {
3152             *pbstrProperty = SysAllocStringLen(This->canon_uri+This->fragment_start, This->fragment_len);
3153             hres = S_OK;
3154         } else {
3155             *pbstrProperty = SysAllocStringLen(NULL, 0);
3156             hres = S_FALSE;
3157         }
3158
3159         if(!(*pbstrProperty))
3160             hres = E_OUTOFMEMORY;
3161
3162         break;
3163     case Uri_PROPERTY_HOST:
3164         if(This->host_start > -1) {
3165             /* The '[' and ']' aren't included for IPv6 addresses. */
3166             if(This->host_type == Uri_HOST_IPV6)
3167                 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->host_start+1, This->host_len-2);
3168             else
3169                 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->host_start, This->host_len);
3170
3171             hres = S_OK;
3172         } else {
3173             *pbstrProperty = SysAllocStringLen(NULL, 0);
3174             hres = S_FALSE;
3175         }
3176
3177         if(!(*pbstrProperty))
3178             hres = E_OUTOFMEMORY;
3179
3180         break;
3181     case Uri_PROPERTY_PASSWORD:
3182         if(This->userinfo_split > -1) {
3183             *pbstrProperty = SysAllocStringLen(
3184                 This->canon_uri+This->userinfo_start+This->userinfo_split+1,
3185                 This->userinfo_len-This->userinfo_split-1);
3186             hres = S_OK;
3187         } else {
3188             *pbstrProperty = SysAllocStringLen(NULL, 0);
3189             hres = S_FALSE;
3190         }
3191
3192         if(!(*pbstrProperty))
3193             return E_OUTOFMEMORY;
3194
3195         break;
3196     case Uri_PROPERTY_PATH:
3197         if(This->path_start > -1) {
3198             *pbstrProperty = SysAllocStringLen(This->canon_uri+This->path_start, This->path_len);
3199             hres = S_OK;
3200         } else {
3201             *pbstrProperty = SysAllocStringLen(NULL, 0);
3202             hres = S_FALSE;
3203         }
3204
3205         if(!(*pbstrProperty))
3206             hres = E_OUTOFMEMORY;
3207
3208         break;
3209     case Uri_PROPERTY_PATH_AND_QUERY:
3210         if(This->path_start > -1) {
3211             *pbstrProperty = SysAllocStringLen(This->canon_uri+This->path_start, This->path_len+This->query_len);
3212             hres = S_OK;
3213         } else if(This->query_start > -1) {
3214             *pbstrProperty = SysAllocStringLen(This->canon_uri+This->query_start, This->query_len);
3215             hres = S_OK;
3216         } else {
3217             *pbstrProperty = SysAllocStringLen(NULL, 0);
3218             hres = S_FALSE;
3219         }
3220
3221         if(!(*pbstrProperty))
3222             hres = E_OUTOFMEMORY;
3223
3224         break;
3225     case Uri_PROPERTY_QUERY:
3226         if(This->query_start > -1) {
3227             *pbstrProperty = SysAllocStringLen(This->canon_uri+This->query_start, This->query_len);
3228             hres = S_OK;
3229         } else {
3230             *pbstrProperty = SysAllocStringLen(NULL, 0);
3231             hres = S_FALSE;
3232         }
3233
3234         if(!(*pbstrProperty))
3235             hres = E_OUTOFMEMORY;
3236
3237         break;
3238     case Uri_PROPERTY_RAW_URI:
3239         *pbstrProperty = SysAllocString(This->raw_uri);
3240         if(!(*pbstrProperty))
3241             hres = E_OUTOFMEMORY;
3242         else
3243             hres = S_OK;
3244         break;
3245     case Uri_PROPERTY_SCHEME_NAME:
3246         if(This->scheme_start > -1) {
3247             *pbstrProperty = SysAllocStringLen(This->canon_uri + This->scheme_start, This->scheme_len);
3248             hres = S_OK;
3249         } else {
3250             *pbstrProperty = SysAllocStringLen(NULL, 0);
3251             hres = S_FALSE;
3252         }
3253
3254         if(!(*pbstrProperty))
3255             hres = E_OUTOFMEMORY;
3256
3257         break;
3258     case Uri_PROPERTY_USER_INFO:
3259         if(This->userinfo_start > -1) {
3260             *pbstrProperty = SysAllocStringLen(This->canon_uri+This->userinfo_start, This->userinfo_len);
3261             hres = S_OK;
3262         } else {
3263             *pbstrProperty = SysAllocStringLen(NULL, 0);
3264             hres = S_FALSE;
3265         }
3266
3267         if(!(*pbstrProperty))
3268             hres = E_OUTOFMEMORY;
3269
3270         break;
3271     case Uri_PROPERTY_USER_NAME:
3272         if(This->userinfo_start > -1) {
3273             /* If userinfo_split is set, that means a password exists
3274              * so the username is only from userinfo_start to userinfo_split.
3275              */
3276             if(This->userinfo_split > -1) {
3277                 *pbstrProperty = SysAllocStringLen(This->canon_uri + This->userinfo_start, This->userinfo_split);
3278                 hres = S_OK;
3279             } else {
3280                 *pbstrProperty = SysAllocStringLen(This->canon_uri + This->userinfo_start, This->userinfo_len);
3281                 hres = S_OK;
3282             }
3283         } else {
3284             *pbstrProperty = SysAllocStringLen(NULL, 0);
3285             hres = S_FALSE;
3286         }
3287
3288         if(!(*pbstrProperty))
3289             return E_OUTOFMEMORY;
3290
3291         break;
3292     default:
3293         FIXME("(%p)->(%d %p %x)\n", This, uriProp, pbstrProperty, dwFlags);
3294         hres = E_NOTIMPL;
3295     }
3296
3297     return hres;
3298 }
3299
3300 static HRESULT WINAPI Uri_GetPropertyLength(IUri *iface, Uri_PROPERTY uriProp, DWORD *pcchProperty, DWORD dwFlags)
3301 {
3302     Uri *This = URI_THIS(iface);
3303     HRESULT hres;
3304     TRACE("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags);
3305
3306     if(!pcchProperty)
3307         return E_INVALIDARG;
3308
3309     /* Can only return a length for a property if it's a string. */
3310     if(uriProp > Uri_PROPERTY_STRING_LAST)
3311         return E_INVALIDARG;
3312
3313     /* Don't have support for flags yet. */
3314     if(dwFlags) {
3315         FIXME("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags);
3316         return E_NOTIMPL;
3317     }
3318
3319     switch(uriProp) {
3320     case Uri_PROPERTY_ABSOLUTE_URI:
3321         *pcchProperty = This->canon_len;
3322         hres = S_OK;
3323         break;
3324     case Uri_PROPERTY_AUTHORITY:
3325         *pcchProperty = This->authority_len;
3326         hres = (This->authority_start > -1) ? S_OK : S_FALSE;
3327         break;
3328     case Uri_PROPERTY_DISPLAY_URI:
3329         if(This->scheme_type != URL_SCHEME_UNKNOWN && This->userinfo_start > -1)
3330             *pcchProperty = This->canon_len-This->userinfo_len-1;
3331         else
3332             *pcchProperty = This->canon_len;
3333
3334         hres = S_OK;
3335         break;
3336     case Uri_PROPERTY_DOMAIN:
3337         if(This->domain_offset > -1)
3338             *pcchProperty = This->host_len - This->domain_offset;
3339         else
3340             *pcchProperty = 0;
3341
3342         hres = (This->domain_offset > -1) ? S_OK : S_FALSE;
3343         break;
3344     case Uri_PROPERTY_EXTENSION:
3345         if(This->extension_offset > -1) {
3346             *pcchProperty = This->path_len - This->extension_offset;
3347             hres = S_OK;
3348         } else {
3349             *pcchProperty = 0;
3350             hres = S_FALSE;
3351         }
3352
3353         break;
3354     case Uri_PROPERTY_FRAGMENT:
3355         *pcchProperty = This->fragment_len;
3356         hres = (This->fragment_start > -1) ? S_OK : S_FALSE;
3357         break;
3358     case Uri_PROPERTY_HOST:
3359         *pcchProperty = This->host_len;
3360
3361         /* '[' and ']' aren't included in the length. */
3362         if(This->host_type == Uri_HOST_IPV6)
3363             *pcchProperty -= 2;
3364
3365         hres = (This->host_start > -1) ? S_OK : S_FALSE;
3366         break;
3367     case Uri_PROPERTY_PASSWORD:
3368         *pcchProperty = (This->userinfo_split > -1) ? This->userinfo_len-This->userinfo_split-1 : 0;
3369         hres = (This->userinfo_split > -1) ? S_OK : S_FALSE;
3370         break;
3371     case Uri_PROPERTY_PATH:
3372         *pcchProperty = This->path_len;
3373         hres = (This->path_start > -1) ? S_OK : S_FALSE;
3374         break;
3375     case Uri_PROPERTY_PATH_AND_QUERY:
3376         *pcchProperty = This->path_len+This->query_len;
3377         hres = (This->path_start > -1 || This->query_start > -1) ? S_OK : S_FALSE;
3378         break;
3379     case Uri_PROPERTY_QUERY:
3380         *pcchProperty = This->query_len;
3381         hres = (This->query_start > -1) ? S_OK : S_FALSE;
3382         break;
3383     case Uri_PROPERTY_RAW_URI:
3384         *pcchProperty = SysStringLen(This->raw_uri);
3385         hres = S_OK;
3386         break;
3387     case Uri_PROPERTY_SCHEME_NAME:
3388         *pcchProperty = This->scheme_len;
3389         hres = (This->scheme_start > -1) ? S_OK : S_FALSE;
3390         break;
3391     case Uri_PROPERTY_USER_INFO:
3392         *pcchProperty = This->userinfo_len;
3393         hres = (This->userinfo_start > -1) ? S_OK : S_FALSE;
3394         break;
3395     case Uri_PROPERTY_USER_NAME:
3396         *pcchProperty = (This->userinfo_split > -1) ? This->userinfo_split : This->userinfo_len;
3397         hres = (This->userinfo_start > -1) ? S_OK : S_FALSE;
3398         break;
3399     default:
3400         FIXME("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags);
3401         hres = E_NOTIMPL;
3402     }
3403
3404     return hres;
3405 }
3406
3407 static HRESULT WINAPI Uri_GetPropertyDWORD(IUri *iface, Uri_PROPERTY uriProp, DWORD *pcchProperty, DWORD dwFlags)
3408 {
3409     Uri *This = URI_THIS(iface);
3410     HRESULT hres;
3411
3412     TRACE("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags);
3413
3414     if(!pcchProperty)
3415         return E_INVALIDARG;
3416
3417     /* Microsoft's implementation for the ZONE property of a URI seems to be lacking...
3418      * From what I can tell, instead of checking which URLZONE the URI belongs to it
3419      * simply assigns URLZONE_INVALID and returns E_NOTIMPL. This also applies to the GetZone
3420      * function.
3421      */
3422     if(uriProp == Uri_PROPERTY_ZONE) {
3423         *pcchProperty = URLZONE_INVALID;
3424         return E_NOTIMPL;
3425     }
3426
3427     if(uriProp < Uri_PROPERTY_DWORD_START) {
3428         *pcchProperty = 0;
3429         return E_INVALIDARG;
3430     }
3431
3432     switch(uriProp) {
3433     case Uri_PROPERTY_HOST_TYPE:
3434         *pcchProperty = This->host_type;
3435         hres = S_OK;
3436         break;
3437     case Uri_PROPERTY_PORT:
3438         if(!This->has_port) {
3439             *pcchProperty = 0;
3440             hres = S_FALSE;
3441         } else {
3442             *pcchProperty = This->port;
3443             hres = S_OK;
3444         }
3445
3446         break;
3447     case Uri_PROPERTY_SCHEME:
3448         *pcchProperty = This->scheme_type;
3449         hres = S_OK;
3450         break;
3451     default:
3452         FIXME("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags);
3453         hres = E_NOTIMPL;
3454     }
3455
3456     return hres;
3457 }
3458
3459 static HRESULT WINAPI Uri_HasProperty(IUri *iface, Uri_PROPERTY uriProp, BOOL *pfHasProperty)
3460 {
3461     Uri *This = URI_THIS(iface);
3462     TRACE("(%p)->(%d %p)\n", This, uriProp, pfHasProperty);
3463
3464     if(!pfHasProperty)
3465         return E_INVALIDARG;
3466
3467     switch(uriProp) {
3468     case Uri_PROPERTY_ABSOLUTE_URI:
3469         *pfHasProperty = TRUE;
3470         break;
3471     case Uri_PROPERTY_AUTHORITY:
3472         *pfHasProperty = This->authority_start > -1;
3473         break;
3474     case Uri_PROPERTY_DISPLAY_URI:
3475         *pfHasProperty = TRUE;
3476         break;
3477     case Uri_PROPERTY_DOMAIN:
3478         *pfHasProperty = This->domain_offset > -1;
3479         break;
3480     case Uri_PROPERTY_EXTENSION:
3481         *pfHasProperty = This->extension_offset > -1;
3482         break;
3483     case Uri_PROPERTY_FRAGMENT:
3484         *pfHasProperty = This->fragment_start > -1;
3485         break;
3486     case Uri_PROPERTY_HOST:
3487         *pfHasProperty = This->host_start > -1;
3488         break;
3489     case Uri_PROPERTY_PASSWORD:
3490         *pfHasProperty = This->userinfo_split > -1;
3491         break;
3492     case Uri_PROPERTY_PATH:
3493         *pfHasProperty = This->path_start > -1;
3494         break;
3495     case Uri_PROPERTY_PATH_AND_QUERY:
3496         *pfHasProperty = (This->path_start > -1 || This->query_start > -1);
3497         break;
3498     case Uri_PROPERTY_QUERY:
3499         *pfHasProperty = This->query_start > -1;
3500         break;
3501     case Uri_PROPERTY_RAW_URI:
3502         *pfHasProperty = TRUE;
3503         break;
3504     case Uri_PROPERTY_SCHEME_NAME:
3505         *pfHasProperty = This->scheme_start > -1;
3506         break;
3507     case Uri_PROPERTY_USER_INFO:
3508     case Uri_PROPERTY_USER_NAME:
3509         *pfHasProperty = This->userinfo_start > -1;
3510         break;
3511     case Uri_PROPERTY_HOST_TYPE:
3512         *pfHasProperty = TRUE;
3513         break;
3514     case Uri_PROPERTY_PORT:
3515         *pfHasProperty = This->has_port;
3516         break;
3517     case Uri_PROPERTY_SCHEME:
3518         *pfHasProperty = TRUE;
3519         break;
3520     case Uri_PROPERTY_ZONE:
3521         *pfHasProperty = FALSE;
3522         break;
3523     default:
3524         FIXME("(%p)->(%d %p): Unsupported property type.\n", This, uriProp, pfHasProperty);
3525         return E_NOTIMPL;
3526     }
3527
3528     return S_OK;
3529 }
3530
3531 static HRESULT WINAPI Uri_GetAbsoluteUri(IUri *iface, BSTR *pstrAbsoluteUri)
3532 {
3533     TRACE("(%p)->(%p)\n", iface, pstrAbsoluteUri);
3534     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_ABSOLUTE_URI, pstrAbsoluteUri, 0);
3535 }
3536
3537 static HRESULT WINAPI Uri_GetAuthority(IUri *iface, BSTR *pstrAuthority)
3538 {
3539     TRACE("(%p)->(%p)\n", iface, pstrAuthority);
3540     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_AUTHORITY, pstrAuthority, 0);
3541 }
3542
3543 static HRESULT WINAPI Uri_GetDisplayUri(IUri *iface, BSTR *pstrDisplayUri)
3544 {
3545     TRACE("(%p)->(%p)\n", iface, pstrDisplayUri);
3546     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_DISPLAY_URI, pstrDisplayUri, 0);
3547 }
3548
3549 static HRESULT WINAPI Uri_GetDomain(IUri *iface, BSTR *pstrDomain)
3550 {
3551     TRACE("(%p)->(%p)\n", iface, pstrDomain);
3552     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_DOMAIN, pstrDomain, 0);
3553 }
3554
3555 static HRESULT WINAPI Uri_GetExtension(IUri *iface, BSTR *pstrExtension)
3556 {
3557     TRACE("(%p)->(%p)\n", iface, pstrExtension);
3558     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_EXTENSION, pstrExtension, 0);
3559 }
3560
3561 static HRESULT WINAPI Uri_GetFragment(IUri *iface, BSTR *pstrFragment)
3562 {
3563     TRACE("(%p)->(%p)\n", iface, pstrFragment);
3564     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_FRAGMENT, pstrFragment, 0);
3565 }
3566
3567 static HRESULT WINAPI Uri_GetHost(IUri *iface, BSTR *pstrHost)
3568 {
3569     TRACE("(%p)->(%p)\n", iface, pstrHost);
3570     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_HOST, pstrHost, 0);
3571 }
3572
3573 static HRESULT WINAPI Uri_GetPassword(IUri *iface, BSTR *pstrPassword)
3574 {
3575     TRACE("(%p)->(%p)\n", iface, pstrPassword);
3576     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_PASSWORD, pstrPassword, 0);
3577 }
3578
3579 static HRESULT WINAPI Uri_GetPath(IUri *iface, BSTR *pstrPath)
3580 {
3581     TRACE("(%p)->(%p)\n", iface, pstrPath);
3582     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_PATH, pstrPath, 0);
3583 }
3584
3585 static HRESULT WINAPI Uri_GetPathAndQuery(IUri *iface, BSTR *pstrPathAndQuery)
3586 {
3587     TRACE("(%p)->(%p)\n", iface, pstrPathAndQuery);
3588     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_PATH_AND_QUERY, pstrPathAndQuery, 0);
3589 }
3590
3591 static HRESULT WINAPI Uri_GetQuery(IUri *iface, BSTR *pstrQuery)
3592 {
3593     TRACE("(%p)->(%p)\n", iface, pstrQuery);
3594     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_QUERY, pstrQuery, 0);
3595 }
3596
3597 static HRESULT WINAPI Uri_GetRawUri(IUri *iface, BSTR *pstrRawUri)
3598 {
3599     Uri *This = URI_THIS(iface);
3600     TRACE("(%p)->(%p)\n", This, pstrRawUri);
3601
3602     /* Just forward the call to GetPropertyBSTR. */
3603     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_RAW_URI, pstrRawUri, 0);
3604 }
3605
3606 static HRESULT WINAPI Uri_GetSchemeName(IUri *iface, BSTR *pstrSchemeName)
3607 {
3608     Uri *This = URI_THIS(iface);
3609     TRACE("(%p)->(%p)\n", This, pstrSchemeName);
3610     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_SCHEME_NAME, pstrSchemeName, 0);
3611 }
3612
3613 static HRESULT WINAPI Uri_GetUserInfo(IUri *iface, BSTR *pstrUserInfo)
3614 {
3615     TRACE("(%p)->(%p)\n", iface, pstrUserInfo);
3616     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_USER_INFO, pstrUserInfo, 0);
3617 }
3618
3619 static HRESULT WINAPI Uri_GetUserName(IUri *iface, BSTR *pstrUserName)
3620 {
3621     TRACE("(%p)->(%p)\n", iface, pstrUserName);
3622     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_USER_NAME, pstrUserName, 0);
3623 }
3624
3625 static HRESULT WINAPI Uri_GetHostType(IUri *iface, DWORD *pdwHostType)
3626 {
3627     TRACE("(%p)->(%p)\n", iface, pdwHostType);
3628     return Uri_GetPropertyDWORD(iface, Uri_PROPERTY_HOST_TYPE, pdwHostType, 0);
3629 }
3630
3631 static HRESULT WINAPI Uri_GetPort(IUri *iface, DWORD *pdwPort)
3632 {
3633     TRACE("(%p)->(%p)\n", iface, pdwPort);
3634     return Uri_GetPropertyDWORD(iface, Uri_PROPERTY_PORT, pdwPort, 0);
3635 }
3636
3637 static HRESULT WINAPI Uri_GetScheme(IUri *iface, DWORD *pdwScheme)
3638 {
3639     Uri *This = URI_THIS(iface);
3640     TRACE("(%p)->(%p)\n", This, pdwScheme);
3641     return Uri_GetPropertyDWORD(iface, Uri_PROPERTY_SCHEME, pdwScheme, 0);
3642 }
3643
3644 static HRESULT WINAPI Uri_GetZone(IUri *iface, DWORD *pdwZone)
3645 {
3646     TRACE("(%p)->(%p)\n", iface, pdwZone);
3647     return Uri_GetPropertyDWORD(iface, Uri_PROPERTY_ZONE,pdwZone, 0);
3648 }
3649
3650 static HRESULT WINAPI Uri_GetProperties(IUri *iface, DWORD *pdwProperties)
3651 {
3652     Uri *This = URI_THIS(iface);
3653     TRACE("(%p)->(%p)\n", This, pdwProperties);
3654
3655     if(!pdwProperties)
3656         return E_INVALIDARG;
3657
3658     /* All URIs have these. */
3659     *pdwProperties = Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_RAW_URI|
3660                      Uri_HAS_SCHEME|Uri_HAS_HOST_TYPE;
3661
3662     if(This->scheme_start > -1)
3663         *pdwProperties |= Uri_HAS_SCHEME_NAME;
3664
3665     if(This->authority_start > -1) {
3666         *pdwProperties |= Uri_HAS_AUTHORITY;
3667         if(This->userinfo_start > -1)
3668             *pdwProperties |= Uri_HAS_USER_INFO|Uri_HAS_USER_NAME;
3669         if(This->userinfo_split > -1)
3670             *pdwProperties |= Uri_HAS_PASSWORD;
3671         if(This->host_start > -1)
3672             *pdwProperties |= Uri_HAS_HOST;
3673         if(This->domain_offset > -1)
3674             *pdwProperties |= Uri_HAS_DOMAIN;
3675         if(This->has_port)
3676             *pdwProperties |= Uri_HAS_PORT;
3677     }
3678
3679     if(This->path_start > -1)
3680         *pdwProperties |= Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY;
3681     if(This->query_start > -1)
3682         *pdwProperties |= Uri_HAS_QUERY|Uri_HAS_PATH_AND_QUERY;
3683
3684     if(This->extension_offset > -1)
3685         *pdwProperties |= Uri_HAS_EXTENSION;
3686
3687     if(This->fragment_start > -1)
3688         *pdwProperties |= Uri_HAS_FRAGMENT;
3689
3690     return S_OK;
3691 }
3692
3693 static HRESULT WINAPI Uri_IsEqual(IUri *iface, IUri *pUri, BOOL *pfEqual)
3694 {
3695     Uri *This = URI_THIS(iface);
3696     TRACE("(%p)->(%p %p)\n", This, pUri, pfEqual);
3697
3698     if(!pfEqual)
3699         return E_POINTER;
3700
3701     if(!pUri) {
3702         *pfEqual = FALSE;
3703
3704         /* For some reason Windows returns S_OK here... */
3705         return S_OK;
3706     }
3707
3708     FIXME("(%p)->(%p %p)\n", This, pUri, pfEqual);
3709     return E_NOTIMPL;
3710 }
3711
3712 #undef URI_THIS
3713
/* Method table installed into every Uri object by CreateUri.
 *
 * The initializer is positional, so each entry is bound to an IUriVtbl slot
 * purely by its position in this list; do not reorder the entries.
 */
static const IUriVtbl UriVtbl = {
    Uri_QueryInterface,
    Uri_AddRef,
    Uri_Release,
    Uri_GetPropertyBSTR,
    Uri_GetPropertyLength,
    Uri_GetPropertyDWORD,
    Uri_HasProperty,
    Uri_GetAbsoluteUri,
    Uri_GetAuthority,
    Uri_GetDisplayUri,
    Uri_GetDomain,
    Uri_GetExtension,
    Uri_GetFragment,
    Uri_GetHost,
    Uri_GetPassword,
    Uri_GetPath,
    Uri_GetPathAndQuery,
    Uri_GetQuery,
    Uri_GetRawUri,
    Uri_GetSchemeName,
    Uri_GetUserInfo,
    Uri_GetUserName,
    Uri_GetHostType,
    Uri_GetPort,
    Uri_GetScheme,
    Uri_GetZone,
    Uri_GetProperties,
    Uri_IsEqual
};
3744
3745 /***********************************************************************
3746  *           CreateUri (urlmon.@)
3747  */
3748 HRESULT WINAPI CreateUri(LPCWSTR pwzURI, DWORD dwFlags, DWORD_PTR dwReserved, IUri **ppURI)
3749 {
3750     Uri *ret;
3751     HRESULT hr;
3752     parse_data data;
3753
3754     TRACE("(%s %x %x %p)\n", debugstr_w(pwzURI), dwFlags, (DWORD)dwReserved, ppURI);
3755
3756     if(!ppURI)
3757         return E_INVALIDARG;
3758
3759     if(!pwzURI) {
3760         *ppURI = NULL;
3761         return E_INVALIDARG;
3762     }
3763
3764     /* Check for invalid flags. */
3765     if((dwFlags & Uri_CREATE_DECODE_EXTRA_INFO && dwFlags & Uri_CREATE_NO_DECODE_EXTRA_INFO) ||
3766        (dwFlags & Uri_CREATE_CANONICALIZE && dwFlags & Uri_CREATE_NO_CANONICALIZE) ||
3767        (dwFlags & Uri_CREATE_CRACK_UNKNOWN_SCHEMES && dwFlags & Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES) ||
3768        (dwFlags & Uri_CREATE_PRE_PROCESS_HTML_URI && dwFlags & Uri_CREATE_NO_PRE_PROCESS_HTML_URI) ||
3769        (dwFlags & Uri_CREATE_IE_SETTINGS && dwFlags & Uri_CREATE_NO_IE_SETTINGS)) {
3770         *ppURI = NULL;
3771         return E_INVALIDARG;
3772     }
3773
3774     ret = heap_alloc(sizeof(Uri));
3775     if(!ret)
3776         return E_OUTOFMEMORY;
3777
3778     ret->lpIUriVtbl = &UriVtbl;
3779     ret->ref = 1;
3780
3781     /* Pre process the URI, unless told otherwise. */
3782     if(!(dwFlags & Uri_CREATE_NO_PRE_PROCESS_HTML_URI))
3783         ret->raw_uri = pre_process_uri(pwzURI);
3784     else
3785         ret->raw_uri = SysAllocString(pwzURI);
3786
3787     if(!ret->raw_uri) {
3788         heap_free(ret);
3789         return E_OUTOFMEMORY;
3790     }
3791
3792     memset(&data, 0, sizeof(parse_data));
3793     data.uri = ret->raw_uri;
3794
3795     /* Validate and parse the URI into it's components. */
3796     if(!parse_uri(&data, dwFlags)) {
3797         /* Encountered an unsupported or invalid URI */
3798         SysFreeString(ret->raw_uri);
3799         heap_free(ret);
3800         *ppURI = NULL;
3801         return E_INVALIDARG;
3802     }
3803
3804     /* Canonicalize the URI. */
3805     hr = canonicalize_uri(&data, ret, dwFlags);
3806     if(FAILED(hr)) {
3807         SysFreeString(ret->raw_uri);
3808         heap_free(ret);
3809         *ppURI = NULL;
3810         return hr;
3811     }
3812
3813     *ppURI = URI(ret);
3814     return S_OK;
3815 }
3816
3817 #define URIBUILDER_THIS(iface) DEFINE_THIS(UriBuilder, IUriBuilder, iface)
3818
3819 static HRESULT WINAPI UriBuilder_QueryInterface(IUriBuilder *iface, REFIID riid, void **ppv)
3820 {
3821     UriBuilder *This = URIBUILDER_THIS(iface);
3822
3823     if(IsEqualGUID(&IID_IUnknown, riid)) {
3824         TRACE("(%p)->(IID_IUnknown %p)\n", This, ppv);
3825         *ppv = URIBUILDER(This);
3826     }else if(IsEqualGUID(&IID_IUriBuilder, riid)) {
3827         TRACE("(%p)->(IID_IUri %p)\n", This, ppv);
3828         *ppv = URIBUILDER(This);
3829     }else {
3830         TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppv);
3831         *ppv = NULL;
3832         return E_NOINTERFACE;
3833     }
3834
3835     IUnknown_AddRef((IUnknown*)*ppv);
3836     return S_OK;
3837 }
3838
3839 static ULONG WINAPI UriBuilder_AddRef(IUriBuilder *iface)
3840 {
3841     UriBuilder *This = URIBUILDER_THIS(iface);
3842     LONG ref = InterlockedIncrement(&This->ref);
3843
3844     TRACE("(%p) ref=%d\n", This, ref);
3845
3846     return ref;
3847 }
3848
3849 static ULONG WINAPI UriBuilder_Release(IUriBuilder *iface)
3850 {
3851     UriBuilder *This = URIBUILDER_THIS(iface);
3852     LONG ref = InterlockedDecrement(&This->ref);
3853
3854     TRACE("(%p) ref=%d\n", This, ref);
3855
3856     if(!ref)
3857         heap_free(This);
3858
3859     return ref;
3860 }
3861
3862 static HRESULT WINAPI UriBuilder_CreateUriSimple(IUriBuilder *iface,
3863                                                  DWORD        dwAllowEncodingPropertyMask,
3864                                                  DWORD_PTR    dwReserved,
3865                                                  IUri       **ppIUri)
3866 {
3867     UriBuilder *This = URIBUILDER_THIS(iface);
3868     FIXME("(%p)->(%d %d %p)\n", This, dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri);
3869     return E_NOTIMPL;
3870 }
3871
3872 static HRESULT WINAPI UriBuilder_CreateUri(IUriBuilder *iface,
3873                                            DWORD        dwCreateFlags,
3874                                            DWORD        dwAllowEncodingPropertyMask,
3875                                            DWORD_PTR    dwReserved,
3876                                            IUri       **ppIUri)
3877 {
3878     UriBuilder *This = URIBUILDER_THIS(iface);
3879     FIXME("(%p)->(0x%08x %d %d %p)\n", This, dwCreateFlags, dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri);
3880     return E_NOTIMPL;
3881 }
3882
3883 static HRESULT WINAPI UriBuilder_CreateUriWithFlags(IUriBuilder *iface,
3884                                          DWORD        dwCreateFlags,
3885                                          DWORD        dwUriBuilderFlags,
3886                                          DWORD        dwAllowEncodingPropertyMask,
3887                                          DWORD_PTR    dwReserved,
3888                                          IUri       **ppIUri)
3889 {
3890     UriBuilder *This = URIBUILDER_THIS(iface);
3891     FIXME("(%p)->(0x%08x 0x%08x %d %d %p)\n", This, dwCreateFlags, dwUriBuilderFlags,
3892         dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri);
3893     return E_NOTIMPL;
3894 }
3895
3896 static HRESULT WINAPI  UriBuilder_GetIUri(IUriBuilder *iface, IUri **ppIUri)
3897 {
3898     UriBuilder *This = URIBUILDER_THIS(iface);
3899     FIXME("(%p)->(%p)\n", This, ppIUri);
3900     return E_NOTIMPL;
3901 }
3902
3903 static HRESULT WINAPI UriBuilder_SetIUri(IUriBuilder *iface, IUri *pIUri)
3904 {
3905     UriBuilder *This = URIBUILDER_THIS(iface);
3906     FIXME("(%p)->(%p)\n", This, pIUri);
3907     return E_NOTIMPL;
3908 }
3909
3910 static HRESULT WINAPI UriBuilder_GetFragment(IUriBuilder *iface, DWORD *pcchFragment, LPCWSTR *ppwzFragment)
3911 {
3912     UriBuilder *This = URIBUILDER_THIS(iface);
3913     FIXME("(%p)->(%p %p)\n", This, pcchFragment, ppwzFragment);
3914     return E_NOTIMPL;
3915 }
3916
3917 static HRESULT WINAPI UriBuilder_GetHost(IUriBuilder *iface, DWORD *pcchHost, LPCWSTR *ppwzHost)
3918 {
3919     UriBuilder *This = URIBUILDER_THIS(iface);
3920     FIXME("(%p)->(%p %p)\n", This, pcchHost, ppwzHost);
3921     return E_NOTIMPL;
3922 }
3923
3924 static HRESULT WINAPI UriBuilder_GetPassword(IUriBuilder *iface, DWORD *pcchPassword, LPCWSTR *ppwzPassword)
3925 {
3926     UriBuilder *This = URIBUILDER_THIS(iface);
3927     FIXME("(%p)->(%p %p)\n", This, pcchPassword, ppwzPassword);
3928     return E_NOTIMPL;
3929 }
3930
3931 static HRESULT WINAPI UriBuilder_GetPath(IUriBuilder *iface, DWORD *pcchPath, LPCWSTR *ppwzPath)
3932 {
3933     UriBuilder *This = URIBUILDER_THIS(iface);
3934     FIXME("(%p)->(%p %p)\n", This, pcchPath, ppwzPath);
3935     return E_NOTIMPL;
3936 }
3937
3938 static HRESULT WINAPI UriBuilder_GetPort(IUriBuilder *iface, BOOL *pfHasPort, DWORD *pdwPort)
3939 {
3940     UriBuilder *This = URIBUILDER_THIS(iface);
3941     FIXME("(%p)->(%p %p)\n", This, pfHasPort, pdwPort);
3942     return E_NOTIMPL;
3943 }
3944
3945 static HRESULT WINAPI UriBuilder_GetQuery(IUriBuilder *iface, DWORD *pcchQuery, LPCWSTR *ppwzQuery)
3946 {
3947     UriBuilder *This = URIBUILDER_THIS(iface);
3948     FIXME("(%p)->(%p %p)\n", This, pcchQuery, ppwzQuery);
3949     return E_NOTIMPL;
3950 }
3951
3952 static HRESULT WINAPI UriBuilder_GetSchemeName(IUriBuilder *iface, DWORD *pcchSchemeName, LPCWSTR *ppwzSchemeName)
3953 {
3954     UriBuilder *This = URIBUILDER_THIS(iface);
3955     FIXME("(%p)->(%p %p)\n", This, pcchSchemeName, ppwzSchemeName);
3956     return E_NOTIMPL;
3957 }
3958
3959 static HRESULT WINAPI UriBuilder_GetUserName(IUriBuilder *iface, DWORD *pcchUserName, LPCWSTR *ppwzUserName)
3960 {
3961     UriBuilder *This = URIBUILDER_THIS(iface);
3962     FIXME("(%p)->(%p %p)\n", This, pcchUserName, ppwzUserName);
3963     return E_NOTIMPL;
3964 }
3965
3966 static HRESULT WINAPI UriBuilder_SetFragment(IUriBuilder *iface, LPCWSTR pwzNewValue)
3967 {
3968     UriBuilder *This = URIBUILDER_THIS(iface);
3969     FIXME("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
3970     return E_NOTIMPL;
3971 }
3972
3973 static HRESULT WINAPI UriBuilder_SetHost(IUriBuilder *iface, LPCWSTR pwzNewValue)
3974 {
3975     UriBuilder *This = URIBUILDER_THIS(iface);
3976     FIXME("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
3977     return E_NOTIMPL;
3978 }
3979
3980 static HRESULT WINAPI UriBuilder_SetPassword(IUriBuilder *iface, LPCWSTR pwzNewValue)
3981 {
3982     UriBuilder *This = URIBUILDER_THIS(iface);
3983     FIXME("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
3984     return E_NOTIMPL;
3985 }
3986
3987 static HRESULT WINAPI UriBuilder_SetPath(IUriBuilder *iface, LPCWSTR pwzNewValue)
3988 {
3989     UriBuilder *This = URIBUILDER_THIS(iface);
3990     FIXME("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
3991     return E_NOTIMPL;
3992 }
3993
3994 static HRESULT WINAPI UriBuilder_SetPort(IUriBuilder *iface, BOOL fHasPort, DWORD dwNewValue)
3995 {
3996     UriBuilder *This = URIBUILDER_THIS(iface);
3997     FIXME("(%p)->(%d %d)\n", This, fHasPort, dwNewValue);
3998     return E_NOTIMPL;
3999 }
4000
4001 static HRESULT WINAPI UriBuilder_SetQuery(IUriBuilder *iface, LPCWSTR pwzNewValue)
4002 {
4003     UriBuilder *This = URIBUILDER_THIS(iface);
4004     FIXME("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
4005     return E_NOTIMPL;
4006 }
4007
4008 static HRESULT WINAPI UriBuilder_SetSchemeName(IUriBuilder *iface, LPCWSTR pwzNewValue)
4009 {
4010     UriBuilder *This = URIBUILDER_THIS(iface);
4011     FIXME("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
4012     return E_NOTIMPL;
4013 }
4014
4015 static HRESULT WINAPI UriBuilder_SetUserName(IUriBuilder *iface, LPCWSTR pwzNewValue)
4016 {
4017     UriBuilder *This = URIBUILDER_THIS(iface);
4018     FIXME("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
4019     return E_NOTIMPL;
4020 }
4021
4022 static HRESULT WINAPI UriBuilder_RemoveProperties(IUriBuilder *iface, DWORD dwPropertyMask)
4023 {
4024     UriBuilder *This = URIBUILDER_THIS(iface);
4025     FIXME("(%p)->(0x%08x)\n", This, dwPropertyMask);
4026     return E_NOTIMPL;
4027 }
4028
4029 static HRESULT WINAPI UriBuilder_HasBeenModified(IUriBuilder *iface, BOOL *pfModified)
4030 {
4031     UriBuilder *This = URIBUILDER_THIS(iface);
4032     FIXME("(%p)->(%p)\n", This, pfModified);
4033     return E_NOTIMPL;
4034 }
4035
4036 #undef URIBUILDER_THIS
4037
/* Method table installed into every UriBuilder object by CreateIUriBuilder.
 *
 * The initializer is positional, so each entry is bound to an IUriBuilderVtbl
 * slot purely by its position in this list; do not reorder the entries.
 */
static const IUriBuilderVtbl UriBuilderVtbl = {
    UriBuilder_QueryInterface,
    UriBuilder_AddRef,
    UriBuilder_Release,
    UriBuilder_CreateUriSimple,
    UriBuilder_CreateUri,
    UriBuilder_CreateUriWithFlags,
    UriBuilder_GetIUri,
    UriBuilder_SetIUri,
    UriBuilder_GetFragment,
    UriBuilder_GetHost,
    UriBuilder_GetPassword,
    UriBuilder_GetPath,
    UriBuilder_GetPort,
    UriBuilder_GetQuery,
    UriBuilder_GetSchemeName,
    UriBuilder_GetUserName,
    UriBuilder_SetFragment,
    UriBuilder_SetHost,
    UriBuilder_SetPassword,
    UriBuilder_SetPath,
    UriBuilder_SetPort,
    UriBuilder_SetQuery,
    UriBuilder_SetSchemeName,
    UriBuilder_SetUserName,
    UriBuilder_RemoveProperties,
    UriBuilder_HasBeenModified,
};
4066
4067 /***********************************************************************
4068  *           CreateIUriBuilder (urlmon.@)
4069  */
4070 HRESULT WINAPI CreateIUriBuilder(IUri *pIUri, DWORD dwFlags, DWORD_PTR dwReserved, IUriBuilder **ppIUriBuilder)
4071 {
4072     UriBuilder *ret;
4073
4074     TRACE("(%p %x %x %p)\n", pIUri, dwFlags, (DWORD)dwReserved, ppIUriBuilder);
4075
4076     ret = heap_alloc(sizeof(UriBuilder));
4077     if(!ret)
4078         return E_OUTOFMEMORY;
4079
4080     ret->lpIUriBuilderVtbl = &UriBuilderVtbl;
4081     ret->ref = 1;
4082
4083     *ppIUriBuilder = URIBUILDER(ret);
4084     return S_OK;
4085 }